Last active
April 3, 2025 02:10
-
-
Save ksmin23/4f1947bdfad8ab802a62934329ccf112 to your computer and use it in GitHub Desktop.
Amazon OpenSearch Serverless Hybrid Search Example with Script score query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # -*- encoding: utf-8 -*- | |
| # vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab | |
| # Required python packages | |
| # pip install -U "boto3>=1.26.159" | |
| # pip install -U langchain==0.2.6 | |
| # pip install -U langchain-aws==0.1.9 | |
| # pip install -U langchain-community==0.2.6 | |
| # pip install -U opensearch-py==2.2.0 | |
| import sys | |
| import json | |
| import boto3 | |
| from langchain_community.embeddings import BedrockEmbeddings | |
| import numpy as np | |
| from opensearchpy import ( | |
| OpenSearch, | |
| RequestsHttpConnection, | |
| AWSV4SignerAuth | |
| ) | |
def get_auth(region_name: str) -> AWSV4SignerAuth:
    """
    Build the SigV4 signer used to call Amazon OpenSearch Serverless.

    Credentials are taken from a boto3 session created for `region_name`;
    the signer is created for that same region with the 'aoss' service name.
    """
    session = boto3.Session(region_name=region_name)
    return AWSV4SignerAuth(session.get_credentials(), region_name, 'aoss')
# Region is read from the default boto3 session and reused for both the
# request signer and the Bedrock embedding client below.
aws_region = boto3.Session().region_name

# Replace opensearch_endpoint with yours.
# The host part of a collection endpoint is the collection ID (not its ARN).
opensearch_endpoint = 'https://{collection-id}.{region}.aoss.amazonaws.com'
opensearch_index_name = '<index_name>'  # Replace opensearch_index_name with yours

http_auth = get_auth(aws_region)

# Only the host name is passed in `hosts`, so the URL scheme is stripped
# from the endpoint; TLS is requested via port 443 and use_ssl instead.
aoss_client = OpenSearch(
    hosts=[{'host': opensearch_endpoint.replace("https://", ""), 'port': 443}],
    http_auth=http_auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection
)

# Embedding client for the Titan text embedding model.
embedding_function = BedrockEmbeddings(
    model_id='amazon.titan-embed-text-v1',
    region_name=aws_region
)
# The question is used twice: as the text of the match query and as the
# input that gets embedded into the query vector.
q = "Which XGBoost versions does SageMaker support?"
query_vector = embedding_function.embed_query(q)

k = 3  # Number of hits to return
score_threshold = 0.0
space_type = "cosinesimil"  # ["l2", "l1", "cosinesimil", "linf", "innerproduct"]

# Replace text_field_name, vector_field_name with yours
text_field_name = "text"
vector_field_name = "vector_field"

# Match query on the text field; used as the inner query of script_score.
pre_filter = {"match": {text_field_name: q}}

#XXX: For more information, see
#  https://opensearch.org/docs/latest/query-dsl/specialized/script-score/
knn_script = {
    "source": "knn_score",
    "lang": "knn",
    "params": {
        "field": vector_field_name,
        "query_value": query_vector,
        "space_type": space_type,
    },
}

script_score_query = {
    "size": k,
    "min_score": score_threshold,
    "query": {
        "script_score": {
            "query": pre_filter,
            "script": knn_script,
        },
    },
}
# Send the script_score query to the index; `size` is passed as a request
# argument in addition to the "size" entry already present in the body.
response = aoss_client.search(
    index=opensearch_index_name,
    body=script_score_query,
    size=k,
)

# Print the search response as returned by the client.
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
💯 LGTM 업무에 잘 참고하겠습니다. 감사합니다!