Skip to content

Instantly share code, notes, and snippets.

@ksmin23
Last active April 3, 2025 02:10
Show Gist options
  • Select an option

  • Save ksmin23/4f1947bdfad8ab802a62934329ccf112 to your computer and use it in GitHub Desktop.

Select an option

Save ksmin23/4f1947bdfad8ab802a62934329ccf112 to your computer and use it in GitHub Desktop.
Amazon OpenSearch Serverless Hybrid Search Example with Script score query
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4 expandtab
# Required python packages
# pip install -U "boto3>=1.26.159"
# pip install -U langchain==0.2.6
# pip install -U langchain-aws==0.1.9
# pip install -U langchain-community==0.2.6
# pip install -U opensearch-py==2.2.0
import sys
import json
import boto3
from langchain_community.embeddings import BedrockEmbeddings
import numpy as np
from opensearchpy import (
OpenSearch,
RequestsHttpConnection,
AWSV4SignerAuth
)
def get_auth(region_name: str) -> AWSV4SignerAuth:
    """
    Build the SigV4 signer used to access Amazon OpenSearch Serverless.

    Args:
        region_name: AWS region used to create the boto3 session and
            passed to the signer.

    Returns:
        An AWSV4SignerAuth created from the session's credentials, the
        given region, and the 'aoss' service name.
    """
    session = boto3.Session(region_name=region_name)
    # 'aoss' is the service name handed to the signer.
    return AWSV4SignerAuth(session.get_credentials(), region_name, 'aoss')
# Region is taken from the default boto3 session.
aws_region = boto3.Session().region_name

# Replace the two placeholders below with your own endpoint and index name.
opensearch_endpoint = 'https://{collection-arn}.{region}.aoss.amazonaws.com'
opensearch_index_name = '<index_name>'

http_auth = get_auth(aws_region)

# Only the host name is passed to the client, so the URL scheme is stripped.
_aoss_host = {'host': opensearch_endpoint.replace("https://", ""), 'port': 443}
aoss_client = OpenSearch(
    hosts=[_aoss_host],
    connection_class=RequestsHttpConnection,
    http_auth=http_auth,
    use_ssl=True,
    verify_certs=True,
)

# Embedding model used to turn the query text into a vector.
embedding_function = BedrockEmbeddings(
    region_name=aws_region,
    model_id='amazon.titan-embed-text-v1',
)
# Query text and its embedding.
q = "Which XGBoost versions does SageMaker support?"
query_vector = embedding_function.embed_query(q)

k = 3  # Number of hits to return
score_threshold = 0.0
space_type = "cosinesimil"  # ["l2", "l1", "cosinesimil", "linf", "innerproduct"]

# Replace text_field_name, vector_field_name with yours
text_field_name = "text"
vector_field_name = "vector_field"

# Full-text match used as the inner query of the script_score query.
pre_filter = {"match": {text_field_name: q}}

# k-NN scoring script: scores documents against query_vector
# using the chosen space type.
_knn_script = {
    "source": "knn_score",
    "lang": "knn",
    "params": {
        "field": vector_field_name,
        "query_value": query_vector,
        "space_type": space_type,
    },
}

# XXX: For more information, see
# https://opensearch.org/docs/latest/query-dsl/specialized/script-score/
script_score_query = {
    "size": k,
    "min_score": score_threshold,
    "query": {
        "script_score": {
            "query": pre_filter,
            "script": _knn_script,
        },
    },
}
# Run the query against the index; `size` is passed as a request
# parameter in addition to the value already present in the body.
_search_kwargs = {
    "index": opensearch_index_name,
    "body": script_score_query,
    "size": k,
}
response = aoss_client.search(**_search_kwargs)
print(response)
@GSEnergy-KimKyungho
Copy link

💯 LGTM 업무에 잘 참고 하겠습니다. 감사합니다!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment