-
-
Save janakiramm/7dd73e83c92a0de0c683ed27072cdde2 to your computer and use it in GitHub Desktop.
| # The previous part of this tutorial is at https://gist.github.com/janakiramm/55d2d8ec5d14dd45c7e9127d81cdafcd | |
| from vertexai.language_models import TextEmbeddingModel | |
| from google.cloud import aiplatform | |
| import vertexai | |
| from vertexai.preview.generative_models import GenerativeModel, Part | |
| import json | |
| import os | |
# --- Configuration ----------------------------------------------------------
# Replace the placeholder values below with those of your own GCP project.
# NOTE: the string delimiters must be straight ASCII quotes; typographic
# (curly) quotes are a SyntaxError in Python.
project = "YOUR_GCP_PROJECT"
location = "us-central1"
sentence_file_path = "lakeside_sentences.json"
index_name = "INDEX_EP_ID"  # Get this from the console or the previous step

# --- SDK initialisation -----------------------------------------------------
aiplatform.init(project=project, location=location)
vertexai.init()

# Gemini model used to answer the query, and the handle to the index endpoint
# that is searched for nearest neighbours.
model = GenerativeModel("gemini-pro")
lakeside_index_ep = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_name=index_name)
def generate_text_embeddings(sentences) -> list:
    """Embed ``sentences`` with the ``textembedding-gecko@001`` model.

    Returns a list holding the ``values`` attribute of every embedding
    returned by ``get_embeddings``, in the order they were returned.
    """
    embedder = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
    return [item.values for item in embedder.get_embeddings(sentences)]
def generate_context(ids, data):
    """Join the sentences of the entries in ``data`` whose id appears in ``ids``.

    Args:
        ids: Iterable of entry ids. The output follows this order, and an id
            listed twice contributes its sentence(s) twice.
        data: Iterable of mappings that each have an ``'id'`` key; matching
            entries must also have a ``'sentence'`` key.

    Returns:
        The matching sentences separated by newlines, with leading and
        trailing whitespace stripped. An empty string when nothing matches.

    Raises:
        KeyError: If an entry has no ``'id'``, or a matching entry has no
            ``'sentence'``.
    """
    wanted_ids = list(ids)
    if not wanted_ids:
        return ''

    # Index the entries once so each lookup is O(1) instead of rescanning
    # ``data`` for every id. Entries sharing an id keep their file order.
    entries_by_id = {}
    for entry in data:
        entries_by_id.setdefault(entry['id'], []).append(entry)

    sentences = [
        entry['sentence']
        for wanted in wanted_ids
        for entry in entries_by_id.get(wanted, ())
    ]
    return "\n".join(sentences).strip()
def load_file(sentence_file_path):
    """Load the sentence entries from ``sentence_file_path``.

    Assumes the file holds one JSON object per line (JSON Lines) -- TODO
    confirm against the file produced in the previous part of the tutorial.
    Blank lines are skipped.
    """
    with open(sentence_file_path, 'r') as file:
        return [json.loads(line) for line in file if line.strip()]


# ``load_file`` must be defined before this call; without a definition the
# script fails with "NameError: name 'load_file' is not defined".
data = load_file(sentence_file_path)

# Alternative sample queries -- swap one in to try it.
#query=["How many days of unpaid leave in an year"]
#query=["Allowed cost of online course"]
#query=["process for applying sick leave"]
query = ["process for applying personal leave"]
qry_emb = generate_text_embeddings(query)

# Fetch the 10 nearest neighbours of the (single) query embedding.
# NOTE(review): ``index_name`` is also passed as the endpoint name when the
# endpoint handle is created; confirm that the deployed index id really is the
# same string as the endpoint id.
neighbors_response = lakeside_index_ep.find_neighbors(
    deployed_index_id=index_name,
    queries=[qry_emb[0]],
    num_neighbors=10,
)

# One sub-list of neighbours comes back per query; flatten them into ids.
matching_ids = [neighbor.id for sublist in neighbors_response for neighbor in sublist]
context = generate_context(matching_ids, data)

# Interpolate the query text itself (query[0]); formatting the list would put
# its repr, brackets and quotes included, into the prompt.
prompt = f"Based on the context delimited in backticks, answer the query. ```{context}``` {query[0]}"
chat = model.start_chat(history=[])
response = chat.send_message(prompt)
print(response.text)
Thank you for your video and introduction to RAG.
When I run: data=load_file(sentence_file_path)
I received the following error:
NameError: name 'load_file' is not defined
Any thoughts on the issue I might be having?
Thanks!
def load_file(sentence_file_path):
    """Load a JSON Lines file into a list.

    Args:
        sentence_file_path: Path to a text file holding one JSON document per
            line.

    Returns:
        A list with the decoded value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Blank lines (e.g. a trailing newline at the end of the file) are skipped
    # because json.loads raises on empty input.
    with open(sentence_file_path, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]
Add this function before calling it.
400 POST https:///…/projects/5816******/locations/us-/indexEndpoints/20388**:findNeighbors
Could not find field "%24alt" in the type "google.cloud.aiplatform.v1beta1.FindNeighborsRequest".
I am facing this issue when running the following piece of code:
# Ask the index endpoint for the 10 nearest neighbours of the first query
# embedding; the result is bound to ``response``.
response = lakeside_index_ep.find_neighbors(
deployed_index_id = index_name,
queries = [qry_emb[0]],
num_neighbors = 10
)
I also tried an alternative, using this method:
from google.cloud import aiplatform_v1

# Placeholders -- fill in with the values of your own deployment.
API_ENDPOINT = "<YOUR_API_ENDPOINT>.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/<PROJECT_ID>/locations/<LOCATION>/indexEndpoints/<INDEX_ENDPOINT_ID>"
DEPLOYED_INDEX_ID = "<DEPLOYED_INDEX_ID>"

# Configure Vector Search client
client_options = {
    "api_endpoint": API_ENDPOINT,
}
vector_search_client = aiplatform_v1.MatchServiceClient(
    client_options=client_options,
)

# Example feature vector (must be a list of floats, same dimension as your index).
# NOTE(review): this 4-value vector is only a placeholder; replace it with a
# real query embedding before expecting meaningful neighbours.
datapoint = aiplatform_v1.IndexDatapoint(
    feature_vector=[0.12, -0.34, 0.56, 0.78],
)

# Build query
query = aiplatform_v1.FindNeighborsRequest.Query(
    datapoint=datapoint,
    neighbor_count=10,  # number of nearest neighbors
)

# Build request
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query],
    return_full_datapoint=False,
)

# Execute the request
response = vector_search_client.find_neighbors(request)

# Handle the response. Report an empty result explicitly: a bare loop over an
# empty neighbour list prints nothing, which looks like "no response".
results = response.nearest_neighbors
if not results or not results[0].neighbors:
    print("No neighbors returned for the query.")
else:
    for neighbor in results[0].neighbors:
        print(f"ID: {neighbor.datapoint.datapoint_id}, Distance: {neighbor.distance}")
But the above gives no errors, and no response is received upon running...
Thank you for your video and introduction to RAG.
When I run:
data=load_file(sentence_file_path)
I received the following error:
NameError: name 'load_file' is not defined
Any thoughts on the issue I might be having?
Thanks!