Created
September 23, 2023 15:13
-
-
Save vinodkiran/d299f8f0137dc52f6bee5c5782953e84 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// LLM + embeddings + retrieval chain.
import { OpenAI } from "langchain/llms/openai";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { VectorDBQAChain } from "langchain/chains";
import dotenv from "dotenv";
// Elasticsearch client and the LangChain vector-store wrapper around it.
import { Client } from "@elastic/elasticsearch";
import { ElasticVectorSearch } from "langchain/vectorstores/elasticsearch";
// Document ingestion: directory walker, plain-text loader, chunker.
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { CharacterTextSplitter } from "langchain/text_splitter";

// Populate process.env from .env before any configuration is read.
dotenv.config();
/**
 * Loads every .txt file under ./documents, splits the text into chunks,
 * embeds the chunks with OpenAI, indexes them into Elasticsearch, then
 * runs a sample similarity search plus a VectorDBQAChain LLM query.
 *
 * Required environment (loaded by dotenv at module scope):
 *   ELASTIC_URL      - Elasticsearch node URL
 *   ELASTIC_API_KEY  - API key for the cluster
 *   ELASTIC_INDEX    - target index name (optional, defaults below)
 *   OPENAI_API_KEY   - read implicitly by the OpenAI/OpenAIEmbeddings clients
 *
 * @returns {Promise<void>}
 * @throws {Error} if the required Elasticsearch environment variables are unset.
 */
export async function run() {
  // Fail fast with a clear message instead of letting the ES client
  // connect to `undefined` and produce an opaque transport error.
  if (!process.env.ELASTIC_URL || !process.env.ELASTIC_API_KEY) {
    throw new Error("ELASTIC_URL and ELASTIC_API_KEY must be set");
  }

  const config = {
    node: process.env.ELASTIC_URL,
    auth: {
      apiKey: process.env.ELASTIC_API_KEY,
    },
  };
  const clientArgs = {
    client: new Client(config),
    indexName: process.env.ELASTIC_INDEX ?? "test_vectorstore",
  };

  // Load every .txt document under ./documents.
  const loader = new DirectoryLoader("./documents", {
    ".txt": (path) => new TextLoader(path),
  });
  console.log("Loading docs...");
  const documents = await loader.load();

  // Split into ~1000-character chunks with no overlap.
  const textSplitter = new CharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 0 });
  const docs = await textSplitter.splitDocuments(documents);
  console.log(`Docs found....${docs.length}`);

  const embeddings = new OpenAIEmbeddings();
  // NOTE(review): the constructor wraps an existing index; it does not create
  // the index/mapping. On a fresh cluster prefer
  //   await ElasticVectorSearch.fromDocuments(docs, embeddings, clientArgs);
  // which creates the index and inserts in one step — presumably why the
  // author saw addDocuments return IDs while the insertion failed.
  const vectorStore = new ElasticVectorSearch(embeddings, clientArgs);

  // Debugging tip: per-document bulk failures can be surfaced with
  // clientArgs.client.helpers.bulk({ datasource, onDrop(doc) { ... } })
  // and inspecting doc.error.reason for each dropped document.
  // Also supports an additional { ids: [] } parameter for upsertion.
  const ids = await vectorStore.addDocuments(docs);
  console.log(`Docs loaded....${ids.length}`);

  /* Use as part of a chain (currently no metadata filters) for LLM query */
  const model = new OpenAI();
  const chain = VectorDBQAChain.fromLLM(model, vectorStore, {
    k: 3,
    returnSourceDocuments: true,
  });

  const question = "What did the president say about Justice Breyer?";

  /* Search the vector DB independently with meta filters */
  const results = await vectorStore.similaritySearch(question, 3);
  console.log(`Search..${JSON.stringify(results, null, 2)}`);

  const response = await chain.call({ query: question });
  console.log(`OpenAI..${JSON.stringify(response, null, 2)}`);

  // Optional cleanup of everything inserted in this run:
  // await vectorStore.delete({ ids });
}
// Entry point. Attach a rejection handler so the promise is not left
// floating — otherwise any async failure surfaces only as an
// unhandled-rejection warning with an unpredictable exit status.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment