Created
September 23, 2023 15:13
-
-
Save vinodkiran/d299f8f0137dc52f6bee5c5782953e84 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// LLM + embeddings + retrieval chain.
import { OpenAI } from "langchain/llms/openai";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { VectorDBQAChain } from "langchain/chains";
import dotenv from "dotenv";
// Elasticsearch client and the LangChain vector-store wrapper around it.
import { Client } from "@elastic/elasticsearch";
import { ElasticVectorSearch } from "langchain/vectorstores/elasticsearch";
// Document ingestion: directory walker, plain-text loader, chunker.
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { CharacterTextSplitter } from "langchain/text_splitter";

// Populate process.env from .env before any configuration is read.
dotenv.config();
/**
 * Loads every .txt file under ./documents, splits the text into chunks,
 * embeds the chunks with OpenAI, indexes them into Elasticsearch, then
 * runs a sample similarity search plus a VectorDBQAChain LLM query.
 *
 * Required environment (loaded by dotenv at module scope):
 *   ELASTIC_URL      - Elasticsearch node URL
 *   ELASTIC_API_KEY  - API key for the cluster
 *   ELASTIC_INDEX    - target index name (optional, defaults below)
 *   OPENAI_API_KEY   - read implicitly by the OpenAI/OpenAIEmbeddings clients
 *
 * @returns {Promise<void>}
 * @throws {Error} if the required Elasticsearch environment variables are unset.
 */
export async function run() {
  // Fail fast with a clear message instead of letting the ES client
  // connect to `undefined` and produce an opaque transport error.
  if (!process.env.ELASTIC_URL || !process.env.ELASTIC_API_KEY) {
    throw new Error("ELASTIC_URL and ELASTIC_API_KEY must be set");
  }

  const config = {
    node: process.env.ELASTIC_URL,
    auth: {
      apiKey: process.env.ELASTIC_API_KEY,
    },
  };
  const clientArgs = {
    client: new Client(config),
    indexName: process.env.ELASTIC_INDEX ?? "test_vectorstore",
  };

  // Load every .txt document under ./documents.
  const loader = new DirectoryLoader("./documents", {
    ".txt": (path) => new TextLoader(path),
  });
  console.log("Loading docs...");
  const documents = await loader.load();

  // Split into ~1000-character chunks with no overlap.
  const textSplitter = new CharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 0 });
  const docs = await textSplitter.splitDocuments(documents);
  console.log(`Docs found....${docs.length}`);

  const embeddings = new OpenAIEmbeddings();
  // NOTE(review): the constructor wraps an existing index; it does not create
  // the index/mapping. On a fresh cluster prefer
  //   await ElasticVectorSearch.fromDocuments(docs, embeddings, clientArgs);
  // which creates the index and inserts in one step — presumably why the
  // author saw addDocuments return IDs while the insertion failed.
  const vectorStore = new ElasticVectorSearch(embeddings, clientArgs);

  // Debugging tip: per-document bulk failures can be surfaced with
  // clientArgs.client.helpers.bulk({ datasource, onDrop(doc) { ... } })
  // and inspecting doc.error.reason for each dropped document.
  // Also supports an additional { ids: [] } parameter for upsertion.
  const ids = await vectorStore.addDocuments(docs);
  console.log(`Docs loaded....${ids.length}`);

  /* Use as part of a chain (currently no metadata filters) for LLM query */
  const model = new OpenAI();
  const chain = VectorDBQAChain.fromLLM(model, vectorStore, {
    k: 3,
    returnSourceDocuments: true,
  });

  const question = "What did the president say about Justice Breyer?";

  /* Search the vector DB independently with meta filters */
  const results = await vectorStore.similaritySearch(question, 3);
  console.log(`Search..${JSON.stringify(results, null, 2)}`);

  const response = await chain.call({ query: question });
  console.log(`OpenAI..${JSON.stringify(response, null, 2)}`);

  // Optional cleanup of everything inserted in this run:
  // await vectorStore.delete({ ids });
}
// Entry point. Attach a rejection handler so the promise is not left
// floating — otherwise any async failure surfaces only as an
// unhandled-rejection warning with an unpredictable exit status.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment