Created
May 11, 2020 11:15
-
-
Save vishal-kvn/47c1c9115248c782916bdcce028db3fd to your computer and use it in GitHub Desktop.
Similarity search with IVFPQ index using vector search and id search with reconstruction
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import numpy as np | |
# ---------------------------------------------------------------------------
# Synthetic dataset: random float32 vectors plus a shuffled set of custom ids.
# ---------------------------------------------------------------------------
d = 64       # dimensionality of every vector
nb = 100000  # number of database vectors
nq = 10000   # number of query vectors

np.random.seed(1234)  # fixed seed so the generated data is reproducible

# Database vectors: uniform values in [0, 1), with the first component shifted
# by row_index / 1000 so that rows are not identically distributed.
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.

# Query vectors, drawn from the same global RNG stream right after xb and
# shifted along the first component in the same way.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

# Custom ids: a random permutation of 0..nb-1 taken from a separate, seeded
# RandomState so the global stream used for xb/xq is left untouched.
# The explicit int64 cast keeps the ids 64-bit on every platform (NumPy's
# default integer can be 32-bit, e.g. on Windows with NumPy < 2), which is the
# id width Faiss works with.
rs = np.random.RandomState(123)
idxs = rs.choice(nb, size=nb, replace=False).astype('int64')
| import faiss | |
| # import faiss_cpu as faiss | |
# ---------------------------------------------------------------------------
# Build the IVFPQ index over the database vectors.
# ---------------------------------------------------------------------------
nlist = 100  # third argument to IndexIVFPQ (number of inverted lists)
m = 8        # fourth argument to IndexIVFPQ (number of sub-vectors per code)
# k = 4
k = 10       # number of neighbours requested from every search below
nbits = 8    # each sub-vector is encoded as 8 bits

quantizer = faiss.IndexFlatL2(d)  # flat L2 index handed to the IVF index as its quantizer
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)

# The index is trained on the database vectors before anything is added.
index.train(xb)

# Switch the direct map to a hashtable before adding: the script later calls
# index.reconstruct(id) with one of the shuffled custom ids.
# NOTE(review): presumably the hashtable map is what makes reconstruct() work
# with non-sequential ids -- confirm against the Faiss documentation.
index.set_direct_map_type(faiss.DirectMap.Hashtable)

# Store every row of xb under the id found at the same position in idxs.
index.add_with_ids(xb, idxs)
# ---------------------------------------------------------------------------
# Print a short summary of the index state.
#
# The labels are filled in with str.format(). The previous version wrote
# `"...: {}",format(value)` (comma instead of dot), which called the builtin
# format() and passed the result as a second print() argument, so the output
# contained a literal "{}" followed by the value.
# ---------------------------------------------------------------------------
print("#####################")
print("ntotal: {}".format(index.ntotal))
print("is_trained: {}".format(index.is_trained))
print("metric_type: {}".format(index.metric_type))
print("nprobe: {}".format(index.nprobe))
print("nlist: {}".format(index.nlist))
print("code_size: {}".format(index.code_size))
# print("maintain_direct_map: {}".format(index.maintain_direct_map))
print("#####################")
# ---------------------------------------------------------------------------
# Sanity check: search with the first five database vectors themselves and
# print the returned ids (I) followed by the returned distances (D).
# ---------------------------------------------------------------------------
D, I = index.search(xb[:5], k)
print(I)
print(D)

# ---------------------------------------------------------------------------
# Similarity search starting from an id instead of a vector:
#   1. take the id that was paired with xb[0] when the vectors were added,
#   2. ask the index to reconstruct the vector stored under that id,
#   3. search with the reconstructed vector as a one-row query batch.
# ---------------------------------------------------------------------------
search_id = idxs[0]
recons_xb0 = index.reconstruct(int(search_id))  # plain Python int passed as the id
query_batch = np.array([recons_xb0])            # wrap the single vector as a one-row batch
D0, I0 = index.search(query_batch, k)

# Stop in the debugger so D0 / I0 can be inspected interactively.
import pdb
pdb.set_trace()
# ---------------------------------------------------------------------------
# Vector search: query the index directly with the query vectors xq
# (the "reverse image lookup based on image vector" use case).
# ---------------------------------------------------------------------------
# Set nprobe to 10 for this search; the earlier searches ran with whatever
# nprobe the index had at that point.
index.nprobe = 10
D, I = index.search(xq, k)
print(I[-5:])  # ids returned for the last five queries

# Stop in the debugger again so D / I can be inspected interactively.
import pdb
pdb.set_trace()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment