Skip to content

Instantly share code, notes, and snippets.

@vishal-kvn
Created May 11, 2020 11:15
Show Gist options
  • Select an option

  • Save vishal-kvn/47c1c9115248c782916bdcce028db3fd to your computer and use it in GitHub Desktop.

Select an option

Save vishal-kvn/47c1c9115248c782916bdcce028db3fd to your computer and use it in GitHub Desktop.
Similarity search with IVFPQ index using vector search and id search with reconstruction
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
# --- Synthetic dataset -------------------------------------------------------
d = 64  # vector dimension
nb = 100000  # number of database vectors
nq = 10000  # number of query vectors

np.random.seed(1234)  # make the generated vectors reproducible
xb = np.random.random((nb, d)).astype('float32')
# Add a slowly increasing offset to the first component so that vectors are
# not uniformly spread: row i is shifted by i / 1000 along dimension 0.
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

# Custom ids for the database vectors: a random permutation of 0..nb-1 drawn
# from a separate, independently seeded generator (use np.arange(nb) instead
# for plain sequential ids).
rs = np.random.RandomState(123)
# Faiss ids are 64-bit integers; RandomState.choice returns the platform
# default integer type (32-bit on some platforms), so cast explicitly.
idxs = rs.choice(nb, size=nb, replace=False).astype('int64')
import faiss
# import faiss_cpu as faiss
# --- Index construction ------------------------------------------------------
k = 10  # number of nearest neighbours returned per query (was 4)
nlist = 100  # number of inverted lists (coarse clusters)
m = 8  # number of PQ sub-vectors per vector
nbits = 8  # each sub-vector is encoded as 8 bits

# Flat L2 index used as the coarse quantizer that assigns vectors to lists.
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, nbits)

# The index has to be trained on representative data before vectors are added.
index.train(xb)

# Keep an id -> storage location hashtable so vectors added under custom ids
# can later be fetched again with index.reconstruct(id).
index.set_direct_map_type(faiss.DirectMap.Hashtable)
index.add_with_ids(xb, idxs)
# --- Index summary -----------------------------------------------------------
# Each value is interpolated with str.format so the line reads "name: value"
# (a comma here would print the literal "{}" followed by the value).
print("#####################")
print("ntotal: {}".format(index.ntotal))
print("is_trained: {}".format(index.is_trained))
print("metric_type: {}".format(index.metric_type))
print("nprobe: {}".format(index.nprobe))
print("nlist: {}".format(index.nlist))
print("code_size: {}".format(index.code_size))
# print("maintain_direct_map: {}".format(index.maintain_direct_map))
print("#####################")
# Sanity check: query with the first five database vectors themselves.
# I holds the ids of the k nearest neighbours per query row and D the
# corresponding distances.
D, I = index.search(xb[:5], k)
print(I)
print(D)

# Similarity search based on ids: rebuild the stored vector for a known id
# from the index, then use that reconstruction as the query.
search_id = idxs[0]
# int(): hand Faiss a plain Python int rather than a numpy scalar.
recons_xb0 = index.reconstruct(int(search_id))
# search() expects a 2-D batch of queries, hence the wrapping list.
D0, I0 = index.search(np.array([recons_xb0]), k)
# Show the result instead of stopping in the debugger, so the script also
# runs to completion when stdin is not interactive.
print(I0)
print(D0)
# Similarity search based on vectors (e.g. reverse image lookup given an
# image vector): run all query vectors against the index.
index.nprobe = 10  # probe 10 inverted lists per query
D, I = index.search(xq, k)
print(I[-5:])  # neighbour ids of the last five queries
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment