@@ -216,8 +216,8 @@ def semantic_search_usearch(
216
216
`corpus_embeddings` or `corpus_index` should be used, not
217
217
both.
218
218
corpus_precision: Precision of the corpus embeddings. The
219
- options are "float32", "int8", or "binary". Default is
220
- "float32".
219
+ options are "float32", "int8", "ubinary" or "binary". Default
220
+ is "float32".
221
221
top_k: Number of top results to retrieve. Default is 10.
222
222
ranges: Ranges for quantization of embeddings. This is only used
223
223
for int8 quantization, where the ranges refer to the
@@ -263,8 +263,8 @@ def semantic_search_usearch(
263
263
raise ValueError ("Only corpus_embeddings or corpus_index should be used, not both." )
264
264
if corpus_embeddings is None and corpus_index is None :
265
265
raise ValueError ("Either corpus_embeddings or corpus_index should be used." )
266
- if corpus_precision not in ["float32" , "int8" , "binary" ]:
267
- raise ValueError ('corpus_precision must be "float32", "int8", or "binary" for usearch' )
266
+ if corpus_precision not in ["float32" , "int8" , "ubinary" , " binary" ]:
267
+ raise ValueError ('corpus_precision must be "float32", "int8", "ubinary", "binary" for usearch' )
268
268
269
269
# If corpus_index is not provided, create a new index
270
270
if corpus_index is None :
@@ -284,6 +284,12 @@ def semantic_search_usearch(
284
284
corpus_index = Index (
285
285
ndim = corpus_embeddings .shape [1 ],
286
286
metric = "hamming" ,
287
+ dtype = "i8" ,
288
+ )
289
+ elif corpus_precision == "ubinary" :
290
+ corpus_index = Index (
291
+ ndim = corpus_embeddings .shape [1 ] * 8 ,
292
+ metric = "hamming" ,
287
293
dtype = "b1" ,
288
294
)
289
295
corpus_index .add (np .arange (len (corpus_embeddings )), corpus_embeddings )
@@ -331,7 +337,7 @@ def semantic_search_usearch(
331
337
if rescore_embeddings is not None :
332
338
top_k_embeddings = np .array ([corpus_index .get (query_indices ) for query_indices in indices ])
333
339
# If the corpus precision is binary or ubinary, we need to unpack the bits
334
- if corpus_precision == " binary" :
340
+ if corpus_precision in ( "ubinary" , " binary") :
335
341
top_k_embeddings = np .unpackbits (top_k_embeddings .astype (np .uint8 ), axis = - 1 )
336
342
top_k_embeddings = top_k_embeddings .astype (int )
337
343
0 commit comments