Skip to content

Commit b018504

Browse files
feat(Qdrant): use_sparse_embeddings true by default + bugfix
1 parent: fd064da; commit: b018504

File tree

2 files changed: +131 -8 lines changed

2 files changed

+131
-8
lines changed

integrations/qdrant/tests/test_dict_converters.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def test_to_dict():
2525
"content_field": "content",
2626
"name_field": "name",
2727
"embedding_field": "embedding",
28-
"use_sparse_embeddings": False,
28+
"use_sparse_embeddings": True,
2929
"sparse_embedding_field": "sparse_embedding",
3030
"similarity": "cosine",
3131
"return_embedding": False,
@@ -65,7 +65,7 @@ def test_from_dict():
6565
"content_field": "content",
6666
"name_field": "name",
6767
"embedding_field": "embedding",
68-
"use_sparse_embeddings": False,
68+
"use_sparse_embeddings": True,
6969
"sparse_embedding_field": "sparse_embedding",
7070
"similarity": "cosine",
7171
"return_embedding": False,
@@ -90,7 +90,7 @@ def test_from_dict():
9090
document_store.content_field == "content",
9191
document_store.name_field == "name",
9292
document_store.embedding_field == "embedding",
93-
document_store.use_sparse_embeddings is False,
93+
document_store.use_sparse_embeddings is True,
9494
document_store.sparse_embedding_field == "sparse_embedding",
9595
document_store.on_disk is False,
9696
document_store.similarity == "cosine",

integrations/qdrant/tests/test_retriever.py

+128 -5
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,121 @@
1515

1616
class TestQdrantRetriever(FilterableDocsFixtureMixin):
1717
def test_init_default(self):
18-
document_store = QdrantDocumentStore(location=":memory:", index="test")
18+
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
1919
retriever = QdrantEmbeddingRetriever(document_store=document_store)
2020
assert retriever._document_store == document_store
2121
assert retriever._filters is None
2222
assert retriever._top_k == 10
2323
assert retriever._return_embedding is False
2424

2525
def test_to_dict(self):
26-
document_store = QdrantDocumentStore(location=":memory:", index="test")
26+
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
27+
retriever = QdrantEmbeddingRetriever(document_store=document_store)
28+
res = retriever.to_dict()
29+
assert res == {
30+
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever",
31+
"init_parameters": {
32+
"document_store": {
33+
"type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
34+
"init_parameters": {
35+
"location": ":memory:",
36+
"url": None,
37+
"port": 6333,
38+
"grpc_port": 6334,
39+
"prefer_grpc": False,
40+
"https": None,
41+
"api_key": None,
42+
"prefix": None,
43+
"timeout": None,
44+
"host": None,
45+
"path": None,
46+
"index": "test",
47+
"embedding_dim": 768,
48+
"on_disk": False,
49+
"content_field": "content",
50+
"name_field": "name",
51+
"embedding_field": "embedding",
52+
"use_sparse_embeddings": True,
53+
"sparse_embedding_field": "sparse_embedding",
54+
"similarity": "cosine",
55+
"return_embedding": False,
56+
"progress_bar": True,
57+
"duplicate_documents": "overwrite",
58+
"recreate_index": False,
59+
"shard_number": None,
60+
"replication_factor": None,
61+
"write_consistency_factor": None,
62+
"on_disk_payload": None,
63+
"hnsw_config": None,
64+
"optimizers_config": None,
65+
"wal_config": None,
66+
"quantization_config": None,
67+
"init_from": None,
68+
"wait_result_from_api": True,
69+
"metadata": {},
70+
"write_batch_size": 100,
71+
"scroll_size": 10000,
72+
"payload_fields_to_index": None,
73+
},
74+
},
75+
"filters": None,
76+
"top_k": 10,
77+
"scale_score": True,
78+
"return_embedding": False,
79+
},
80+
}
81+
82+
def test_from_dict(self):
83+
data = {
84+
"type": "haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever",
85+
"init_parameters": {
86+
"document_store": {
87+
"init_parameters": {"location": ":memory:", "index": "test"},
88+
"type": "haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore",
89+
},
90+
"filters": None,
91+
"top_k": 5,
92+
"scale_score": False,
93+
"return_embedding": True,
94+
},
95+
}
96+
retriever = QdrantEmbeddingRetriever.from_dict(data)
97+
assert isinstance(retriever._document_store, QdrantDocumentStore)
98+
assert retriever._document_store.index == "test"
99+
assert retriever._filters is None
100+
assert retriever._top_k == 5
101+
assert retriever._scale_score is False
102+
assert retriever._return_embedding is True
103+
104+
def test_run(self, filterable_docs: List[Document]):
105+
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
106+
107+
document_store.write_documents(filterable_docs)
108+
109+
retriever = QdrantEmbeddingRetriever(document_store=document_store)
110+
111+
results: List[Document] = retriever.run(query_embedding=_random_embeddings(768))
112+
113+
assert len(results["documents"]) == 10 # type: ignore
114+
115+
results = retriever.run(query_embedding=_random_embeddings(768), top_k=5, return_embedding=False)
116+
117+
assert len(results["documents"]) == 5 # type: ignore
118+
119+
for document in results["documents"]: # type: ignore
120+
assert document.embedding is None
121+
122+
class TestQdrantRetriever(FilterableDocsFixtureMixin):
123+
def test_init_default(self):
124+
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
125+
retriever = QdrantEmbeddingRetriever(document_store=document_store)
126+
assert retriever._document_store == document_store
127+
assert retriever._filters is None
128+
assert retriever._top_k == 10
129+
assert retriever._return_embedding is False
130+
131+
def test_to_dict(self):
132+
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=False)
27133
retriever = QdrantEmbeddingRetriever(document_store=document_store)
28134
res = retriever.to_dict()
29135
assert res == {
@@ -102,7 +208,7 @@ def test_from_dict(self):
102208
assert retriever._return_embedding is True
103209

104210
def test_run(self, filterable_docs: List[Document]):
105-
document_store = QdrantDocumentStore(location=":memory:", index="Boi")
211+
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=False)
106212

107213
document_store.write_documents(filterable_docs)
108214

@@ -119,18 +225,35 @@ def test_run(self, filterable_docs: List[Document]):
119225
for document in results["documents"]: # type: ignore
120226
assert document.embedding is None
121227

228+
def test_run_with_sparse_activated(self, filterable_docs: List[Document]):
229+
document_store = QdrantDocumentStore(location=":memory:", index="Boi", use_sparse_embeddings=True)
230+
231+
document_store.write_documents(filterable_docs)
232+
233+
retriever = QdrantEmbeddingRetriever(document_store=document_store)
234+
235+
results: List[Document] = retriever.run(query_embedding=_random_embeddings(768))
236+
237+
assert len(results["documents"]) == 10 # type: ignore
238+
239+
results = retriever.run(query_embedding=_random_embeddings(768), top_k=5, return_embedding=False)
240+
241+
assert len(results["documents"]) == 5 # type: ignore
242+
243+
for document in results["documents"]: # type: ignore
244+
assert document.embedding is None
122245

123246
class TestQdrantSparseRetriever(FilterableDocsFixtureMixin):
124247
def test_init_default(self):
125-
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=True)
248+
document_store = QdrantDocumentStore(location=":memory:", index="test")
126249
retriever = QdrantSparseRetriever(document_store=document_store)
127250
assert retriever._document_store == document_store
128251
assert retriever._filters is None
129252
assert retriever._top_k == 10
130253
assert retriever._return_embedding is False
131254

132255
def test_to_dict(self):
133-
document_store = QdrantDocumentStore(location=":memory:", index="test", use_sparse_embeddings=True)
256+
document_store = QdrantDocumentStore(location=":memory:", index="test")
134257
retriever = QdrantSparseRetriever(document_store=document_store)
135258
res = retriever.to_dict()
136259
assert res == {

0 commit comments

Comments (0)