1
1
from typing import Any , Dict , List , Optional
2
2
3
3
from haystack import Document , component , default_from_dict , default_to_dict
4
+ from haystack .dataclasses .sparse_embedding import SparseEmbedding
4
5
from haystack_integrations .document_stores .qdrant import QdrantDocumentStore
5
6
6
7
7
8
@component
8
9
class QdrantEmbeddingRetriever :
9
10
"""
10
- A component for retrieving documents from an QdrantDocumentStore.
11
+ A component for retrieving documents from an QdrantDocumentStore using dense vectors .
11
12
12
13
Usage example:
13
14
```python
@@ -32,8 +33,8 @@ def __init__(
32
33
document_store : QdrantDocumentStore ,
33
34
filters : Optional [Dict [str , Any ]] = None ,
34
35
top_k : int = 10 ,
35
- scale_score : bool = True , # noqa: FBT001, FBT002
36
- return_embedding : bool = False , # noqa: FBT001, FBT002
36
+ scale_score : bool = True ,
37
+ return_embedding : bool = False ,
37
38
):
38
39
"""
39
40
Create a QdrantEmbeddingRetriever component.
@@ -120,3 +121,121 @@ def run(
120
121
)
121
122
122
123
return {"documents" : docs }
124
+
125
+
126
@component
class QdrantSparseRetriever:
    """
    A component for retrieving documents from a QdrantDocumentStore using sparse vectors.

    Usage example:
    ```python
    from haystack_integrations.components.retrievers.qdrant import QdrantSparseRetriever
    from haystack_integrations.document_stores.qdrant import QdrantDocumentStore
    from haystack.dataclasses.sparse_embedding import SparseEmbedding

    document_store = QdrantDocumentStore(
        ":memory:",
        recreate_index=True,
        return_embedding=True,
        wait_result_from_api=True,
    )
    retriever = QdrantSparseRetriever(document_store=document_store)
    sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33])
    retriever.run(query_sparse_embedding=sparse_embedding)
    ```
    """

    def __init__(
        self,
        document_store: QdrantDocumentStore,
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 10,
        scale_score: bool = True,
        return_embedding: bool = False,
    ):
        """
        Create a QdrantSparseRetriever component.

        :param document_store: An instance of QdrantDocumentStore.
        :param filters: A dictionary with filters to narrow down the search space. Default is None.
        :param top_k: The maximum number of documents to retrieve. Default is 10.
        :param scale_score: Whether to scale the scores of the retrieved documents or not. Default is True.
        :param return_embedding: Whether to return the sparse embedding of the retrieved Documents. Default is False.

        :raises ValueError: If 'document_store' is not an instance of QdrantDocumentStore.
        """

        if not isinstance(document_store, QdrantDocumentStore):
            msg = "document_store must be an instance of QdrantDocumentStore"
            raise ValueError(msg)

        self._document_store = document_store
        self._filters = filters
        self._top_k = top_k
        self._scale_score = scale_score
        self._return_embedding = return_embedding

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        d = default_to_dict(
            self,
            document_store=self._document_store,
            filters=self._filters,
            top_k=self._top_k,
            scale_score=self._scale_score,
            return_embedding=self._return_embedding,
        )
        # The document store is an object, not plain data: replace it with its
        # own serialized form so the resulting dictionary is fully serializable.
        d["init_parameters"]["document_store"] = self._document_store.to_dict()

        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "QdrantSparseRetriever":
        """
        Deserializes the component from a dictionary.

        Note that the ``document_store`` entry of ``data["init_parameters"]`` is
        replaced in place by the rebuilt QdrantDocumentStore instance.

        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
        document_store = QdrantDocumentStore.from_dict(data["init_parameters"]["document_store"])
        data["init_parameters"]["document_store"] = document_store
        return default_from_dict(cls, data)

    @component.output_types(documents=List[Document])
    def run(
        self,
        query_sparse_embedding: SparseEmbedding,
        filters: Optional[Dict[str, Any]] = None,
        top_k: Optional[int] = None,
        scale_score: Optional[bool] = None,
        return_embedding: Optional[bool] = None,
    ):
        """
        Run the Sparse Embedding Retriever on the given input data.

        Arguments left as None fall back to the values given at initialization.

        :param query_sparse_embedding: Sparse Embedding of the query.
        :param filters: A dictionary with filters to narrow down the search space.
        :param top_k: The maximum number of documents to return.
        :param scale_score: Whether to scale the scores of the retrieved documents or not.
        :param return_embedding: Whether to return the embedding of the retrieved Documents.
        :returns:
            A dictionary with the retrieved documents under the key ``documents``.
        """
        # Boolean overrides must be compared against None: with a plain `or`,
        # an explicit False passed at call time would be discarded whenever the
        # value configured at initialization is True.
        effective_scale_score = self._scale_score if scale_score is None else scale_score
        effective_return_embedding = self._return_embedding if return_embedding is None else return_embedding

        docs = self._document_store.query_by_sparse(
            query_sparse_embedding=query_sparse_embedding,
            # A falsy value (None, empty dict, 0) falls back to the init-time setting.
            filters=filters or self._filters,
            top_k=top_k or self._top_k,
            scale_score=effective_scale_score,
            return_embedding=effective_return_embedding,
        )

        return {"documents": docs}
0 commit comments