31
31
from haystack .utils import Secret , deserialize_secrets_inplace
32
32
33
33
from .errors import AzureAISearchDocumentStoreConfigError
34
- from .filters import normalize_filters
34
+ from .filters import _normalize_filters
35
35
36
36
type_mapping = {
37
37
str : "Edm.String" ,
@@ -70,7 +70,7 @@ def __init__(
70
70
embedding_dimension : int = 768 ,
71
71
metadata_fields : Optional [Dict [str , type ]] = None ,
72
72
vector_search_configuration : VectorSearch = None ,
73
- ** kwargs ,
73
+ ** index_creation_kwargs ,
74
74
):
75
75
"""
76
76
A document store using [Azure AI Search](https://azure.microsoft.com/products/ai-services/ai-search/)
@@ -87,19 +87,20 @@ def __init__(
87
87
:param vector_search_configuration: Configuration option related to vector search.
88
88
Default configuration uses the HNSW algorithm with cosine similarity to handle vector searches.
89
89
90
- :param kwargs: Optional keyword parameters for Azure AI Search.
91
- Some of the supported parameters:
92
- - `api_version`: The Search API version to use for requests.
93
- - `audience`: sets the Audience to use for authentication with Azure Active Directory (AAD).
94
- The audience is not considered when using a shared key. If audience is not provided,
95
- the public cloud audience will be assumed.
90
+ :param index_creation_kwargs: Optional keyword parameters to be passed to the `SearchIndex` class
91
+ during index creation. Some of the supported parameters:
92
+ - `semantic_search`: Defines semantic configuration of the search index. This parameter is needed
93
+ to enable semantic search capabilities in the index.
94
+ - `similarity`: The type of similarity algorithm to be used when scoring and ranking the documents
95
+ matching a search query. The similarity algorithm can only be defined at index creation time and
96
+ cannot be modified on existing indexes.
96
97
97
- For more information on parameters, see the [official Azure AI Search documentation](https://learn.microsoft.com/en-us/azure/search/)
98
+ For more information on parameters, see the [official Azure AI Search documentation](https://learn.microsoft.com/en-us/azure/search/).
98
99
"""
99
100
100
101
azure_endpoint = azure_endpoint or os .environ .get ("AZURE_SEARCH_SERVICE_ENDPOINT" ) or None
101
102
if not azure_endpoint :
102
- msg = "Please provide an Azure endpoint or set the environment variable AZURE_OPENAI_ENDPOINT ."
103
+ msg = "Please provide an Azure endpoint or set the environment variable AZURE_SEARCH_SERVICE_ENDPOINT ."
103
104
raise ValueError (msg )
104
105
105
106
api_key = api_key or os .environ .get ("AZURE_SEARCH_API_KEY" ) or None
@@ -114,7 +115,7 @@ def __init__(
114
115
self ._dummy_vector = [- 10.0 ] * self ._embedding_dimension
115
116
self ._metadata_fields = metadata_fields
116
117
self ._vector_search_configuration = vector_search_configuration or DEFAULT_VECTOR_SEARCH
117
- self ._kwargs = kwargs
118
+ self ._index_creation_kwargs = index_creation_kwargs
118
119
119
120
@property
120
121
def client (self ) -> SearchClient :
@@ -128,7 +129,10 @@ def client(self) -> SearchClient:
128
129
credential = AzureKeyCredential (resolved_key ) if resolved_key else DefaultAzureCredential ()
129
130
try :
130
131
if not self ._index_client :
131
- self ._index_client = SearchIndexClient (resolved_endpoint , credential , ** self ._kwargs )
132
+ self ._index_client = SearchIndexClient (
133
+ resolved_endpoint ,
134
+ credential ,
135
+ )
132
136
if not self ._index_exists (self ._index_name ):
133
137
# Create a new index if it does not exist
134
138
logger .debug (
@@ -151,7 +155,7 @@ def client(self) -> SearchClient:
151
155
152
156
return self ._client
153
157
154
- def _create_index (self , index_name : str , ** kwargs ) -> None :
158
+ def _create_index (self , index_name : str ) -> None :
155
159
"""
156
160
Creates a new search index.
157
161
:param index_name: Name of the index to create. If None, the index name from the constructor is used.
@@ -177,7 +181,10 @@ def _create_index(self, index_name: str, **kwargs) -> None:
177
181
if self ._metadata_fields :
178
182
default_fields .extend (self ._create_metadata_index_fields (self ._metadata_fields ))
179
183
index = SearchIndex (
180
- name = index_name , fields = default_fields , vector_search = self ._vector_search_configuration , ** kwargs
184
+ name = index_name ,
185
+ fields = default_fields ,
186
+ vector_search = self ._vector_search_configuration ,
187
+ ** self ._index_creation_kwargs ,
181
188
)
182
189
if self ._index_client :
183
190
self ._index_client .create_index (index )
@@ -194,13 +201,13 @@ def to_dict(self) -> Dict[str, Any]:
194
201
"""
195
202
return default_to_dict (
196
203
self ,
197
- azure_endpoint = self ._azure_endpoint .to_dict () if self ._azure_endpoint is not None else None ,
198
- api_key = self ._api_key .to_dict () if self ._api_key is not None else None ,
204
+ azure_endpoint = self ._azure_endpoint .to_dict () if self ._azure_endpoint else None ,
205
+ api_key = self ._api_key .to_dict () if self ._api_key else None ,
199
206
index_name = self ._index_name ,
200
207
embedding_dimension = self ._embedding_dimension ,
201
208
metadata_fields = self ._metadata_fields ,
202
209
vector_search_configuration = self ._vector_search_configuration .as_dict (),
203
- ** self ._kwargs ,
210
+ ** self ._index_creation_kwargs ,
204
211
)
205
212
206
213
@classmethod
@@ -298,7 +305,7 @@ def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Doc
298
305
:returns: A list of Documents that match the given filters.
299
306
"""
300
307
if filters :
301
- normalized_filters = normalize_filters (filters )
308
+ normalized_filters = _normalize_filters (filters )
302
309
result = self .client .search (filter = normalized_filters )
303
310
return self ._convert_search_result_to_documents (result )
304
311
else :
@@ -409,8 +416,8 @@ def _embedding_retrieval(
409
416
query_embedding : List [float ],
410
417
* ,
411
418
top_k : int = 10 ,
412
- fields : Optional [List [str ]] = None ,
413
419
filters : Optional [Dict [str , Any ]] = None ,
420
+ ** kwargs ,
414
421
) -> List [Document ]:
415
422
"""
416
423
Retrieves documents that are most similar to the query embedding using a vector similarity metric.
@@ -422,9 +429,10 @@ def _embedding_retrieval(
422
429
`AzureAISearchEmbeddingRetriever` uses this method directly and is the public interface for it.
423
430
424
431
:param query_embedding: Embedding of the query.
432
+ :param top_k: Maximum number of Documents to return, defaults to 10.
425
433
:param filters: Filters applied to the retrieved Documents. Defaults to None.
426
434
Filters are applied during the approximate kNN search to ensure that top_k matching documents are returned.
427
- :param top_k: Maximum number of Documents to return, defaults to 10
435
+ :param kwargs: Optional keyword arguments forwarded to the Azure AI Search client's `search` call.
428
436
429
437
:raises ValueError: If `query_embedding` is an empty list
430
438
:returns: List of Documents that are most similar to `query_embedding`
@@ -435,6 +443,6 @@ def _embedding_retrieval(
435
443
raise ValueError (msg )
436
444
437
445
vector_query = VectorizedQuery (vector = query_embedding , k_nearest_neighbors = top_k , fields = "embedding" )
438
- result = self .client .search (search_text = None , vector_queries = [vector_query ], select = fields , filter = filters )
446
+ result = self .client .search (vector_queries = [vector_query ], filter = filters , ** kwargs )
439
447
azure_docs = list (result )
440
448
return self ._convert_search_result_to_documents (azure_docs )
0 commit comments