Skip to content

Commit 232eb3d

Browse files
vblagoje and tstadel authored
feat: Add Secret handling in OpenSearchDocumentStore (#1288)
* Add Secret handling in OpenSearchDocumentStore
* only serialize auth secrets when values are resolvable
* Update integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: tstadel <[email protected]>

* Fixes
* Revert accidental commit
* Special list of Secrets handling only, keep everything else as it was before
* Small improvement
* More simplifications

---------

Co-authored-by: tstadel <[email protected]>
1 parent 7ceaeaf commit 232eb3d

File tree

2 files changed

+96
-9
lines changed

2 files changed

+96
-9
lines changed

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

+36-9
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from haystack.dataclasses import Document
1010
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1111
from haystack.document_stores.types import DuplicatePolicy
12+
from haystack.utils.auth import Secret
1213
from opensearchpy import OpenSearch
1314
from opensearchpy.helpers import bulk
1415

@@ -45,7 +46,10 @@ def __init__(
4546
mappings: Optional[Dict[str, Any]] = None,
4647
settings: Optional[Dict[str, Any]] = DEFAULT_SETTINGS,
4748
create_index: bool = True,
48-
http_auth: Any = None,
49+
http_auth: Any = (
50+
Secret.from_env_var("OPENSEARCH_USERNAME", strict=False), # noqa: B008
51+
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False), # noqa: B008
52+
),
4953
use_ssl: Optional[bool] = None,
5054
verify_certs: Optional[bool] = None,
5155
timeout: Optional[int] = None,
@@ -79,6 +83,7 @@ def __init__(
7983
- a tuple of (username, password)
8084
- a list of [username, password]
8185
- a string of "username:password"
86+
If not provided, will read values from OPENSEARCH_USERNAME and OPENSEARCH_PASSWORD environment variables.
8287
For AWS authentication with `Urllib3HttpConnection` pass an instance of `AWSAuth`.
8388
Defaults to None
8489
:param use_ssl: Whether to use SSL. Defaults to None
@@ -97,6 +102,17 @@ def __init__(
97102
self._mappings = mappings or self._get_default_mappings()
98103
self._settings = settings
99104
self._create_index = create_index
105+
self._http_auth_are_secrets = False
106+
107+
# Handle authentication
108+
if isinstance(http_auth, (tuple, list)) and len(http_auth) == 2: # noqa: PLR2004
109+
username, password = http_auth
110+
if isinstance(username, Secret) and isinstance(password, Secret):
111+
self._http_auth_are_secrets = True
112+
username_val = username.resolve_value()
113+
password_val = password.resolve_value()
114+
http_auth = [username_val, password_val] if username_val and password_val else None
115+
100116
self._http_auth = http_auth
101117
self._use_ssl = use_ssl
102118
self._verify_certs = verify_certs
@@ -174,15 +190,24 @@ def create_index(
174190
self.client.indices.create(index=index, body={"mappings": mappings, "settings": settings})
175191

176192
def to_dict(self) -> Dict[str, Any]:
177-
# This is not the best solution to serialise this class but is the fastest to implement.
178-
# Not all kwargs types can be serialised to text so this can fail. We must serialise each
179-
# type explicitly to handle this properly.
180193
"""
181194
Serializes the component to a dictionary.
182195
183196
:returns:
184197
Dictionary with serialized data.
185198
"""
199+
# Handle http_auth serialization
200+
if isinstance(self._http_auth, list) and self._http_auth_are_secrets:
201+
# Recreate the Secret objects for serialization
202+
http_auth = [
203+
Secret.from_env_var("OPENSEARCH_USERNAME", strict=False).to_dict(),
204+
Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False).to_dict(),
205+
]
206+
elif isinstance(self._http_auth, AWSAuth):
207+
http_auth = self._http_auth.to_dict()
208+
else:
209+
http_auth = self._http_auth
210+
186211
return default_to_dict(
187212
self,
188213
hosts=self._hosts,
@@ -194,7 +219,7 @@ def to_dict(self) -> Dict[str, Any]:
194219
settings=self._settings,
195220
create_index=self._create_index,
196221
return_embedding=self._return_embedding,
197-
http_auth=self._http_auth.to_dict() if isinstance(self._http_auth, AWSAuth) else self._http_auth,
222+
http_auth=http_auth,
198223
use_ssl=self._use_ssl,
199224
verify_certs=self._verify_certs,
200225
timeout=self._timeout,
@@ -208,14 +233,16 @@ def from_dict(cls, data: Dict[str, Any]) -> "OpenSearchDocumentStore":
208233
209234
:param data:
210235
Dictionary to deserialize from.
211-
212236
:returns:
213237
Deserialized component.
214238
"""
215-
if http_auth := data.get("init_parameters", {}).get("http_auth"):
239+
init_params = data.get("init_parameters", {})
240+
if http_auth := init_params.get("http_auth"):
216241
if isinstance(http_auth, dict):
217-
data["init_parameters"]["http_auth"] = AWSAuth.from_dict(http_auth)
218-
242+
init_params["http_auth"] = AWSAuth.from_dict(http_auth)
243+
elif isinstance(http_auth, (tuple, list)):
244+
are_secrets = all(isinstance(item, dict) and "type" in item for item in http_auth)
245+
init_params["http_auth"] = [Secret.from_dict(item) for item in http_auth] if are_secrets else http_auth
219246
return default_from_dict(cls, data)
220247

221248
def count_documents(self) -> int:

integrations/opensearch/tests/test_document_store.py

+60
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,66 @@ def test_to_dict_aws_auth(self, _mock_opensearch_client, monkeypatch: pytest.Mon
263263
},
264264
}
265265

266+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
267+
def test_init_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch):
268+
"""Test the default initialization using environment variables"""
269+
monkeypatch.setenv("OPENSEARCH_USERNAME", "user")
270+
monkeypatch.setenv("OPENSEARCH_PASSWORD", "pass")
271+
272+
document_store = OpenSearchDocumentStore(hosts="testhost")
273+
assert document_store.client
274+
_mock_opensearch_client.assert_called_once()
275+
assert _mock_opensearch_client.call_args[1]["http_auth"] == ["user", "pass"]
276+
277+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
278+
def test_init_with_missing_env_vars(self, _mock_opensearch_client):
279+
"""Test that auth is None when environment variables are missing"""
280+
document_store = OpenSearchDocumentStore(hosts="testhost")
281+
assert document_store.client
282+
_mock_opensearch_client.assert_called_once()
283+
assert _mock_opensearch_client.call_args[1]["http_auth"] is None
284+
285+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
286+
def test_to_dict_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch):
287+
"""Test serialization with environment variables"""
288+
monkeypatch.setenv("OPENSEARCH_USERNAME", "user")
289+
monkeypatch.setenv("OPENSEARCH_PASSWORD", "pass")
290+
291+
document_store = OpenSearchDocumentStore(hosts="testhost")
292+
serialized = document_store.to_dict()
293+
294+
assert "http_auth" in serialized["init_parameters"]
295+
auth = serialized["init_parameters"]["http_auth"]
296+
assert isinstance(auth, list)
297+
assert len(auth) == 2
298+
# Check that we have two Secret dictionaries with correct env vars
299+
assert auth[0]["type"] == "env_var"
300+
assert auth[0]["env_vars"] == ["OPENSEARCH_USERNAME"]
301+
assert auth[1]["type"] == "env_var"
302+
assert auth[1]["env_vars"] == ["OPENSEARCH_PASSWORD"]
303+
304+
@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
305+
def test_from_dict_with_env_var_secrets(self, _mock_opensearch_client, monkeypatch):
306+
"""Test deserialization with environment variables"""
307+
# Set environment variables so the secrets resolve properly
308+
monkeypatch.setenv("OPENSEARCH_USERNAME", "user")
309+
monkeypatch.setenv("OPENSEARCH_PASSWORD", "pass")
310+
311+
data = {
312+
"type": "haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore",
313+
"init_parameters": {
314+
"hosts": "testhost",
315+
"http_auth": [
316+
{"type": "env_var", "env_vars": ["OPENSEARCH_USERNAME"], "strict": False},
317+
{"type": "env_var", "env_vars": ["OPENSEARCH_PASSWORD"], "strict": False},
318+
],
319+
},
320+
}
321+
document_store = OpenSearchDocumentStore.from_dict(data)
322+
assert document_store.client
323+
_mock_opensearch_client.assert_called_once()
324+
assert _mock_opensearch_client.call_args[1]["http_auth"] == ["user", "pass"]
325+
266326

267327
@pytest.mark.integration
268328
class TestDocumentStore(DocumentStoreBaseTests):

0 commit comments

Comments
 (0)