Skip to content

Commit 8fdf89c

Browse files
davydov-dakashkulk
authored andcommitted
#2576 oncall. SAT: test spec against exposed secrets (#19124)
* #2576 oncall. SAT: test spec against exposed secrets * #2576 upd changelog * #2576 review fixes * #2576 more test fixes
1 parent dc17d75 commit 8fdf89c

File tree

5 files changed

+239
-2
lines changed

5 files changed

+239
-2
lines changed

airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
4+
## 0.2.18
5+
Test connector specification against exposed secret fields. [#19124](https://github.com/airbytehq/airbyte/pull/19124).
6+
37
## 0.2.17
48
Make `incremental.future_state` mandatory in `high` `test_strictness_level`. [#19085](https://github.com/airbytehq/airbyte/pull/19085/).
59

airbyte-integrations/bases/source-acceptance-test/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
3333
COPY source_acceptance_test ./source_acceptance_test
3434
RUN pip install .
3535

36-
LABEL io.airbyte.version=0.2.17
36+
LABEL io.airbyte.version=0.2.18
3737
LABEL io.airbyte.name=airbyte/source-acceptance-test
3838

3939
ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]

airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from collections import Counter, defaultdict
99
from functools import reduce
1010
from logging import Logger
11-
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set
11+
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set, Tuple
1212
from xmlrpc.client import Boolean
1313

1414
import dpath.util
@@ -53,6 +53,29 @@ def connector_spec_dict_fixture(actual_connector_spec):
5353
return json.loads(actual_connector_spec.json())
5454

5555

56+
@pytest.fixture(name="secret_property_names")
57+
def secret_property_names_fixture():
58+
return (
59+
"client_token",
60+
"access_token",
61+
"api_token",
62+
"token",
63+
"secret",
64+
"client_secret",
65+
"password",
66+
"key",
67+
"service_account_info",
68+
"service_account",
69+
"tenant_id",
70+
"certificate",
71+
"jwt",
72+
"credentials",
73+
"app_id",
74+
"appid",
75+
"refresh_token",
76+
)
77+
78+
5679
@pytest.mark.default_timeout(10)
5780
class TestSpec(BaseTest):
5881

@@ -164,6 +187,84 @@ def test_has_secret(self):
164187
def test_secret_never_in_the_output(self):
165188
"""This test should be injected into any docker command it needs to know current config and spec"""
166189

190+
@staticmethod
191+
def _is_spec_property_name_secret(path: str, secret_property_names) -> Tuple[Optional[str], bool]:
192+
"""
193+
Given a path to a type field, extract a field name and decide whether it is a name of secret or not
194+
based on a provided list of secret names.
195+
Split the path by `/`, drop the last item and make list reversed.
196+
Then iterate over it and find the first item that's not a reserved keyword or an index.
197+
Example:
198+
properties/credentials/oneOf/1/properties/api_key/type -> [api_key, properties, 1, oneOf, credentials, properties] -> api_key
199+
"""
200+
reserved_keywords = ("anyOf", "oneOf", "allOf", "not", "properties", "items", "type", "prefixItems")
201+
for part in reversed(path.split("/")[:-1]):
202+
if part.isdigit() or part in reserved_keywords:
203+
continue
204+
return part, part.lower() in secret_property_names
205+
return None, False
206+
207+
@staticmethod
208+
def _property_can_store_secret(prop: dict) -> bool:
209+
"""
210+
Some fields can not hold a secret by design, others can.
211+
Null type as well as boolean can not hold a secret value.
212+
A string, a number or an integer type can always store secrets.
213+
Objects and arrays can hold a secret in case they are generic,
214+
meaning their inner structure is not described in details with properties/items.
215+
"""
216+
unsecure_types = {"string", "integer", "number"}
217+
type_ = prop["type"]
218+
is_property_generic_object = type_ == "object" and not any(
219+
[prop.get("properties", {}), prop.get("anyOf", []), prop.get("oneOf", []), prop.get("allOf", [])]
220+
)
221+
is_property_generic_array = type_ == "array" and not any([prop.get("items", []), prop.get("prefixItems", [])])
222+
return any(
223+
[
224+
isinstance(type_, str) and type_ in unsecure_types,
225+
is_property_generic_object,
226+
is_property_generic_array,
227+
isinstance(type_, list) and (set(type_) & unsecure_types),
228+
]
229+
)
230+
231+
def test_secret_is_properly_marked(self, connector_spec_dict: dict, detailed_logger, secret_property_names):
232+
"""
233+
Each field has a type, therefore we can make a flat list of fields from the returned specification.
234+
Iterate over the list, check if a field name is a secret name, can potentially hold a secret value
235+
and make sure it is marked as `airbyte_secret`.
236+
"""
237+
secrets_exposed = []
238+
non_secrets_hidden = []
239+
spec_properties = connector_spec_dict["connectionSpecification"]["properties"]
240+
for type_path, value in dpath.util.search(spec_properties, "**/type", yielded=True):
241+
_, is_property_name_secret = self._is_spec_property_name_secret(type_path, secret_property_names)
242+
if not is_property_name_secret:
243+
continue
244+
absolute_path = f"/{type_path}"
245+
property_path, _ = absolute_path.rsplit(sep="/", maxsplit=1)
246+
property_definition = dpath.util.get(spec_properties, property_path)
247+
marked_as_secret = property_definition.get("airbyte_secret", False)
248+
possibly_a_secret = self._property_can_store_secret(property_definition)
249+
if marked_as_secret and not possibly_a_secret:
250+
non_secrets_hidden.append(property_path)
251+
if not marked_as_secret and possibly_a_secret:
252+
secrets_exposed.append(property_path)
253+
254+
if non_secrets_hidden:
255+
properties = "\n".join(non_secrets_hidden)
256+
detailed_logger.warning(
257+
f"""Some properties are marked with `airbyte_secret` although they probably should not be.
258+
Please double check them. If they're okay, please fix this test.
259+
{properties}"""
260+
)
261+
if secrets_exposed:
262+
properties = "\n".join(secrets_exposed)
263+
pytest.fail(
264+
f"""The following properties should be marked with `airbyte_secret!`
265+
{properties}"""
266+
)
267+
167268
def test_defined_refs_exist_in_json_spec_file(self, connector_spec_dict: dict):
168269
"""Checking for the presence of unresolved `$ref`s values within each json spec file"""
169270
check_result = list(find_all_values_for_key_in_schema(connector_spec_dict, "$ref"))

airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import pytest
88
from airbyte_cdk.models import ConnectorSpecification
9+
from source_acceptance_test import conftest
910
from source_acceptance_test.tests.test_core import TestSpec as _TestSpec
1011

1112
from .conftest import does_not_raise
@@ -648,3 +649,133 @@ def test_additional_properties_is_true(connector_spec, expectation):
648649
t = _TestSpec()
649650
with expectation:
650651
t.test_additional_properties_is_true(connector_spec)
652+
653+
654+
@pytest.mark.parametrize(
655+
"connector_spec, should_fail, is_warning_logged",
656+
(
657+
(
658+
{
659+
"connectionSpecification": {"type": "object", "properties": {"api_token": {"type": "string", "airbyte_secret": True}}}
660+
},
661+
False,
662+
False
663+
),
664+
(
665+
{
666+
"connectionSpecification": {"type": "object", "properties": {"api_token": {"type": "null"}}}
667+
},
668+
False,
669+
False
670+
),
671+
(
672+
{
673+
"connectionSpecification": {"type": "object", "properties": {"refresh_token": {"type": "boolean", "airbyte_secret": True}}}
674+
},
675+
False,
676+
True
677+
),
678+
(
679+
{
680+
"connectionSpecification": {"type": "object", "properties": {"jwt": {"type": "object"}}}
681+
},
682+
True,
683+
False
684+
),
685+
(
686+
{
687+
"connectionSpecification": {"type": "object", "properties": {"refresh_token": {"type": ["null", "string"]}}}
688+
},
689+
True,
690+
False
691+
),
692+
(
693+
{
694+
"connectionSpecification": {"type": "object", "properties": {"credentials": {"type": "array"}}}
695+
},
696+
True,
697+
False
698+
),
699+
(
700+
{
701+
"connectionSpecification": {"type": "object", "properties": {"credentials": {"type": "array", "items": {"type": "string"}}}}
702+
},
703+
True,
704+
False
705+
),
706+
(
707+
{
708+
"connectionSpecification": {"type": "object", "properties": {"auth": {"oneOf": [{"api_token": {"type": "string"}}]}}}
709+
},
710+
True,
711+
False
712+
),
713+
(
714+
{
715+
"connectionSpecification": {"type": "object", "properties": {"credentials": {"oneOf": [{"type": "object", "properties": {"api_key": {"type": "string"}}}]}}}
716+
},
717+
True,
718+
False
719+
),
720+
(
721+
{
722+
"connectionSpecification": {"type": "object", "properties": {"start_date": {"type": ["null", "string"]}}}
723+
},
724+
False,
725+
False
726+
)
727+
),
728+
)
729+
def test_airbyte_secret(mocker, connector_spec, should_fail, is_warning_logged):
730+
mocker.patch.object(conftest.pytest, "fail")
731+
t = _TestSpec()
732+
logger = mocker.Mock()
733+
t.test_secret_is_properly_marked(connector_spec, logger, ("api_key", "api_token", "refresh_token", "jwt", "credentials"))
734+
if should_fail:
735+
conftest.pytest.fail.assert_called_once()
736+
else:
737+
conftest.pytest.fail.assert_not_called()
738+
if is_warning_logged:
739+
_, args, _ = logger.warning.mock_calls[0]
740+
msg, *_ = args
741+
assert "Some properties are marked with `airbyte_secret` although they probably should not be" in msg
742+
else:
743+
logger.warning.assert_not_called()
744+
745+
746+
@pytest.mark.parametrize(
747+
"path, expected_name, expected_result",
748+
(
749+
("properties/api_key/type", "api_key", True),
750+
("properties/start_date/type", "start_date", False),
751+
("properties/credentials/oneOf/1/properties/api_token/type", "api_token", True),
752+
("properties/type", None, False), # root element
753+
("properties/accounts/items/2/properties/jwt/type", "jwt", True)
754+
)
755+
)
756+
def test_is_spec_property_name_secret(path, expected_name, expected_result):
757+
t = _TestSpec()
758+
assert t._is_spec_property_name_secret(path, ("api_key", "api_token", "refresh_token", "jwt", "credentials")) == (expected_name, expected_result)
759+
760+
761+
@pytest.mark.parametrize(
762+
"property_def, can_store_secret",
763+
(
764+
({"type": "boolean"}, False),
765+
({"type": "null"}, False),
766+
({"type": "string"}, True),
767+
({"type": "integer"}, True),
768+
({"type": "number"}, True),
769+
({"type": ["null", "string"]}, True),
770+
({"type": ["null", "boolean"]}, False),
771+
({"type": "object"}, True),
772+
# the object itself cannot hold a secret but the inner items can and will be processed separately
773+
({"type": "object", "properties": {"api_key": {}}}, False),
774+
({"type": "array"}, True),
775+
# same as object
776+
({"type": "array", "items": {"type": "string"}}, False)
777+
)
778+
)
779+
def test_property_can_store_secret(property_def, can_store_secret):
780+
t = _TestSpec()
781+
assert t._property_can_store_secret(property_def) is can_store_secret

docs/connector-development/testing-connectors/source-acceptance-tests-reference.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ acceptance_tests: # Tests configuration
101101
Verify that a `spec` operation issued to the connector returns a valid connector specification.
102102
Additional tests are validating the backward compatibility of the current specification compared to the specification of the previous connector version. If no previous connector version is found (by default the test looks for a docker image with the same name but with the `latest` tag), this test is skipped.
103103
These backward compatibility tests can be bypassed by changing the value of the `backward_compatibility_tests_config.disable_for_version` input in `acceptance-test-config.yml` (see below).
104+
One more test validates the specification against containing exposed secrets. This means fields that potentially could hold a secret value should be explicitly marked with `"airbyte_secret": true`. If an input field like `api_key` / `password` / `client_secret` / etc. is exposed, the test will fail.
104105

105106
| Input | Type | Default | Note |
106107
| :--------------------------------------------------------------- | :----- | :------------------ | :-------------------------------------------------------------------------------------------------------------------- |

0 commit comments

Comments
 (0)