|
8 | 8 | from collections import Counter, defaultdict
|
9 | 9 | from functools import reduce
|
10 | 10 | from logging import Logger
|
11 |
| -from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set |
| 11 | +from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Set, Tuple |
12 | 12 | from xmlrpc.client import Boolean
|
13 | 13 |
|
14 | 14 | import dpath.util
|
@@ -53,6 +53,29 @@ def connector_spec_dict_fixture(actual_connector_spec):
|
53 | 53 | return json.loads(actual_connector_spec.json())
|
54 | 54 |
|
55 | 55 |
|
@pytest.fixture(name="secret_property_names")
def secret_property_names_fixture():
    """Provide the lowercase property names that are looked up when deciding whether a spec field is a secret."""
    names = (
        "client_token", "access_token", "api_token", "token",
        "secret", "client_secret", "password", "key",
        "service_account_info", "service_account", "tenant_id",
        "certificate", "jwt", "credentials",
        "app_id", "appid", "refresh_token",
    )
    return names


| 77 | + |
| 78 | + |
56 | 79 | @pytest.mark.default_timeout(10)
|
57 | 80 | class TestSpec(BaseTest):
|
58 | 81 |
|
@@ -164,6 +187,84 @@ def test_has_secret(self):
|
def test_secret_never_in_the_output(self):
    """
    Placeholder for a check that secret values never show up in the connector output.

    NOTE(review): this method has no body besides this docstring, so nothing is asserted here.
    According to the original note, the check should be injected into any docker command,
    because it needs to know the current config and spec.
    """
|
166 | 189 |
|
@staticmethod
def _is_spec_property_name_secret(path: str, secret_property_names) -> Tuple[Optional[str], bool]:
    """
    Given a path to a type field, find the name of the property it describes and decide whether
    that name is a name of a secret, based on the provided collection of secret names.

    The last path item (the `type` leaf itself) is dropped and the remaining items are read left to right:
    - an item that directly follows the `properties` keyword is always a property name, even when it is
      spelled like a schema keyword (e.g. a property called `type` or `items`);
    - any other item is a property name only if it is neither an index nor a reserved keyword.
    The last property name found is the one the type field belongs to.

    Examples:
    properties/credentials/oneOf/1/properties/api_key/type -> api_key
    credentials/properties/type/type -> type (the property called `type`, not its parent `credentials`)

    :param path: `/`-separated path to a `type` field inside the spec
    :param secret_property_names: collection of lowercase names that are considered secret
    :return: tuple of (property name, or None if the path contains no property name;
             True if the lowercased property name is in `secret_property_names`)
    """
    reserved_keywords = ("anyOf", "oneOf", "allOf", "not", "properties", "items", "type", "prefixItems")
    property_name = None
    expecting_name = False
    for part in path.split("/")[:-1]:
        if expecting_name:
            # Whatever follows `properties` is a property name, regardless of its spelling.
            property_name, expecting_name = part, False
        elif part == "properties":
            expecting_name = True
        elif not part.isdigit() and part not in reserved_keywords:
            property_name = part
    if property_name is None:
        return None, False
    return property_name, property_name.lower() in secret_property_names

| 206 | + |
@staticmethod
def _property_can_store_secret(prop: dict) -> bool:
    """
    Some fields can not hold a secret by design, others can.
    Null type as well as boolean can not hold a secret value.
    A string, a number or an integer type can always store secrets.
    Objects and arrays can hold a secret in case they are generic,
    meaning their inner structure is not described in details with properties/items.

    The `type` keyword may be a single type name or a list of type names (e.g. ["null", "object"]);
    both forms are evaluated the same way. Any other shape of `type` is treated as not able to store a secret.

    :param prop: property definition taken from the spec, must contain the `type` key
    :return: True if a value of this property may carry a secret, False otherwise
    :raises KeyError: if the definition has no `type` key
    """
    secret_capable_types = {"string", "integer", "number"}
    declared = prop["type"]
    if isinstance(declared, str):
        declared_types = {declared}
    elif isinstance(declared, list):
        declared_types = set(declared)
    else:
        # e.g. `type` is not the keyword here but a nested definition of a property called `type`
        return False

    if declared_types & secret_capable_types:
        return True
    is_generic_object = "object" in declared_types and not any(
        prop.get(keyword) for keyword in ("properties", "anyOf", "oneOf", "allOf")
    )
    is_generic_array = "array" in declared_types and not any(prop.get(keyword) for keyword in ("items", "prefixItems"))
    return is_generic_object or is_generic_array

| 230 | + |
def test_secret_is_properly_marked(self, connector_spec_dict: dict, detailed_logger, secret_property_names):
    """
    Each field has a type, therefore we can make a flat list of fields from the returned specification.
    Iterate over the list, check if a field name is a secret name, can potentially hold a secret value
    and make sure it is marked as `airbyte_secret`.

    Properties with a secret name that can hold a secret but are not marked make the test fail.
    Properties that are marked although they can not hold a secret are only reported as a warning.
    """
    secrets_exposed = []
    non_secrets_hidden = []
    spec_properties = connector_spec_dict["connectionSpecification"]["properties"]
    # Only the paths are needed here; the matched `type` values are read later from the property definition.
    for type_path, _ in dpath.util.search(spec_properties, "**/type", yielded=True):
        _, is_property_name_secret = self._is_spec_property_name_secret(type_path, secret_property_names)
        if not is_property_name_secret:
            continue
        # Drop the trailing `/type` to address the property definition that owns the type field.
        property_path = f"/{type_path}".rsplit(sep="/", maxsplit=1)[0]
        property_definition = dpath.util.get(spec_properties, property_path)
        marked_as_secret = property_definition.get("airbyte_secret", False)
        possibly_a_secret = self._property_can_store_secret(property_definition)
        if marked_as_secret and not possibly_a_secret:
            non_secrets_hidden.append(property_path)
        if not marked_as_secret and possibly_a_secret:
            secrets_exposed.append(property_path)

    # Messages are assembled from explicit lines so that source indentation never ends up in the output.
    if non_secrets_hidden:
        detailed_logger.warning(
            "Some properties are marked with `airbyte_secret` although they probably should not be.\n"
            "Please double check them. If they're okay, please fix this test.\n"
            "%s",
            "\n".join(non_secrets_hidden),
        )
    if secrets_exposed:
        pytest.fail("The following properties should be marked with `airbyte_secret`!\n" + "\n".join(secrets_exposed))

| 267 | + |
167 | 268 | def test_defined_refs_exist_in_json_spec_file(self, connector_spec_dict: dict):
|
168 | 269 | """Checking for the presence of unresolved `$ref`s values within each json spec file"""
|
169 | 270 | check_result = list(find_all_values_for_key_in_schema(connector_spec_dict, "$ref"))
|
|
0 commit comments