Skip to content

Commit 13a5a40

Browse files
authored
Airbyte-ci: Add --metadata-query option (#30330)
Co-authored-by: bnchrch <[email protected]>
1 parent 59f803e commit 13a5a40

File tree

12 files changed

+291
-32
lines changed

12 files changed

+291
-32
lines changed

.github/workflows/connectors_weekly_build.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ jobs:
4141
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
4242
git_branch: ${{ steps.extract_branch.outputs.branch }}
4343
github_token: ${{ secrets.GITHUB_TOKEN }}
44-
subcommand: "--show-dagger-logs connectors ${{ inputs.test-connectors-options || '--concurrency=3 --support-level=community' }} test"
44+
subcommand: "--show-dagger-logs connectors ${{ inputs.test-connectors-options || '--concurrency=3 --metadata-query=\"(data.ab_internal.ql > 100) & (data.ab_internal.sl < 200)\"' }} test"
+28-17
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,46 @@
11
# connector_ops
22

3-
A collection of tools and checks run by Github Actions
3+
A collection of utilities for working with Airbyte connectors.
44

5-
## Running Locally
5+
# Setup
66

7-
From this directory, create a virtual environment:
7+
## Prerequisites
88

9-
```
10-
python3 -m venv .venv
9+
### Poetry
10+
11+
Before you can start working on this project, you will need to have Poetry installed on your system. Please follow the instructions below to install Poetry:
12+
13+
1. Open your terminal or command prompt.
14+
2. Install Poetry using the recommended installation method:
15+
16+
```bash
17+
curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.5.1 python3 -
1118
```
1219

13-
This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
14-
development environment of choice. To activate it from the terminal, run:
20+
Alternatively, you can use `pip` to install Poetry:
1521

1622
```bash
17-
source .venv/bin/activate
18-
pip install -e . # assuming you are in the ./airbyte-ci/connectors/connector_ops directory
23+
pip install --user poetry
1924
```
2025

21-
pip will make binaries for all the commands in setup.py, so you can run `allowed-hosts-checks` directly from the virtual-env
26+
3. After the installation is complete, close and reopen your terminal to ensure the newly installed `poetry` command is available in your system's PATH.
2227

23-
## Testing Locally
28+
For more detailed instructions and alternative installation methods, please refer to the official Poetry documentation: https://python-poetry.org/docs/#installation
2429

25-
To install requirements to run unit tests, use:
30+
### Using Poetry in the Project
2631

27-
```
28-
pip install -e ".[tests]"
29-
```
32+
Once Poetry is installed, you can use it to manage the project's dependencies and virtual environment. To get started, navigate to the project's root directory in your terminal and follow these steps:
3033

31-
Unit tests are currently configured to be run from the base `airbyte` directory. You can run the tests from that directory with the following command:
3234

35+
## Installation
36+
```bash
37+
poetry install
3338
```
34-
pytest -s airbyte-ci/connector_ops/connectors/tests
39+
40+
41+
## Testing Locally
42+
43+
Simply run:
44+
```bash
45+
poetry run pytest
3546
```

airbyte-ci/connectors/connector_ops/connector_ops/utils.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from ci_credentials import SecretsManager
1919
from pydash.objects import get
2020
from rich.console import Console
21+
from simpleeval import simple_eval
2122

2223
console = Console()
2324

@@ -259,7 +260,6 @@ def language(self) -> ConnectorLanguage:
259260
except FileNotFoundError:
260261
pass
261262
return None
262-
# raise ConnectorLanguageError(f"We could not infer {self.technical_name} connector language")
263263

264264
@property
265265
def version(self) -> str:
@@ -288,6 +288,37 @@ def name_from_metadata(self) -> Optional[str]:
288288
def support_level(self) -> Optional[str]:
289289
return self.metadata.get("supportLevel") if self.metadata else None
290290

291+
def metadata_query_match(self, query_string: str) -> bool:
292+
"""Evaluate a query string against the connector metadata.
293+
294+
Based on the simpleeval library:
295+
https://github.com/danthedeckie/simpleeval
296+
297+
Examples
298+
--------
299+
>>> connector.metadata_query_match("'s3' in data.name")
300+
True
301+
302+
>>> connector.metadata_query_match("data.supportLevel == 'certified'")
303+
False
304+
305+
>>> connector.metadata_query_match("data.ab_internal.ql >= 100")
306+
True
307+
308+
Args:
309+
query_string (str): The query string to evaluate.
310+
311+
Returns:
312+
bool: True if the query string matches the connector metadata, False otherwise.
313+
"""
314+
try:
315+
matches = simple_eval(query_string, names={"data": self.metadata})
316+
return bool(matches)
317+
except Exception as e:
318+
# Skip on error as not all fields are present in all connectors.
319+
logging.debug(f"Failed to evaluate query string {query_string} for connector {self.technical_name}, error: {e}")
320+
return False
321+
291322
@property
292323
def ab_internal_sl(self) -> int:
293324
"""Airbyte Internal Field.

airbyte-ci/connectors/connector_ops/poetry.lock

+12-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-ci/connectors/connector_ops/pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["poetry-core>=1.0.0"]
2+
requires = ["poetry-core>=1.1.0"]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
@@ -21,6 +21,7 @@ pydash = "^7.0.4"
2121
google-cloud-storage = "^2.8.0"
2222
ci-credentials = {path = "../ci_credentials"}
2323
pandas = "^2.0.3"
24+
simpleeval = "^0.9.13"
2425

2526
[tool.poetry.group.test.dependencies]
2627
pytest = "^7.4.0"

airbyte-ci/connectors/connector_ops/tests/conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#
44

55

6+
import os
67
from datetime import datetime
78

89
import pandas as pd
@@ -53,3 +54,12 @@ def dummy_qa_report() -> pd.DataFrame:
5354
}
5455
]
5556
)
57+
58+
59+
@pytest.fixture(autouse=True)
60+
def set_working_dir_to_repo_root(monkeypatch):
61+
"""Set working directory to the root of the repository.
62+
63+
HACK: This is a workaround for the fact that these tests are not run from the root of the repository.
64+
"""
65+
monkeypatch.chdir(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))

airbyte-ci/connectors/connector_ops/tests/test_utils.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,35 @@ def test_init(self, connector, exists, mocker, tmp_path):
4444
assert isinstance(connector.metadata, dict)
4545
assert isinstance(connector.support_level, str)
4646
assert isinstance(connector.acceptance_test_config, dict)
47-
assert connector.icon_path == Path(f"./airbyte-config-oss/init-oss/src/main/resources/icons/{connector.metadata['icon']}")
47+
assert connector.icon_path == Path(f"./airbyte-integrations/connectors/{connector.technical_name}/icon.svg")
4848
assert len(connector.version.split(".")) == 3
4949
else:
5050
assert connector.metadata is None
5151
assert connector.support_level is None
5252
assert connector.acceptance_test_config is None
53-
assert connector.icon_path == Path(f"./airbyte-config-oss/init-oss/src/main/resources/icons/{connector.name}.svg")
53+
assert connector.icon_path == Path(f"./airbyte-integrations/connectors/{connector.technical_name}/icon.svg")
5454
with pytest.raises(FileNotFoundError):
5555
connector.version
5656
with pytest.raises(utils.ConnectorVersionNotFound):
5757
Path(tmp_path / "Dockerfile").touch()
5858
mocker.patch.object(utils.Connector, "code_directory", tmp_path)
5959
utils.Connector(connector.technical_name).version
6060

61+
def test_metadata_query_match(self, mocker):
62+
connector = utils.Connector("source-faker")
63+
mocker.patch.object(utils.Connector, "metadata", {"dockerRepository": "airbyte/source-faker", "ab_internal": {"ql": 100}})
64+
assert connector.metadata_query_match("data.dockerRepository == 'airbyte/source-faker'")
65+
assert connector.metadata_query_match("'source' in data.dockerRepository")
66+
assert not connector.metadata_query_match("data.dockerRepository == 'airbyte/source-faker2'")
67+
assert not connector.metadata_query_match("'destination' in data.dockerRepository")
68+
assert connector.metadata_query_match("data.ab_internal.ql == 100")
69+
assert connector.metadata_query_match("data.ab_internal.ql >= 100")
70+
assert connector.metadata_query_match("data.ab_internal.ql > 1")
71+
assert not connector.metadata_query_match("data.ab_internal.ql == 101")
72+
assert not connector.metadata_query_match("data.ab_internal.ql >= 101")
73+
assert not connector.metadata_query_match("data.ab_internal.ql > 101")
74+
assert not connector.metadata_query_match("data.ab_internal == whatever")
75+
6176

6277
@pytest.fixture()
6378
def gradle_file_with_dependencies(tmpdir) -> Path:
@@ -77,15 +92,15 @@ def gradle_file_with_dependencies(tmpdir) -> Path:
7792
}
7893
"""
7994
)
80-
expected_dependencies = [Path("path/to/dependency1"), Path("path/to/dependency2")]
95+
expected_dependencies = [Path("path/to/dependency1"), Path("path/to/dependency2"), Path("airbyte-cdk/java/airbyte-cdk")]
8196
expected_test_dependencies = [Path("path/to/test/dependency"), Path("path/to/test/dependency1"), Path("path/to/test/dependency2")]
8297

8398
return test_gradle_file, expected_dependencies, expected_test_dependencies
8499

85100

86101
def test_parse_dependencies(gradle_file_with_dependencies):
87102
gradle_file, expected_regular_dependencies, expected_test_dependencies = gradle_file_with_dependencies
88-
regular_dependencies, test_dependencies = utils.parse_dependencies(gradle_file)
103+
regular_dependencies, test_dependencies = utils.parse_gradle_dependencies(gradle_file)
89104
assert len(regular_dependencies) == len(expected_regular_dependencies)
90105
assert all([regular_dependency in expected_regular_dependencies for regular_dependency in regular_dependencies])
91106
assert len(test_dependencies) == len(expected_test_dependencies)

airbyte-ci/connectors/pipelines/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Available commands:
122122
| `--use-remote-secrets` | False | True | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. |
123123
| `--name` | True | | Select a specific connector for which the pipeline will run. Can be used multiple times to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` |
124124
| `--support-level` | True | | Select connectors with a specific support level: `community`, `certified`. Can be used multiple times to select multiple support levels. |
125+
| `--metadata-query` | False | | Filter connectors by the `data` field in the metadata file using a [simpleeval](https://github.com/danthedeckie/simpleeval) query. e.g. 'data.ab_internal.ql == 200' |
125126
| `--language` | True | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. |
126127
| `--modified` | False | False | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). |
127128
| `--concurrency` | False | 5 | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. |
@@ -405,6 +406,7 @@ This command runs the Python tests for a airbyte-ci poetry package.
405406
## Changelog
406407
| Version | PR | Description |
407408
|---------| --------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------|
409+
| 1.2.0 | [#30330](https://github.com/airbytehq/airbyte/pull/30330) | Add `--metadata-query` option to connectors command |
408410
| 1.1.3 | [#30314](https://github.com/airbytehq/airbyte/pull/30314) | Stop patching gradle files to make them work with airbyte-ci. |
409411
| 1.1.2 | [#30279](https://github.com/airbytehq/airbyte/pull/30279) | Fix correctness issues in layer caching by making atomic execution groupings |
410412
| 1.1.1 | [#30252](https://github.com/airbytehq/airbyte/pull/30252) | Fix redundancies and broken logic in GradleTask, to speed up the CI runs. |

airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py

+22-3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def get_selected_connectors_with_modified_files(
5454
selected_languages: Tuple[str],
5555
modified: bool,
5656
metadata_changes_only: bool,
57+
metadata_query: str,
5758
modified_files: Set[Path],
5859
enable_dependency_scanning: bool = False,
5960
) -> List[ConnectorWithModifiedFiles]:
@@ -81,17 +82,22 @@ def get_selected_connectors_with_modified_files(
8182
selected_connectors_by_name = {c for c in ALL_CONNECTORS if c.technical_name in selected_names}
8283
selected_connectors_by_support_level = {connector for connector in ALL_CONNECTORS if connector.support_level in selected_support_levels}
8384
selected_connectors_by_language = {connector for connector in ALL_CONNECTORS if connector.language in selected_languages}
85+
selected_connectors_by_query = (
86+
{connector for connector in ALL_CONNECTORS if connector.metadata_query_match(metadata_query)} if metadata_query else set()
87+
)
88+
8489
non_empty_connector_sets = [
8590
connector_set
8691
for connector_set in [
8792
selected_connectors_by_name,
8893
selected_connectors_by_support_level,
8994
selected_connectors_by_language,
95+
selected_connectors_by_query,
9096
selected_modified_connectors,
9197
]
9298
if connector_set
9399
]
94-
# The selected connectors are the intersection of the selected connectors by name, support_level, language and modified.
100+
# The selected connectors are the intersection of the selected connectors by name, support_level, language, simpleeval query and modified.
95101
selected_connectors = set.intersection(*non_empty_connector_sets) if non_empty_connector_sets else set()
96102

97103
selected_connectors_with_modified_files = []
@@ -134,6 +140,11 @@ def get_selected_connectors_with_modified_files(
134140
default=False,
135141
type=bool,
136142
)
143+
@click.option(
144+
"--metadata-query",
145+
help="Filter connectors by metadata query using `simpleeval`. e.g. 'data.ab_internal.ql == 200'",
146+
type=str,
147+
)
137148
@click.option("--concurrency", help="Number of connector tests pipeline to run in parallel.", default=5, type=int)
138149
@click.option(
139150
"--execute-timeout",
@@ -156,6 +167,7 @@ def connectors(
156167
support_levels: Tuple[str],
157168
modified: bool,
158169
metadata_changes_only: bool,
170+
metadata_query: str,
159171
concurrency: int,
160172
execute_timeout: int,
161173
enable_dependency_scanning: bool,
@@ -168,7 +180,14 @@ def connectors(
168180
ctx.obj["concurrency"] = concurrency
169181
ctx.obj["execute_timeout"] = execute_timeout
170182
ctx.obj["selected_connectors_with_modified_files"] = get_selected_connectors_with_modified_files(
171-
names, support_levels, languages, modified, metadata_changes_only, ctx.obj["modified_files"], enable_dependency_scanning
183+
names,
184+
support_levels,
185+
languages,
186+
modified,
187+
metadata_changes_only,
188+
metadata_query,
189+
ctx.obj["modified_files"],
190+
enable_dependency_scanning,
172191
)
173192
log_selected_connectors(ctx.obj["selected_connectors_with_modified_files"])
174193

@@ -500,6 +519,6 @@ def format_code(ctx: click.Context) -> bool:
500519
def log_selected_connectors(selected_connectors_with_modified_files: List[ConnectorWithModifiedFiles]) -> None:
501520
if selected_connectors_with_modified_files:
502521
selected_connectors_names = [c.technical_name for c in selected_connectors_with_modified_files]
503-
main_logger.info(f"Will run on the following connectors: {', '.join(selected_connectors_names)}.")
522+
main_logger.info(f"Will run on the following {len(selected_connectors_names)} connectors: {', '.join(selected_connectors_names)}.")
504523
else:
505524
main_logger.info("No connectors to run.")

0 commit comments

Comments
 (0)