Airbyte-ci: Add `--metadata-query` option #30330
Changes from 8 commits
9ba5662
d3657f3
8ae4fee
318e149
d6a5c34
59b8eee
33f3a60
1e8685b
6b04534
d081e24
a61a90e
1de9ff3
@@ -41,4 +41,4 @@ jobs:
      gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
      git_branch: ${{ steps.extract_branch.outputs.branch }}
      github_token: ${{ secrets.GITHUB_TOKEN }}
      subcommand: "--show-dagger-logs connectors ${{ inputs.test-connectors-options || '--concurrency=3 --support-level=community' }} test"
      subcommand: "--show-dagger-logs connectors ${{ inputs.test-connectors-options || '--concurrency=3 --metadata-query=\"(data.ab_internal.ql > 100) & (data.ab_internal.sl < 200)\"' }} test"
Review comment: @bnchrch looks like simple_eval is providing safe eval 👍
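Since the workflow query above is handed to `simpleeval` at runtime, a minimal sketch of what that evaluation looks like may help; the metadata dict below is invented for illustration and is not a real connector's metadata:

```python
from simpleeval import simple_eval

# Hypothetical metadata, for illustration only.
metadata = {"name": "source-s3", "ab_internal": {"ql": 150, "sl": 100}}

# Only the names passed in are visible to the expression, so the query cannot
# reach builtins or arbitrary Python objects; that is the "safe eval" part.
# Dotted access on plain dicts relies on simpleeval's attribute-to-index fallback.
print(simple_eval("'s3' in data.name", names={"data": metadata}))           # True
print(simple_eval("data.ab_internal.ql > 100", names={"data": metadata}))   # True
print(simple_eval("data.ab_internal.sl >= 200", names={"data": metadata}))  # False
```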
@@ -1,35 +1,46 @@
# connector_ops

A collection of tools and checks run by Github Actions
A collection of utilities for working with Airbyte connectors.

## Running Locally
# Setup

From this directory, create a virtual environment:
## Prerequisites

```
python3 -m venv .venv
#### Poetry

Before you can start working on this project, you will need to have Poetry installed on your system. Please follow the instructions below to install Poetry:

1. Open your terminal or command prompt.
2. Install Poetry using the recommended installation method:

```bash
curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.5.1 python3 -
```

This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
development environment of choice. To activate it from the terminal, run:
Alternatively, you can use `pip` to install Poetry:

```bash
source .venv/bin/activate
pip install -e . # assuming you are in the ./airbyte-ci/connectors/connector_ops directory
pip install --user poetry
```

pip will make binaries for all the commands in setup.py, so you can run `allowed-hosts-checks` directly from the virtual-env
3. After the installation is complete, close and reopen your terminal to ensure the newly installed `poetry` command is available in your system's PATH.

## Testing Locally
For more detailed instructions and alternative installation methods, please refer to the official Poetry documentation: https://python-poetry.org/docs/#installation

To install requirements to run unit tests, use:
### Using Poetry in the Project

```
pip install -e ".[tests]"
```
Once Poetry is installed, you can use it to manage the project's dependencies and virtual environment. To get started, navigate to the project's root directory in your terminal and follow these steps:

Unit tests are currently configured to be run from the base `airbyte` directory. You can run the tests from that directory with the following command:

## Installation
```bash
poetry install
```
pytest -s airbyte-ci/connector_ops/connectors/tests

## Testing Locally

Simply run
```bash
poetry run pytest
```
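The same `poetry run` pattern extends to day-to-day commands; the exact paths below are illustrative rather than taken from the README:

```bash
# Run a single test file inside the Poetry-managed environment (path is illustrative).
poetry run pytest tests/test_utils.py

# Or spawn a shell with the project's virtualenv activated.
poetry shell
```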
@@ -18,6 +18,7 @@
from ci_credentials import SecretsManager
from pydash.objects import get
from rich.console import Console
from simpleeval import simple_eval

console = Console()


@@ -259,7 +260,6 @@ def language(self) -> ConnectorLanguage:
        except FileNotFoundError:
            pass
        return None
        # raise ConnectorLanguageError(f"We could not infer {self.technical_name} connector language")

    @property
    def version(self) -> str:

@@ -288,6 +288,37 @@ def name_from_metadata(self) -> Optional[str]:
    def support_level(self) -> Optional[str]:
        return self.metadata.get("supportLevel") if self.metadata else None

    def metadata_query_match(self, query_string: str) -> bool:
        """Evaluate a query string against the connector metadata.

        Based on the simpleeval library:
        https://github.com/danthedeckie/simpleeval

        Examples
        --------
        >>> connector.metadata_query_match("'s3' in data.name")
        True

        >>> connector.metadata_query_match("data.supportLevel == 'certified'")
        False

        >>> connector.metadata_query_match("data.ab_internal.ql >= 100")
        True
Review comment on lines +297 to +306: great examples!
        Args:
            query_string (str): The query string to evaluate.

        Returns:
            bool: True if the query string matches the connector metadata, False otherwise.
        """
        try:
            matches = simple_eval(query_string, names={"data": self.metadata})
            return bool(matches)
        except Exception as e:
            # Skip on error, as not all fields are present in all connectors' metadata.
            logging.debug(f"Failed to evaluate query string {query_string} for connector {self.technical_name}, error: {e}")
            return False

    @property
    def ab_internal_sl(self) -> int:
        """Airbyte Internal Field.
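One detail worth noting about the `except` branch above: if a query references a metadata field a connector does not have, `simple_eval` raises and `metadata_query_match` returns `False` instead of propagating the error, so such connectors are simply not selected. A rough illustration with a made-up metadata dict:

```python
from simpleeval import simple_eval

metadata = {"name": "source-faker"}  # hypothetical metadata with no ab_internal section

try:
    simple_eval("data.ab_internal.ql > 100", names={"data": metadata})
except Exception as error:
    # metadata_query_match() catches this, logs it at debug level,
    # and then returns False for the connector.
    print(f"query failed: {error}")
```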
@@ -3,6 +3,7 @@
#


import os
from datetime import datetime

import pandas as pd

@@ -53,3 +54,8 @@ def dummy_qa_report() -> pd.DataFrame:
            }
        ]
    )

@pytest.fixture()
def set_working_dir_to_repo_root(monkeypatch):
    monkeypatch.chdir(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))

Review comment: @alafanechere I discovered we are not currently running these tests. I imagine it's because many of them depend on connectors being present in the main repo. This was a shortcut I took to have them passing again. But I want to raise a flag that it is a hack. Just not sure if it's an acceptable short-term hack. Thoughts?

Reply: Good catch.

@pytest.fixture(scope="session")
def airbyte_repo_path() -> Path:
    return Path(git.Repo(search_parent_directories=True).working_tree_dir)


@pytest.fixture(autouse=True, scope="session")
def from_airbyte_root(airbyte_repo_path):
    """
    Change the working directory to the root of the Airbyte repo.
    This will make all the tests' current working directory the root of the Airbyte repo, as we've set autouse=True.
    """
    original_dir = Path.cwd()
    os.chdir(airbyte_repo_path)
    yield airbyte_repo_path
    os.chdir(original_dir)

It's a bit less hacky as the …
@@ -44,20 +44,35 @@ def test_init(self, connector, exists, mocker, tmp_path):
            assert isinstance(connector.metadata, dict)
            assert isinstance(connector.support_level, str)
            assert isinstance(connector.acceptance_test_config, dict)
            assert connector.icon_path == Path(f"./airbyte-config-oss/init-oss/src/main/resources/icons/{connector.metadata['icon']}")
            assert connector.icon_path == Path(f"./airbyte-integrations/connectors/{connector.technical_name}/icon.svg")
            assert len(connector.version.split(".")) == 3
        else:
            assert connector.metadata is None
            assert connector.support_level is None
            assert connector.acceptance_test_config is None
            assert connector.icon_path == Path(f"./airbyte-config-oss/init-oss/src/main/resources/icons/{connector.name}.svg")
            assert connector.icon_path == Path(f"./airbyte-integrations/connectors/{connector.technical_name}/icon.svg")
Review comment on lines +47 to +53: I guess these were tests that weren't being run, and didn't pass after we made them run?

Reply: Exactly! I want to get them run automatically again (#30330 (comment)). I'll address that in another PR.

Reply: Great, thanks for fixing them in any case :D
        with pytest.raises(FileNotFoundError):
            connector.version
        with pytest.raises(utils.ConnectorVersionNotFound):
            Path(tmp_path / "Dockerfile").touch()
            mocker.patch.object(utils.Connector, "code_directory", tmp_path)
            utils.Connector(connector.technical_name).version

    def test_metadata_query_match(self, mocker):
        connector = utils.Connector("source-faker")
        mocker.patch.object(utils.Connector, "metadata", {"dockerRepository": "airbyte/source-faker", "ab_internal": {"ql": 100}})
        assert connector.metadata_query_match("data.dockerRepository == 'airbyte/source-faker'")
        assert connector.metadata_query_match("'source' in data.dockerRepository")
        assert not connector.metadata_query_match("data.dockerRepository == 'airbyte/source-faker2'")
        assert not connector.metadata_query_match("'destination' in data.dockerRepository")
        assert connector.metadata_query_match("data.ab_internal.ql == 100")
        assert connector.metadata_query_match("data.ab_internal.ql >= 100")
        assert connector.metadata_query_match("data.ab_internal.ql > 1")
        assert not connector.metadata_query_match("data.ab_internal.ql == 101")
        assert not connector.metadata_query_match("data.ab_internal.ql >= 101")
        assert not connector.metadata_query_match("data.ab_internal.ql > 101")
        assert not connector.metadata_query_match("data.ab_internal == whatever")


@pytest.fixture()
def gradle_file_with_dependencies(tmpdir) -> Path:
@@ -77,15 +92,15 @@ def gradle_file_with_dependencies(tmpdir) -> Path:
        }
        """
    )
    expected_dependencies = [Path("path/to/dependency1"), Path("path/to/dependency2")]
    expected_dependencies = [Path("path/to/dependency1"), Path("path/to/dependency2"), Path("airbyte-cdk/java/airbyte-cdk")]
    expected_test_dependencies = [Path("path/to/test/dependency"), Path("path/to/test/dependency1"), Path("path/to/test/dependency2")]

    return test_gradle_file, expected_dependencies, expected_test_dependencies


def test_parse_dependencies(gradle_file_with_dependencies):
    gradle_file, expected_regular_dependencies, expected_test_dependencies = gradle_file_with_dependencies
    regular_dependencies, test_dependencies = utils.parse_dependencies(gradle_file)
    regular_dependencies, test_dependencies = utils.parse_gradle_dependencies(gradle_file)
    assert len(regular_dependencies) == len(expected_regular_dependencies)
    assert all([regular_dependency in expected_regular_dependencies for regular_dependency in regular_dependencies])
    assert len(test_dependencies) == len(expected_test_dependencies)
@@ -122,6 +122,7 @@ Available commands:
| `--use-remote-secrets` | False | True | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. |
| `--name` | True | | Select a specific connector for which the pipeline will run. Can be used multiple times to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` |
| `--support-level` | True | | Select connectors with a specific support level: `community`, `certified`. Can be used multiple times to select multiple support levels. |
| `--metadata-query` | False | | Filter connectors by metadata query using `simpleeval`. e.g. 'data.ab_internal.ql == 200' |
Review comment: Can we add the link to simpleeval? And will everyone need to backslash the quotes in the usage like here? Some more concrete examples of using this in the readme might be helpful.

Reply: Especially pointing out that …
| `--language` | True | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. |
| `--modified` | False | False | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). |
| `--concurrency` | False | 5 | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. |

@@ -405,6 +406,7 @@ This command runs the Python tests for a airbyte-ci poetry package.
## Changelog
| Version | PR | Description |
|---------|-----------------------------------------------------------|------------------------------------------------------------------------------|
| 1.2.0 | [#30330](https://github.com/airbytehq/airbyte/pull/30330) | Add `--metadata-query` option to connectors command |
| 1.1.3 | [#30314](https://github.com/airbytehq/airbyte/pull/30314) | Stop patching gradle files to make them work with airbyte-ci. |
| 1.1.2 | [#30279](https://github.com/airbytehq/airbyte/pull/30279) | Fix correctness issues in layer caching by making atomic execution groupings |
| 1.1.1 | [#30252](https://github.com/airbytehq/airbyte/pull/30252) | Fix redundancies and broken logic in GradleTask, to speed up the CI runs. |
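On the reviewer's point about concrete examples: the invocations below are illustrative guesses at the quoting, assuming a POSIX shell where single quotes keep the inner expression intact for `simpleeval`:

```bash
# Test only connectors whose metadata matches the query (illustrative quoting).
airbyte-ci connectors --metadata-query='data.ab_internal.ql == 200' test

# The compound query used by the weekly workflow, quoted for an interactive shell.
airbyte-ci connectors --metadata-query='(data.ab_internal.ql > 100) & (data.ab_internal.sl < 200)' test
```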
@@ -54,6 +54,7 @@ def get_selected_connectors_with_modified_files(
    selected_languages: Tuple[str],
    modified: bool,
    metadata_changes_only: bool,
    metadata_query: str,
    modified_files: Set[Path],
    enable_dependency_scanning: bool = False,
) -> List[ConnectorWithModifiedFiles]:

@@ -81,12 +82,17 @@ def get_selected_connectors_with_modified_files(
    selected_connectors_by_name = {c for c in ALL_CONNECTORS if c.technical_name in selected_names}
    selected_connectors_by_support_level = {connector for connector in ALL_CONNECTORS if connector.support_level in selected_support_levels}
    selected_connectors_by_language = {connector for connector in ALL_CONNECTORS if connector.language in selected_languages}
    selected_connectors_by_query = (
        {connector for connector in ALL_CONNECTORS if connector.metadata_query_match(metadata_query)} if metadata_query else set()
    )

    non_empty_connector_sets = [
        connector_set
        for connector_set in [
            selected_connectors_by_name,
            selected_connectors_by_support_level,
            selected_connectors_by_language,
            selected_connectors_by_query,
            selected_modified_connectors,
        ]
        if connector_set
Review comment: Nit: line 100, add "by query" to the intersection of listed groups comment.
@@ -134,6 +140,11 @@ def get_selected_connectors_with_modified_files(
    default=False,
    type=bool,
)
@click.option(
    "--metadata-query",
    help="Filter connectors by metadata query using `simpleeval`. e.g. 'data.ab_internal.ql == 200'",
    type=str,
)
@click.option("--concurrency", help="Number of connector tests pipeline to run in parallel.", default=5, type=int)
@click.option(
    "--execute-timeout",

@@ -156,6 +167,7 @@ def connectors(
    support_levels: Tuple[str],
    modified: bool,
    metadata_changes_only: bool,
    metadata_query: str,
    concurrency: int,
    execute_timeout: int,
    enable_dependency_scanning: bool,

@@ -168,7 +180,14 @@ def connectors(
    ctx.obj["concurrency"] = concurrency
    ctx.obj["execute_timeout"] = execute_timeout
    ctx.obj["selected_connectors_with_modified_files"] = get_selected_connectors_with_modified_files(
        names, support_levels, languages, modified, metadata_changes_only, ctx.obj["modified_files"], enable_dependency_scanning
        names,
        support_levels,
        languages,
        modified,
        metadata_changes_only,
        metadata_query,
        ctx.obj["modified_files"],
        enable_dependency_scanning,
    )
    log_selected_connectors(ctx.obj["selected_connectors_with_modified_files"])


@@ -500,6 +519,6 @@ def format_code(ctx: click.Context) -> bool:
def log_selected_connectors(selected_connectors_with_modified_files: List[ConnectorWithModifiedFiles]) -> None:
    if selected_connectors_with_modified_files:
        selected_connectors_names = [c.technical_name for c in selected_connectors_with_modified_files]
        main_logger.info(f"Will run on the following connectors: {', '.join(selected_connectors_names)}.")
        main_logger.info(f"Will run on the following {len(selected_connectors_names)} connectors: {', '.join(selected_connectors_names)}.")
    else:
        main_logger.info("No connectors to run.")
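For context on how the new query set combines with the other filters, the selection roughly intersects whichever criteria sets are non-empty; this is a simplified sketch under that assumption, not the actual function:

```python
def select_connectors(criteria_sets):
    """Intersect the non-empty criteria sets; an empty set means the filter was not used."""
    non_empty = [s for s in criteria_sets if s]
    if not non_empty:
        return set()
    selected = non_empty[0]
    for s in non_empty[1:]:
        selected &= s
    return selected

# Hypothetical connector names, for illustration only.
by_query = {"source-faker", "source-pokeapi"}
by_support_level = {"source-faker"}
by_language = set()  # no --language filter passed

print(select_connectors([by_query, by_support_level, by_language]))  # {'source-faker'}
```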
Review comment: This will reduce our weekly connector tests from over 200 down to ~70. The question is: should we test the 100-level `ql` community connectors weekly as well?

Review comment: If possible, it would still be nice to define this query as a variable that describes what it is for.

Review comment: That's cool! The parsing knows to convert the input into numbers and do math?! Edit: Thanks, simple_eval!
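On that last comment: `simpleeval` parses numeric literals itself and applies ordinary comparison and arithmetic operators, without ever calling Python's `eval`. A tiny illustration with invented values:

```python
from simpleeval import simple_eval

# Numbers in the expression are parsed from the AST, so arithmetic and
# comparisons work directly on the supplied values.
print(simple_eval("2 + 2 * 10"))                                        # 22
print(simple_eval("data.ql + 50 > 140", names={"data": {"ql": 100}}))   # True
```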