Skip to content

Commit e172376

Browse files
authored
regression-test: automatically fetch connection candidates (#37384)
1 parent 6399307 commit e172376

File tree

7 files changed

+733
-218
lines changed

7 files changed

+733
-218
lines changed

airbyte-ci/connectors/live-tests/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ The traffic recorded on the control connector is passed to the target connector
237237

238238
## Changelog
239239

240+
### 0.15.0
241+
Automatically retrieve connection objects for regression tests. The connection ID is no longer required.
242+
240243
### 0.14.2
241244
Fix KeyError when target & control streams differ.
242245

airbyte-ci/connectors/live-tests/poetry.lock

+643-191
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-ci/connectors/live-tests/pyproject.toml

+1-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "live-tests"
7-
version = "0.14.2"
7+
version = "0.15.0"
88
description = "Contains utilities for testing connectors against live data."
99
authors = ["Airbyte <[email protected]>"]
1010
license = "MIT"
@@ -26,7 +26,6 @@ pytest = "^8.1.1"
2626
pydash = "~=7.0.7"
2727
docker = ">=6,<7"
2828
asyncclick = "^8.1.7.1"
29-
# TODO: when this is open-sourced, don't require connection-retriever
3029
connection-retriever = {git = "[email protected]:airbytehq/airbyte-platform-internal", subdirectory = "tools/connection-retriever"}
3130
duckdb = "^0.10.0"
3231
pandas = "^2.2.1"

airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py

+47-9
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,18 @@
22

33
import json
44
import logging
5+
import os
56
from pathlib import Path
67
from typing import Dict, Optional, Set
78

9+
import rich
810
from connection_retriever import ConnectionObject, retrieve_objects # type: ignore
911
from connection_retriever.errors import NotPermittedError # type: ignore
1012

1113
from .models import AirbyteCatalog, Command, ConfiguredAirbyteCatalog, ConnectionObjects, SecretDict
1214

1315
LOGGER = logging.getLogger(__name__)
16+
console = rich.get_console()
1417

1518

1619
def parse_config(config: Dict | str | None) -> Optional[SecretDict]:
@@ -32,14 +35,17 @@ def parse_catalog(catalog: Dict | str | None) -> Optional[AirbyteCatalog]:
3235

3336

3437
def parse_configured_catalog(
35-
configured_catalog: Dict | str | None,
38+
configured_catalog: Dict | str | None, selected_streams: Set[str] | None = None
3639
) -> Optional[ConfiguredAirbyteCatalog]:
3740
if not configured_catalog:
3841
return None
3942
if isinstance(configured_catalog, str):
40-
return ConfiguredAirbyteCatalog.parse_obj(json.loads(configured_catalog))
43+
catalog = ConfiguredAirbyteCatalog.parse_obj(json.loads(configured_catalog))
4144
else:
42-
return ConfiguredAirbyteCatalog.parse_obj(configured_catalog)
45+
catalog = ConfiguredAirbyteCatalog.parse_obj(configured_catalog)
46+
if selected_streams:
47+
return ConfiguredAirbyteCatalog(streams=[stream for stream in catalog.streams if stream.stream.name in selected_streams])
48+
return catalog
4349

4450

4551
def parse_state(state: Dict | str | None) -> Optional[Dict]:
@@ -59,8 +65,8 @@ def get_state_from_path(state_path: Path) -> Optional[Dict]:
5965
return parse_state(state_path.read_text())
6066

6167

62-
def get_configured_catalog_from_path(path: Path) -> Optional[ConfiguredAirbyteCatalog]:
63-
return parse_configured_catalog(path.read_text())
68+
def get_configured_catalog_from_path(path: Path, selected_streams: Optional[Set[str]] = None) -> Optional[ConfiguredAirbyteCatalog]:
69+
return parse_configured_catalog(path.read_text(), selected_streams)
6470

6571

6672
COMMAND_TO_REQUIRED_OBJECT_TYPES = {
@@ -85,6 +91,8 @@ def get_connection_objects(
8591
retrieval_reason: Optional[str],
8692
fail_if_missing_objects: bool = True,
8793
connector_image: Optional[str] = None,
94+
auto_select_connection: bool = False,
95+
selected_streams: Optional[Set[str]] = None,
8896
) -> ConnectionObjects:
8997
"""This function retrieves the connection objects values.
9098
It checks that the required objects are available and raises a UsageError if they are not.
@@ -100,18 +108,26 @@ def get_connection_objects(
100108
retrieval_reason (Optional[str]): The reason to access the connection objects.
101109
fail_if_missing_objects (bool, optional): Whether to raise a ValueError if a required object is missing. Defaults to True.
102110
connector_image (Optional[str]): The image name for the connector under test.
111+
auto_select_connection (bool, optional): Whether to automatically select a connection if no connection id is passed. Defaults to False.
112+
selected_streams (Optional[Set[str]]): The set of selected streams to use when auto selecting a connection.
103113
Raises:
104114
click.UsageError: If a required object is missing for the command.
105115
ValueError: If a retrieval reason is missing when passing a connection id or auto-selecting a connection.
106116
Returns:
107117
ConnectionObjects: The connection objects values.
108118
"""
119+
if connection_id is None and not auto_select_connection:
120+
raise ValueError("A connection id or auto_select_connection must be provided to retrieve the connection objects.")
121+
if auto_select_connection and not connector_image:
122+
raise ValueError("A connector image must be provided when using auto_select_connection.")
109123

110124
custom_config = get_connector_config_from_path(custom_config_path) if custom_config_path else None
111-
custom_configured_catalog = get_configured_catalog_from_path(custom_configured_catalog_path) if custom_configured_catalog_path else None
125+
custom_configured_catalog = (
126+
get_configured_catalog_from_path(custom_configured_catalog_path, selected_streams) if custom_configured_catalog_path else None
127+
)
112128
custom_state = get_state_from_path(custom_state_path) if custom_state_path else None
113129

114-
if not connection_id:
130+
if not connection_id and not auto_select_connection:
115131
connection_object = ConnectionObjects(
116132
source_config=custom_config,
117133
destination_config=custom_config,
@@ -121,15 +137,35 @@ def get_connection_objects(
121137
workspace_id=None,
122138
source_id=None,
123139
destination_id=None,
140+
connection_id=None,
141+
source_docker_image=None,
124142
)
125143
else:
126144
if not retrieval_reason:
127145
raise ValueError("A retrieval reason is required to access the connection objects when passing a connection id.")
128-
retrieved_objects = retrieve_objects(connection_id, requested_objects, retrieval_reason=retrieval_reason)
146+
LOGGER.info("Retrieving connection objects from the database...")
147+
if auto_select_connection:
148+
is_ci = os.getenv("CI", False)
149+
connection_id, retrieved_objects = retrieve_objects(
150+
requested_objects,
151+
retrieval_reason=retrieval_reason,
152+
source_docker_repository=connector_image,
153+
prompt_for_connection_selection=not is_ci,
154+
with_streams=selected_streams,
155+
)
156+
else:
157+
connection_id, retrieved_objects = retrieve_objects(
158+
requested_objects,
159+
retrieval_reason=retrieval_reason,
160+
connection_id=connection_id,
161+
with_streams=selected_streams,
162+
)
129163
retrieved_source_config = parse_config(retrieved_objects.get(ConnectionObject.SOURCE_CONFIG))
130164
rerieved_destination_config = parse_config(retrieved_objects.get(ConnectionObject.DESTINATION_CONFIG))
131165
retrieved_catalog = parse_catalog(retrieved_objects.get(ConnectionObject.CATALOG))
132-
retrieved_configured_catalog = parse_configured_catalog(retrieved_objects.get(ConnectionObject.CONFIGURED_CATALOG))
166+
retrieved_configured_catalog = parse_configured_catalog(
167+
retrieved_objects.get(ConnectionObject.CONFIGURED_CATALOG), selected_streams
168+
)
133169
retrieved_state = parse_state(retrieved_objects.get(ConnectionObject.STATE))
134170

135171
retrieved_source_docker_image = retrieved_objects.get(ConnectionObject.SOURCE_DOCKER_IMAGE)
@@ -149,6 +185,8 @@ def get_connection_objects(
149185
workspace_id=retrieved_objects.get(ConnectionObject.WORKSPACE_ID),
150186
source_id=retrieved_objects.get(ConnectionObject.SOURCE_ID),
151187
destination_id=retrieved_objects.get(ConnectionObject.DESTINATION_ID),
188+
source_docker_image=retrieved_source_docker_image,
189+
connection_id=connection_id,
152190
)
153191
if fail_if_missing_objects:
154192
if not connection_object.source_config and ConnectionObject.SOURCE_CONFIG in requested_objects:

airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import _collections_abc
1414
import dagger
1515
import requests
16+
17+
# type: ignore
1618
from airbyte_protocol.models import AirbyteCatalog, AirbyteMessage, ConfiguredAirbyteCatalog # type: ignore
1719
from airbyte_protocol.models import Type as AirbyteMessageType
1820
from genson import SchemaBuilder # type: ignore
@@ -429,3 +431,5 @@ class ConnectionObjects:
429431
workspace_id: Optional[str]
430432
source_id: Optional[str]
431433
destination_id: Optional[str]
434+
source_docker_image: Optional[str]
435+
connection_id: Optional[str]

airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/conftest.py

+32-15
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import time
88
import webbrowser
99
from pathlib import Path
10-
from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Callable, Dict, Generator, Iterable, List, Optional
10+
from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Callable, Dict, Generator, Iterable, List, Optional, Set
1111

1212
import dagger
1313
import pytest
@@ -56,26 +56,30 @@ def pytest_addoption(parser: Parser) -> None:
5656
)
5757
parser.addoption(
5858
"--control-version",
59-
default="latest",
60-
help="The control version used for regression testing. Defaults to latest",
59+
help="The control version used for regression testing.",
6160
)
6261
parser.addoption(
6362
"--target-version",
6463
default="dev",
65-
help="The target version used for regression testing. Defaults to latest",
64+
help="The target version used for regression testing. Defaults to dev.",
6665
)
6766
parser.addoption("--config-path")
6867
parser.addoption("--catalog-path")
6968
parser.addoption("--state-path")
7069
parser.addoption("--connection-id")
70+
parser.addoption(
71+
"--auto-select-connection",
72+
default=True,
73+
help="Automatically select the connection to run the tests on.",
74+
)
7175
parser.addoption("--pr-url", help="The URL of the PR you are testing")
76+
parser.addoption("--stream", help="The stream to run the tests on. (Can be used multiple times)", action="append")
7277

7378

7479
def pytest_configure(config: Config) -> None:
7580
user_email = get_user_email()
7681
prompt_for_confirmation(user_email)
7782
track_usage(user_email, vars(config.option))
78-
7983
config.stash[stash_keys.AIRBYTE_API_KEY] = get_airbyte_api_key()
8084
config.stash[stash_keys.USER] = user_email
8185
start_timestamp = int(time.time())
@@ -91,18 +95,16 @@ def pytest_configure(config: Config) -> None:
9195
dagger_log_path.touch()
9296
config.stash[stash_keys.DAGGER_LOG_PATH] = dagger_log_path
9397
config.stash[stash_keys.PR_URL] = get_option_or_fail(config, "--pr-url")
94-
config.stash[stash_keys.CONNECTION_ID] = get_option_or_fail(config, "--connection-id")
95-
98+
config.stash[stash_keys.AUTO_SELECT_CONNECTION] = config.getoption("--auto-select-connection")
9699
config.stash[stash_keys.CONNECTOR_IMAGE] = get_option_or_fail(config, "--connector-image")
97-
config.stash[stash_keys.CONTROL_VERSION] = get_option_or_fail(config, "--control-version")
98100
config.stash[stash_keys.TARGET_VERSION] = get_option_or_fail(config, "--target-version")
99-
if config.stash[stash_keys.CONTROL_VERSION] == config.stash[stash_keys.TARGET_VERSION]:
100-
pytest.exit(f"Control and target versions are the same: {control_version}. Please provide different versions.")
101101
custom_source_config_path = config.getoption("--config-path")
102102
custom_configured_catalog_path = config.getoption("--catalog-path")
103103
custom_state_path = config.getoption("--state-path")
104+
config.stash[stash_keys.SELECTED_STREAMS] = set(config.getoption("--stream") or [])
105+
104106
config.stash[stash_keys.SHOULD_READ_WITH_STATE] = prompt_for_read_with_or_without_state()
105-
retrieval_reason = f"Running regression tests on connection {config.stash[stash_keys.CONNECTION_ID]} for connector {config.stash[stash_keys.CONNECTOR_IMAGE]} on the control ({config.stash[stash_keys.CONTROL_VERSION]}) and target versions ({config.stash[stash_keys.TARGET_VERSION]})."
107+
retrieval_reason = f"Running regression tests on connection for connector {config.stash[stash_keys.CONNECTOR_IMAGE]} on target versions ({config.stash[stash_keys.TARGET_VERSION]})."
106108
try:
107109
config.stash[stash_keys.CONNECTION_OBJECTS] = get_connection_objects(
108110
{
@@ -115,18 +117,30 @@ def pytest_configure(config: Config) -> None:
115117
ConnectionObject.SOURCE_ID,
116118
ConnectionObject.DESTINATION_ID,
117119
},
118-
config.stash[stash_keys.CONNECTION_ID],
120+
config.getoption("--connection-id"),
119121
Path(custom_source_config_path) if custom_source_config_path else None,
120122
Path(custom_configured_catalog_path) if custom_configured_catalog_path else None,
121123
Path(custom_state_path) if custom_state_path else None,
122124
retrieval_reason,
123125
fail_if_missing_objects=False,
124126
connector_image=config.stash[stash_keys.CONNECTOR_IMAGE],
127+
auto_select_connection=config.stash[stash_keys.AUTO_SELECT_CONNECTION],
128+
selected_streams=config.stash[stash_keys.SELECTED_STREAMS],
125129
)
126130
config.stash[stash_keys.IS_PERMITTED_BOOL] = True
127131
except (ConnectionNotFoundError, NotPermittedError) as exc:
128132
clean_up_artifacts(MAIN_OUTPUT_DIRECTORY, LOGGER)
129133
pytest.exit(str(exc))
134+
135+
config.stash[stash_keys.CONNECTION_ID] = config.stash[stash_keys.CONNECTION_OBJECTS].connection_id # type: ignore
136+
137+
if source_docker_image := config.stash[stash_keys.CONNECTION_OBJECTS].source_docker_image:
138+
config.stash[stash_keys.CONTROL_VERSION] = source_docker_image.split(":")[-1]
139+
else:
140+
config.stash[stash_keys.CONTROL_VERSION] = "latest"
141+
142+
if config.stash[stash_keys.CONTROL_VERSION] == config.stash[stash_keys.TARGET_VERSION]:
143+
pytest.exit(f"Control and target versions are the same: {control_version}. Please provide different versions.")
130144
if config.stash[stash_keys.CONNECTION_OBJECTS].workspace_id and config.stash[stash_keys.CONNECTION_ID]:
131145
config.stash[stash_keys.CONNECTION_URL] = build_connection_url(
132146
config.stash[stash_keys.CONNECTION_OBJECTS].workspace_id,
@@ -304,9 +318,12 @@ def actor_id(connection_objects: ConnectionObjects, control_connector: Connector
304318

305319

306320
@pytest.fixture(scope="session")
307-
def configured_catalog(
308-
connection_objects: ConnectionObjects,
309-
) -> ConfiguredAirbyteCatalog:
321+
def selected_streams(request: SubRequest) -> Set[str]:
322+
return request.config.stash[stash_keys.SELECTED_STREAMS]
323+
324+
325+
@pytest.fixture(scope="session")
326+
def configured_catalog(connection_objects: ConnectionObjects, selected_streams: Optional[Set[str]]) -> ConfiguredAirbyteCatalog:
310327
if not connection_objects.configured_catalog:
311328
pytest.skip("Catalog is not provided. The catalog fixture can't be used.")
312329
assert connection_objects.configured_catalog is not None

airbyte-ci/connectors/live-tests/src/live_tests/regression_tests/stash_keys.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
22

33
from pathlib import Path
4-
from typing import List
4+
from typing import List, Set
55

66
import pytest
77
from live_tests.commons.models import ConnectionObjects
88
from live_tests.regression_tests.report import Report
99

1010
AIRBYTE_API_KEY = pytest.StashKey[str]()
11+
AUTO_SELECT_CONNECTION = pytest.StashKey[bool]()
1112
CONNECTION_ID = pytest.StashKey[str]()
1213
CONNECTION_OBJECTS = pytest.StashKey[ConnectionObjects]()
1314
CONNECTION_URL = pytest.StashKey[str | None]()
@@ -20,6 +21,7 @@
2021
PR_URL = pytest.StashKey[str]()
2122
REPORT = pytest.StashKey[Report]()
2223
RETRIEVAL_REASONS = pytest.StashKey[str]()
24+
SELECTED_STREAMS = pytest.StashKey[Set[str]]()
2325
SESSION_START_TIMESTAMP = pytest.StashKey[int]()
2426
SHOULD_READ_WITH_STATE = pytest.StashKey[bool]()
2527
TARGET_VERSION = pytest.StashKey[str]()

0 commit comments

Comments
 (0)