Skip to content

Commit 7bd0324

Browse files
authored
Regression tests: run with airbyte-ci (#37440)
1 parent c4ad3d9 commit 7bd0324

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+828
-544
lines changed

.github/workflows/airbyte-ci-tests.yml

+2-2
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ jobs:
9999
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
100100
git_branch: ${{ github.head_ref }}
101101
git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }}
102-
github_token: ${{ github.token }}
102+
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
103103
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
104104
subcommand: "test --modified"
105105

@@ -116,6 +116,6 @@ jobs:
116116
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
117117
git_branch: ${{ steps.extract_branch.outputs.branch }}
118118
git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }}
119-
github_token: ${{ github.token }}
119+
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
120120
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
121121
subcommand: "test ${{ inputs.airbyte_ci_subcommand}}"

airbyte-ci/connectors/live-tests/README.md

+3
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,9 @@ The traffic recorded on the control connector is passed to the target connector
237237

238238
## Changelog
239239

240+
### 0.16.0
241+
Enable running with airbyte-ci.
242+
240243
### 0.15.0
241244
Automatic retrieval of connection objects for regression tests. The connection id is not required anymore.
242245

airbyte-ci/connectors/live-tests/poetry.lock

+170-167
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-ci/connectors/live-tests/pyproject.toml

+6-3
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,11 @@ select = ["I", "F"]
5757
known-first-party = ["connection-retriever"]
5858

5959
[tool.poe.tasks]
60-
format = "ruff format src"
6160
test = "pytest tests"
62-
lint = "ruff check src"
6361
type_check = "mypy src --disallow-untyped-defs"
64-
pre-push = ["format", "lint", "test", "type_check"]
62+
pre-push = []
63+
64+
[tool.airbyte_ci]
65+
optional_poetry_groups = ["dev"]
66+
poe_tasks = []
67+
required_environment_variables = ["DOCKER_HUB_USERNAME", "DOCKER_HUB_PASSWORD"]

airbyte-ci/connectors/live-tests/src/live_tests/cli.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
import asyncclick as click
45
from live_tests.debug.cli import debug_cmd

airbyte-ci/connectors/live-tests/src/live_tests/commons/backends/__init__.py

+1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
from .base_backend import BaseBackend
45
from .duckdb_backend import DuckDbBackend

airbyte-ci/connectors/live-tests/src/live_tests/commons/backends/base_backend.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
from abc import ABC, abstractmethod
4-
from typing import Iterable
5+
from collections.abc import Iterable
56

67
from airbyte_protocol.models import AirbyteMessage # type: ignore
78

airbyte-ci/connectors/live-tests/src/live_tests/commons/backends/duckdb_backend.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2-
2+
from __future__ import annotations
33

44
import logging
55
import re
6+
from collections.abc import Iterable
67
from pathlib import Path
7-
from typing import Iterable, Optional
8+
from typing import Optional
89

910
import duckdb
1011
from airbyte_protocol.models import AirbyteMessage # type: ignore

airbyte-ci/connectors/live-tests/src/live_tests/commons/backends/file_backend.py

+8-6
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
import json
45
import logging
6+
from collections.abc import Iterable
57
from pathlib import Path
6-
from typing import Any, Dict, Iterable, TextIO, Tuple
8+
from typing import Any, TextIO
79

810
from airbyte_protocol.models import AirbyteMessage # type: ignore
911
from airbyte_protocol.models import Type as AirbyteMessageType
@@ -13,7 +15,7 @@
1315

1416

1517
class FileDescriptorLRUCache(LRUCache):
16-
def popitem(self) -> Tuple[Any, Any]:
18+
def popitem(self) -> tuple[Any, Any]:
1719
filepath, fd = LRUCache.popitem(self)
1820
fd.close() # type: ignore # Close the file descriptor when it's evicted from the cache
1921
return filepath, fd
@@ -34,8 +36,8 @@ def __init__(self, output_directory: Path):
3436
self._output_directory = output_directory
3537
self.record_per_stream_directory = self._output_directory / "records_per_stream"
3638
self.record_per_stream_directory.mkdir(exist_ok=True, parents=True)
37-
self.record_per_stream_paths: Dict[str, Path] = {}
38-
self.record_per_stream_paths_data_only: Dict[str, Path] = {}
39+
self.record_per_stream_paths: dict[str, Path] = {}
40+
self.record_per_stream_paths_data_only: dict[str, Path] = {}
3941

4042
@property
4143
def jsonl_specs_path(self) -> Path:
@@ -101,14 +103,14 @@ def _open_file(path: Path) -> TextIO:
101103
if not isinstance(_message, AirbyteMessage):
102104
continue
103105
filepaths, messages = self._get_filepaths_and_messages(_message)
104-
for filepath, message in zip(filepaths, messages):
106+
for filepath, message in zip(filepaths, messages, strict=False):
105107
_open_file(self._output_directory / filepath).write(f"{message}\n")
106108
logging.info("Finished writing airbyte messages to disk")
107109
finally:
108110
for f in self.CACHE.values():
109111
f.close()
110112

111-
def _get_filepaths_and_messages(self, message: AirbyteMessage) -> Tuple[Tuple[str, ...], Tuple[str, ...]]:
113+
def _get_filepaths_and_messages(self, message: AirbyteMessage) -> tuple[tuple[str, ...], tuple[str, ...]]:
112114
if message.type == AirbyteMessageType.CATALOG:
113115
return (self.RELATIVE_CATALOGS_PATH,), (message.catalog.json(),)
114116

airbyte-ci/connectors/live-tests/src/live_tests/commons/connection_objects_retrieval.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
import json
45
import logging
56
import os
67
from pathlib import Path
7-
from typing import Dict, Optional, Set
8+
from typing import Optional
89

910
import rich
1011
from connection_retriever import ConnectionObject, retrieve_objects # type: ignore
@@ -16,7 +17,7 @@
1617
console = rich.get_console()
1718

1819

19-
def parse_config(config: Dict | str | None) -> Optional[SecretDict]:
20+
def parse_config(config: dict | str | None) -> Optional[SecretDict]:
2021
if not config:
2122
return None
2223
if isinstance(config, str):
@@ -25,7 +26,7 @@ def parse_config(config: Dict | str | None) -> Optional[SecretDict]:
2526
return SecretDict(config)
2627

2728

28-
def parse_catalog(catalog: Dict | str | None) -> Optional[AirbyteCatalog]:
29+
def parse_catalog(catalog: dict | str | None) -> Optional[AirbyteCatalog]:
2930
if not catalog:
3031
return None
3132
if isinstance(catalog, str):
@@ -35,7 +36,7 @@ def parse_catalog(catalog: Dict | str | None) -> Optional[AirbyteCatalog]:
3536

3637

3738
def parse_configured_catalog(
38-
configured_catalog: Dict | str | None, selected_streams: Set[str] | None = None
39+
configured_catalog: dict | str | None, selected_streams: set[str] | None = None
3940
) -> Optional[ConfiguredAirbyteCatalog]:
4041
if not configured_catalog:
4142
return None
@@ -48,7 +49,7 @@ def parse_configured_catalog(
4849
return catalog
4950

5051

51-
def parse_state(state: Dict | str | None) -> Optional[Dict]:
52+
def parse_state(state: dict | str | None) -> Optional[dict]:
5253
if not state:
5354
return None
5455
if isinstance(state, str):
@@ -61,11 +62,11 @@ def get_connector_config_from_path(config_path: Path) -> Optional[SecretDict]:
6162
return parse_config(config_path.read_text())
6263

6364

64-
def get_state_from_path(state_path: Path) -> Optional[Dict]:
65+
def get_state_from_path(state_path: Path) -> Optional[dict]:
6566
return parse_state(state_path.read_text())
6667

6768

68-
def get_configured_catalog_from_path(path: Path, selected_streams: Optional[Set[str]] = None) -> Optional[ConfiguredAirbyteCatalog]:
69+
def get_configured_catalog_from_path(path: Path, selected_streams: Optional[set[str]] = None) -> Optional[ConfiguredAirbyteCatalog]:
6970
return parse_configured_catalog(path.read_text(), selected_streams)
7071

7172

@@ -83,7 +84,7 @@ def get_configured_catalog_from_path(path: Path, selected_streams: Optional[Set[
8384

8485

8586
def get_connection_objects(
86-
requested_objects: Set[ConnectionObject],
87+
requested_objects: set[ConnectionObject],
8788
connection_id: Optional[str],
8889
custom_config_path: Optional[Path],
8990
custom_configured_catalog_path: Optional[Path],
@@ -92,7 +93,7 @@ def get_connection_objects(
9293
fail_if_missing_objects: bool = True,
9394
connector_image: Optional[str] = None,
9495
auto_select_connection: bool = False,
95-
selected_streams: Optional[Set[str]] = None,
96+
selected_streams: Optional[set[str]] = None,
9697
) -> ConnectionObjects:
9798
"""This function retrieves the connection objects values.
9899
It checks that the required objects are available and raises a UsageError if they are not.

airbyte-ci/connectors/live-tests/src/live_tests/commons/connector_runner.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
#
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
4+
from __future__ import annotations
45

56
import datetime
67
import json
78
import logging
89
import uuid
910
from pathlib import Path
10-
from typing import List, Optional
11+
from typing import Optional
1112

1213
import anyio
1314
import asyncer
@@ -39,7 +40,7 @@ def __init__(
3940
self.actor_id = execution_inputs.actor_id
4041
self.environment_variables = execution_inputs.environment_variables if execution_inputs.environment_variables else {}
4142

42-
self.full_command: List[str] = self._get_full_command(execution_inputs.command)
43+
self.full_command: list[str] = self._get_full_command(execution_inputs.command)
4344
self.completion_event = anyio.Event()
4445
self.http_proxy = http_proxy
4546
self.logger = logging.getLogger(f"{self.connector_under_test.name}-{self.connector_under_test.version}")
@@ -57,7 +58,7 @@ def stdout_file_path(self) -> Path:
5758
def stderr_file_path(self) -> Path:
5859
return (self.output_dir / "stderr.log").resolve()
5960

60-
def _get_full_command(self, command: Command) -> List[str]:
61+
def _get_full_command(self, command: Command) -> list[str]:
6162
if command is Command.SPEC:
6263
return ["spec"]
6364
elif command is Command.CHECK:
@@ -184,7 +185,7 @@ async def _log_progress(self) -> None:
184185
def format_duration(time_delta: datetime.timedelta) -> str:
185186
total_seconds = time_delta.total_seconds()
186187
if total_seconds < 60:
187-
return "{:.2f}s".format(total_seconds)
188+
return f"{total_seconds:.2f}s"
188189
minutes = int(total_seconds // 60)
189190
seconds = int(total_seconds % 60)
190-
return "{:02d}mn{:02d}s".format(minutes, seconds)
191+
return f"{minutes:02d}mn{seconds:02d}s"

airbyte-ci/connectors/live-tests/src/live_tests/commons/errors.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34

45
class ExportError(Exception):

airbyte-ci/connectors/live-tests/src/live_tests/commons/mitm_addons.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
from urllib.parse import parse_qs, urlencode, urlparse
45

airbyte-ci/connectors/live-tests/src/live_tests/commons/models.py

+19-18
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,18 @@
55
import logging
66
import tempfile
77
from collections import defaultdict
8+
from collections.abc import Iterable, Iterator, MutableMapping
89
from dataclasses import dataclass, field
910
from enum import Enum
1011
from pathlib import Path
11-
from typing import Any, Dict, Iterable, Iterator, List, MutableMapping, Optional, Type
12+
from typing import Any, Optional
1213

1314
import _collections_abc
1415
import dagger
1516
import requests
16-
17-
# type: ignore
18-
from airbyte_protocol.models import AirbyteCatalog, AirbyteMessage, ConfiguredAirbyteCatalog # type: ignore
17+
from airbyte_protocol.models import AirbyteCatalog # type: ignore
18+
from airbyte_protocol.models import AirbyteMessage # type: ignore
19+
from airbyte_protocol.models import ConfiguredAirbyteCatalog # type: ignore
1920
from airbyte_protocol.models import Type as AirbyteMessageType
2021
from genson import SchemaBuilder # type: ignore
2122
from live_tests.commons.backends import DuckDbBackend, FileBackend
@@ -174,7 +175,7 @@ def actor_type(self) -> ActorType:
174175

175176
@classmethod
176177
async def from_image_name(
177-
cls: Type[ConnectorUnderTest],
178+
cls: type[ConnectorUnderTest],
178179
dagger_client: dagger.Client,
179180
image_name: str,
180181
target_or_control: TargetOrControl,
@@ -191,8 +192,8 @@ class ExecutionInputs:
191192
command: Command
192193
config: Optional[SecretDict] = None
193194
configured_catalog: Optional[ConfiguredAirbyteCatalog] = None
194-
state: Optional[Dict] = None
195-
environment_variables: Optional[Dict] = None
195+
state: Optional[dict] = None
196+
environment_variables: Optional[dict] = None
196197
duckdb_path: Optional[Path] = None
197198

198199
def raise_if_missing_attr_for_command(self, attribute: str) -> None:
@@ -232,8 +233,8 @@ class ExecutionResult:
232233
success: bool
233234
executed_container: Optional[dagger.Container]
234235
http_dump: Optional[dagger.File] = None
235-
http_flows: List[http.HTTPFlow] = field(default_factory=list)
236-
stream_schemas: Optional[Dict[str, Any]] = None
236+
http_flows: list[http.HTTPFlow] = field(default_factory=list)
237+
stream_schemas: Optional[dict[str, Any]] = None
237238
backend: Optional[FileBackend] = None
238239

239240
HTTP_DUMP_FILE_NAME = "http_dump.mitm"
@@ -253,7 +254,7 @@ def duckdb_schema(self) -> Iterable[str]:
253254

254255
@classmethod
255256
async def load(
256-
cls: Type[ExecutionResult],
257+
cls: type[ExecutionResult],
257258
connector_under_test: ConnectorUnderTest,
258259
actor_id: str,
259260
command: Command,
@@ -286,7 +287,7 @@ async def load_http_flows(self) -> None:
286287
def parse_airbyte_messages_from_command_output(
287288
self, command_output_path: Path, log_validation_errors: bool = False
288289
) -> Iterable[AirbyteMessage]:
289-
with open(command_output_path, "r") as command_output:
290+
with open(command_output_path) as command_output:
290291
for line in command_output:
291292
try:
292293
yield AirbyteMessage.parse_raw(line)
@@ -302,9 +303,9 @@ def get_records(self) -> Iterable[AirbyteMessage]:
302303
if message.type is AirbyteMessageType.RECORD:
303304
yield message
304305

305-
def generate_stream_schemas(self) -> Dict[str, Any]:
306+
def generate_stream_schemas(self) -> dict[str, Any]:
306307
self.logger.info("Generating stream schemas")
307-
stream_builders: Dict[str, SchemaBuilder] = {}
308+
stream_builders: dict[str, SchemaBuilder] = {}
308309
for record in self.get_records():
309310
stream = record.record.stream
310311
if stream not in stream_builders:
@@ -328,8 +329,8 @@ def get_records_per_stream(self, stream: str) -> Iterator[AirbyteMessage]:
328329
if message.type is AirbyteMessageType.RECORD:
329330
yield message
330331

331-
def get_message_count_per_type(self) -> Dict[AirbyteMessageType, int]:
332-
message_count: Dict[AirbyteMessageType, int] = defaultdict(int)
332+
def get_message_count_per_type(self) -> dict[AirbyteMessageType, int]:
333+
message_count: dict[AirbyteMessageType, int] = defaultdict(int)
333334
for message in self.airbyte_messages:
334335
message_count[message.type] += 1
335336
return message_count
@@ -376,7 +377,7 @@ async def save_artifacts(self, output_dir: Path, duckdb_path: Optional[Path] = N
376377
self.save_stream_schemas(output_dir)
377378
self.logger.info("All artifacts saved to disk")
378379

379-
def get_updated_configuration(self, control_message_path: Path) -> Optional[Dict[str, Any]]:
380+
def get_updated_configuration(self, control_message_path: Path) -> Optional[dict[str, Any]]:
380381
"""Iterate through the control messages to find CONNECTOR_CONFIG message and return the last updated configuration."""
381382
if not control_message_path.exists():
382383
return None
@@ -403,7 +404,7 @@ def update_configuration(self) -> None:
403404
payload = {
404405
"configuration": {
405406
**updated_configuration,
406-
**{f"{self.connector_under_test.actor_type.value}Type": self.connector_under_test.name_without_type_prefix},
407+
f"{self.connector_under_test.actor_type.value}Type": self.connector_under_test.name_without_type_prefix,
407408
}
408409
}
409410
headers = {
@@ -427,7 +428,7 @@ class ConnectionObjects:
427428
destination_config: Optional[SecretDict]
428429
configured_catalog: Optional[ConfiguredAirbyteCatalog]
429430
catalog: Optional[AirbyteCatalog]
430-
state: Optional[Dict]
431+
state: Optional[dict]
431432
workspace_id: Optional[str]
432433
source_id: Optional[str]
433434
destination_id: Optional[str]

airbyte-ci/connectors/live-tests/src/live_tests/commons/proxy.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2+
from __future__ import annotations
23

34
import logging
45
import uuid

0 commit comments

Comments
 (0)