Skip to content

Commit aabbf8c

Browse files
committed
connectors-ci: improve modified connectors detection
1 parent 1658ecc commit aabbf8c

File tree

12 files changed

+267
-95
lines changed

12 files changed

+267
-95
lines changed

airbyte-ci/connectors/connector_ops/connector_ops/utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def __repr__(self) -> str:
319319

320320
@functools.lru_cache(maxsize=2)
321321
def get_local_dependency_paths(self, with_test_dependencies: bool = True) -> Set[Path]:
322-
dependencies_paths = [self.code_directory]
322+
dependencies_paths = []
323323
if self.language == ConnectorLanguage.JAVA:
324324
dependencies_paths += get_all_gradle_dependencies(
325325
self.code_directory / "build.gradle", with_test_dependencies=with_test_dependencies
@@ -352,8 +352,11 @@ def get_changed_connectors(
352352

353353

354354
def get_all_released_connectors() -> Set:
355+
repo = git.Repo(search_parent_directories=True)
356+
repo_path = repo.working_tree_dir
357+
355358
return {
356359
Connector(Path(metadata_file).parent.name)
357-
for metadata_file in glob("airbyte-integrations/connectors/**/metadata.yaml", recursive=True)
360+
for metadata_file in glob(f"{repo_path}/airbyte-integrations/connectors/**/metadata.yaml", recursive=True)
358361
if SCAFFOLD_CONNECTOR_GLOB not in metadata_file
359362
}

airbyte-ci/connectors/connector_ops/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "connector_ops"
7-
version = "0.2.1"
7+
version = "0.2.2"
88
description = "Packaged maintained by the connector operations team to perform CI for connectors"
99
authors = ["Airbyte <[email protected]>"]
1010

airbyte-ci/connectors/pipelines/pipelines/commands/airbyte_ci.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import List
88

99
import click
10+
from github import PullRequest
1011
from pipelines import github, main_logger
1112
from pipelines.bases import CIContext
1213
from pipelines.utils import (
@@ -17,7 +18,6 @@
1718
get_modified_files_in_commit,
1819
get_modified_files_in_pull_request,
1920
)
20-
from github import PullRequest
2121

2222
from .groups.connectors import connectors
2323
from .groups.metadata import metadata
@@ -133,6 +133,7 @@ def airbyte_ci(
133133
main_logger.info(f"Pipeline Start Timestamp: {pipeline_start_timestamp}")
134134
main_logger.info(f"Modified Files: {ctx.obj['modified_files']}")
135135

136+
136137
airbyte_ci.add_command(connectors)
137138
airbyte_ci.add_command(metadata)
138139

airbyte-ci/connectors/pipelines/pipelines/commands/groups/connectors.py

Lines changed: 62 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
import os
88
import sys
99
from pathlib import Path
10-
from typing import Any, Dict, Tuple
10+
from typing import Set, Tuple, Union
1111

1212
import anyio
1313
import click
14+
from connector_ops.utils import Connector, ConnectorLanguage, console, get_all_released_connectors
1415
from pipelines import main_logger
1516
from pipelines.builds import run_connector_build_pipeline
1617
from pipelines.contexts import ConnectorContext, ContextState, PublishConnectorContext
@@ -19,8 +20,7 @@
1920
from pipelines.pipelines.connectors import run_connectors_pipelines
2021
from pipelines.publish import reorder_contexts, run_connector_publish_pipeline
2122
from pipelines.tests import run_connector_test_pipeline
22-
from pipelines.utils import DaggerPipelineCommand, get_modified_connectors
23-
from connector_ops.utils import ConnectorLanguage, console, get_all_released_connectors
23+
from pipelines.utils import DaggerPipelineCommand, get_connector_modified_files, get_modified_connectors_and_files
2424
from rich.table import Table
2525
from rich.text import Text
2626

@@ -87,45 +87,26 @@ def connectors(
8787

8888
ctx.ensure_object(dict)
8989
ctx.obj["use_remote_secrets"] = use_remote_secrets
90-
ctx.obj["connector_names"] = names
91-
ctx.obj["connector_languages"] = languages
92-
ctx.obj["release_states"] = release_stages
93-
ctx.obj["modified"] = modified
90+
ctx.obj["selected_connectors_names"] = set(names) if names else set()
91+
ctx.obj["selected_connectors_languages"] = set(languages) if languages else set()
92+
ctx.obj["selected_connectors_release_stages"] = set(release_stages) if release_stages else set()
93+
ctx.obj["select_modified_connectors"] = modified
9494
ctx.obj["concurrency"] = concurrency
9595
ctx.obj["execute_timeout"] = execute_timeout
96-
97-
all_connectors = get_all_released_connectors()
98-
99-
# We get the modified connectors and downstream connector deps, and files
100-
modified_connectors_and_files = get_modified_connectors(ctx.obj["modified_files"])
101-
102-
# We select all connectors by default
103-
# and attach modified files to them
104-
selected_connectors_and_files = {connector: modified_connectors_and_files.get(connector, []) for connector in all_connectors}
105-
106-
if modified:
107-
selected_connectors_and_files = modified_connectors_and_files
108-
if names:
109-
selected_connectors_and_files = {
110-
connector: selected_connectors_and_files[connector]
111-
for connector in selected_connectors_and_files
112-
if connector.technical_name in names
113-
}
114-
if languages:
115-
selected_connectors_and_files = {
116-
connector: selected_connectors_and_files[connector]
117-
for connector in selected_connectors_and_files
118-
if connector.language in languages
119-
}
120-
if release_stages:
121-
selected_connectors_and_files = {
122-
connector: selected_connectors_and_files[connector]
123-
for connector in selected_connectors_and_files
124-
if connector.release_stage in release_stages
125-
}
126-
127-
ctx.obj["selected_connectors_and_files"] = selected_connectors_and_files
128-
ctx.obj["selected_connectors_names"] = [c.technical_name for c in selected_connectors_and_files.keys()]
96+
ctx.obj["all_connectors"] = get_all_released_connectors()
97+
ctx.obj["selected_connectors_by_names"] = {Connector(technical_name=name) for name in names}
98+
ctx.obj["selected_connectors_by_languages"] = {connector for connector in ctx.obj["all_connectors"] if connector.language in languages}
99+
ctx.obj["selected_connectors_by_release_stages"] = {
100+
connector for connector in ctx.obj["all_connectors"] if connector.release_stage in release_stages
101+
}
102+
ctx.obj["selected_connectors"] = (
103+
ctx.obj["selected_connectors_by_names"]
104+
| ctx.obj["selected_connectors_by_languages"]
105+
| ctx.obj["selected_connectors_by_release_stages"]
106+
)
107+
ctx.obj["selected_connectors_and_files"] = {
108+
connector: get_connector_modified_files(connector, ctx.obj["modified_files"]) for connector in ctx.obj["selected_connectors"]
109+
}
129110

130111

131112
@connectors.command(cls=DaggerPipelineCommand, help="Test all the selected connectors.")
@@ -142,7 +123,13 @@ def test(
142123
main_logger.info("Skipping connectors tests for draft pull request.")
143124
sys.exit(0)
144125

145-
main_logger.info(f"Will run the test pipeline for the following connectors: {', '.join(ctx.obj['selected_connectors_names'])}")
126+
if ctx.obj["select_modified_connectors"]:
127+
ctx.obj["selected_connectors_and_files"] = {
128+
**ctx.obj["selected_connectors_and_files"],
129+
**get_modified_connectors_and_files(ctx.obj["modified_files"]),
130+
}
131+
main_logger.info(ctx.obj)
132+
log_selected_connectors(ctx.obj["selected_connectors_and_files"])
146133
if ctx.obj["selected_connectors_and_files"]:
147134
update_global_commit_status_check_for_tests(ctx.obj, "pending")
148135
else:
@@ -198,7 +185,14 @@ def send_commit_status_check() -> None:
198185
@connectors.command(cls=DaggerPipelineCommand, help="Build all images for the selected connectors.")
199186
@click.pass_context
200187
def build(ctx: click.Context) -> bool:
201-
main_logger.info(f"Will build the following connectors: {', '.join(ctx.obj['selected_connectors_names'])}.")
188+
"""Runs a build pipeline for the selected connectors."""
189+
if ctx.obj["select_modified_connectors"]:
190+
ctx.obj["selected_connectors_and_files"] = {
191+
**ctx.obj["selected_connectors_and_files"],
192+
**get_modified_connectors_and_files(ctx.obj["modified_files"]),
193+
}
194+
195+
log_selected_connectors(ctx.obj["selected_connectors_and_files"])
202196
connectors_contexts = [
203197
ConnectorContext(
204198
pipeline_name=f"Build connector {connector.technical_name}",
@@ -305,17 +299,19 @@ def publish(
305299
ctx.obj["spec_cache_bucket_name"] = spec_cache_bucket_name
306300
ctx.obj["metadata_service_bucket_name"] = metadata_service_bucket_name
307301
ctx.obj["metadata_service_gcs_credentials"] = metadata_service_gcs_credentials
308-
309302
if ctx.obj["is_local"]:
310303
click.confirm(
311-
"Publishing from a local environment is not recommend and requires to be logged in Airbyte's DockerHub registry, do you want to continue?",
304+
"Publishing from a local environment is not recommended and requires to be logged in Airbyte's DockerHub registry, do you want to continue?",
312305
abort=True,
313306
)
314307

315-
selected_connectors_and_files = ctx.obj["selected_connectors_and_files"]
316-
selected_connectors_names = ctx.obj["selected_connectors_names"]
308+
if ctx.obj["select_modified_connectors"]:
309+
ctx.obj["selected_connectors_and_files"] = {
310+
**ctx.obj["selected_connectors_and_files"],
311+
**get_modified_connectors_and_files(ctx.obj["modified_files"], metadata_modification_only=True, dependency_scanning=False),
312+
}
317313

318-
main_logger.info(f"Will publish the following connectors: {', '.join(selected_connectors_names)}")
314+
log_selected_connectors(ctx.obj["selected_connectors_and_files"])
319315
publish_connector_contexts = reorder_contexts(
320316
[
321317
PublishConnectorContext(
@@ -342,7 +338,7 @@ def publish(
342338
ci_gcs_credentials=ctx.obj["ci_gcs_credentials"],
343339
pull_request=ctx.obj.get("pull_request"),
344340
)
345-
for connector, modified_files in selected_connectors_and_files.items()
341+
for connector, modified_files in ctx.obj["selected_connectors_and_files"].items()
346342
]
347343
)
348344

@@ -394,23 +390,18 @@ def list(
394390
return True
395391

396392

397-
@connectors.command(cls=DaggerPipelineCommand, help="Autoformat connector code.")
393+
@connectors.command(name="format", cls=DaggerPipelineCommand, help="Autoformat connector code.")
398394
@click.pass_context
399-
def format(ctx: click.Context) -> bool:
400-
if ctx.obj["modified"]:
401-
# We only want to format the connector that with modified files on the current branch.
402-
connectors_and_files_to_format = [
403-
(connector, modified_files) for connector, modified_files in ctx.obj["selected_connectors_and_files"].items() if modified_files
404-
]
405-
else:
406-
# We explicitly want to format specific connectors
407-
connectors_and_files_to_format = [
408-
(connector, modified_files) for connector, modified_files in ctx.obj["selected_connectors_and_files"].items()
409-
]
395+
def format_code(ctx: click.Context) -> bool:
396+
if ctx.obj["select_modified_connectors"]:
397+
ctx.obj["selected_connectors_and_files"] = {
398+
**ctx.obj["selected_connectors_and_files"],
399+
**get_modified_connectors_and_files(ctx.obj["modified_files"]),
400+
}
410401

411-
if connectors_and_files_to_format:
402+
if ctx.obj["selected_connectors_and_files"]:
412403
main_logger.info(
413-
f"Will format the following connectors: {', '.join([connector.technical_name for connector, _ in connectors_and_files_to_format])}."
404+
f"Will format the following connectors: {', '.join([connector.technical_name for connector, _ in ctx.obj['selected_connectors_and_files'].keys()])}."
414405
)
415406
else:
416407
main_logger.info("No connectors to format.")
@@ -435,7 +426,7 @@ def format(ctx: click.Context) -> bool:
435426
pull_request=ctx.obj.get("pull_request"),
436427
should_save_report=False,
437428
)
438-
for connector, modified_files in connectors_and_files_to_format
429+
for connector, modified_files in ctx.obj["selected_connectors_and_files"].items()
439430
]
440431

441432
anyio.run(
@@ -449,3 +440,11 @@ def format(ctx: click.Context) -> bool:
449440
)
450441

451442
return True
443+
444+
445+
def log_selected_connectors(selected_connectors_and_files: dict[Connector, Set[Union[str, Path]]]) -> None:
446+
if selected_connectors_and_files:
447+
selected_connectors_names = [c.technical_name for c in selected_connectors_and_files.keys()]
448+
main_logger.info(f"Will run on the following connectors: {', '.join(selected_connectors_names)}.")
449+
else:
450+
main_logger.info("No connectors to run.")

airbyte-ci/connectors/pipelines/pipelines/utils.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,34 +12,35 @@
1212
import re
1313
import sys
1414
import unicodedata
15-
1615
from glob import glob
16+
from io import TextIOWrapper
1717
from pathlib import Path
1818
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple, Union
19-
from io import TextIOWrapper
2019

2120
import anyio
2221
import asyncer
2322
import click
2423
import git
25-
from pipelines import consts, main_logger, sentry_utils
26-
from pipelines.consts import GCS_PUBLIC_DOMAIN
2724
from connector_ops.utils import get_all_released_connectors, get_changed_connectors
2825
from dagger import Client, Config, Connection, Container, DaggerError, ExecError, File, ImageLayerCompression, QueryError, Secret
2926
from google.cloud import storage
3027
from google.oauth2 import service_account
3128
from more_itertools import chunked
29+
from pipelines import consts, main_logger, sentry_utils
30+
from pipelines.consts import GCS_PUBLIC_DOMAIN
3231

3332
if TYPE_CHECKING:
34-
from pipelines.contexts import ConnectorContext
33+
from connector_ops.utils import Connector
3534
from github import PullRequest
35+
from pipelines.contexts import ConnectorContext
3636

3737
DAGGER_CONFIG = Config(log_output=sys.stderr)
3838
AIRBYTE_REPO_URL = "https://github.com/airbytehq/airbyte.git"
3939
METADATA_FILE_NAME = "metadata.yaml"
4040
METADATA_ICON_FILE_NAME = "icon.svg"
4141
DIFF_FILTER = "MADRT" # Modified, Added, Deleted, Renamed, Type changed
4242
IGNORED_FILE_EXTENSIONS = [".md"]
43+
ALL_CONNECTOR_DEPENDENCIES = [(connector, connector.get_local_dependency_paths()) for connector in get_all_released_connectors()]
4344

4445

4546
# This utils will probably be redundant once https://github.com/dagger/dagger/issues/3764 is implemented
@@ -323,47 +324,54 @@ def _file_path_starts_with(given_file_path: Path, starts_with_path: Path) -> boo
323324
return given_file_path_parts[: len(starts_with_path_parts)] == starts_with_path_parts
324325

325326

326-
def _find_modified_connectors(file: Union[str, Path], all_dependencies: list) -> dict:
327-
"""Find all connectors whose dependencies were modified."""
327+
def _find_modified_connectors(file_path: Union[str, Path], dependency_scanning: bool = True) -> dict:
328+
"""Find all connectors impacted by the file change."""
328329
modified_connectors = {}
329-
for connector, connector_dependencies in all_dependencies:
330-
for connector_dependency in connector_dependencies:
331-
file_path = Path(file)
332-
333-
if _file_path_starts_with(file_path, connector_dependency):
334-
# Add the connector to the modified connectors
335-
modified_connectors.setdefault(connector, [])
336-
connector_directory_path = Path(connector.code_directory)
337-
338-
# If the file is in the connector directory, add it to the modified files
339-
if _file_path_starts_with(file_path, connector_directory_path):
340-
modified_connectors[connector].append(file)
341-
else:
342-
main_logger.info(f"Adding connector '{connector}' due to dependency modification: '{file}'.")
330+
for connector, connector_dependencies in ALL_CONNECTOR_DEPENDENCIES:
331+
if Path(file_path).is_relative_to(Path(connector.code_directory)):
332+
main_logger.info(f"Adding connector '{connector}' due to connector file modification: {file_path}.")
333+
modified_connectors.setdefault(connector, [])
334+
modified_connectors[connector].append(file_path)
335+
336+
if dependency_scanning:
337+
for connector_dependency in connector_dependencies:
338+
if Path(file_path).is_relative_to(Path(connector_dependency)):
339+
# Add the connector to the modified connectors
340+
modified_connectors.setdefault(connector, [])
341+
main_logger.info(f"Adding connector '{connector}' due to dependency modification: '{file_path}'.")
343342

344343
return modified_connectors
345344

346345

347-
def get_modified_connectors(modified_files: Set[Union[str, Path]]) -> dict:
346+
def get_modified_connectors_and_files(
347+
modified_files: Set[Union[str, Path]], metadata_modification_only: bool = False, dependency_scanning: bool = True
348+
) -> dict[Connector, Set[Union[str, Path]]]:
348349
"""Create a mapping of modified connectors (key) and modified files (value).
349350
As we call connector.get_local_dependencies_paths() any modification to a dependency will trigger connector pipeline for all connectors that depend on it.
350351
The get_local_dependencies_paths function currently computes dependencies for Java connectors only.
351352
It's especially useful to trigger tests of strict-encrypt variant when a change is made to the base connector.
352353
Or to tests all jdbc connectors when a change is made to source-jdbc or base-java.
353354
We'll consider extending the dependency resolution to Python connectors once we confirm that it's needed and feasible in term of scale.
354355
"""
355-
all_connector_dependencies = [(connector, connector.get_local_dependency_paths()) for connector in get_all_released_connectors()]
356356

357357
# Ignore files with certain extensions
358358
modified_files = [file for file in modified_files if not _is_ignored_file(file)]
359-
359+
if metadata_modification_only:
360+
modified_files = get_modified_metadata_files(modified_files)
361+
main_logger.info(f"Modified metadata files: {modified_files}")
360362
modified_connectors = {}
361363
for modified_file in modified_files:
362-
modified_connectors.update(_find_modified_connectors(modified_file, all_connector_dependencies))
363-
364+
modified_connectors.update(_find_modified_connectors(modified_file, dependency_scanning))
364365
return modified_connectors
365366

366367

368+
def get_connector_modified_files(connector: Connector, all_modified_files: Set[Union[str, Path]]) -> Set[Union[str, Path]]:
369+
for modified_file in all_modified_files:
370+
modified_file_path = Path(modified_file)
371+
if modified_file_path.is_relative_to(connector.code_directory):
372+
yield modified_file
373+
374+
367375
def get_modified_metadata_files(modified_files: Set[Union[str, Path]]) -> Set[Path]:
368376
return {
369377
Path(str(f))

airbyte-ci/connectors/pipelines/poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-ci/connectors/pipelines/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "pipelines"
7-
version = "0.1.0"
7+
version = "0.1.1"
88
description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines"
99
authors = ["Airbyte <[email protected]>"]
1010

0 commit comments

Comments
 (0)