Skip to content

Add airbyte-ci command: migrate-to-manifest-only #42576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
38d9f63
feat: pipeline validates connector and moves manifest
ChristoGrab Jul 26, 2024
ffa5e55
feat: first draft of full pipeline
ChristoGrab Jul 26, 2024
f7d588a
pipeline validates and moves manifest
ChristoGrab Jul 26, 2024
8bec567
update metadata tags
ChristoGrab Jul 26, 2024
6d26096
chore: auto-fix lint and format issues
octavia-squidington-iii Jul 27, 2024
7b1cd10
Merge branch 'master' into christo/airbyte-ci-strip
natikgadzhi Jul 27, 2024
a1e0385
task: delete source folder and add metadata language tag
ChristoGrab Jul 29, 2024
5b23bd9
chore: import cleanup
ChristoGrab Jul 29, 2024
eb9e6db
task: regenerate README file
ChristoGrab Jul 29, 2024
b409814
task: update deletion to logic to exclude unit_tests/integration folder
ChristoGrab Jul 29, 2024
4b6a345
task: update documentation page
ChristoGrab Jul 29, 2024
16fdf02
Merge branch 'master' of https://github.com/airbytehq/airbyte into ch…
ChristoGrab Jul 30, 2024
b5cb21c
task: update baseImage in metadata
ChristoGrab Jul 30, 2024
6fef1fa
bug: fix stream table generation in docs
ChristoGrab Jul 30, 2024
51dc4ba
task: remove docs update step and update readme template
ChristoGrab Jul 30, 2024
1003fa6
feat: completed pipeline with reset on failure
ChristoGrab Jul 30, 2024
3819277
refactor: update command name and break up steps
ChristoGrab Jul 30, 2024
ea5f1da
chore: format
ChristoGrab Jul 30, 2024
8ec36c5
refactor: cleanup for readability
ChristoGrab Jul 31, 2024
4d5327d
chore: add docstrings
ChristoGrab Jul 31, 2024
fa9fe37
task: add README entry and version bump
ChristoGrab Jul 31, 2024
e387e3f
task: detect and migrate non-inline specs to manifest
ChristoGrab Jul 31, 2024
8cbc115
chore: merge master
ChristoGrab Jul 31, 2024
3645b50
chore: add parameter type to util function
ChristoGrab Jul 31, 2024
91504ea
fix: remove existing metadata language tag
ChristoGrab Jul 31, 2024
930226f
chore: auto-fix lint and format issues
octavia-squidington-iii Jul 31, 2024
db74674
update readme template
ChristoGrab Aug 2, 2024
51c161e
fix: include documentation_url when fetching spec
ChristoGrab Aug 2, 2024
84c0ea5
disable pypi in metadata
ChristoGrab Aug 6, 2024
61e2c0d
task: update logic to fetch latest valid tag in dockerhub and disable…
ChristoGrab Aug 6, 2024
995f33b
chore: format
ChristoGrab Aug 6, 2024
31cd78b
task: resolve parameter refs in manifest
ChristoGrab Aug 6, 2024
84c4e1c
chore: resolve type warnings
ChristoGrab Aug 6, 2024
4f9a71d
apparently i had a seizure
ChristoGrab Aug 6, 2024
bad2693
light cleanup
natikgadzhi Aug 7, 2024
75612d5
Merge branch 'master' into christo/airbyte-ci-strip
natikgadzhi Aug 7, 2024
2889b26
Update airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connector…
natikgadzhi Aug 7, 2024
b998b07
Update airbyte-ci/connectors/pipelines/README.md
natikgadzhi Aug 7, 2024
8fdd004
Update airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connector…
natikgadzhi Aug 7, 2024
ccc30a7
chore: auto-fix lint and format issues
octavia-squidington-iii Aug 7, 2024
ce98657
typing
natikgadzhi Aug 7, 2024
441031b
fix merge conflicts
ChristoGrab Aug 7, 2024
96e8a5d
Merge branch 'master' into christo/airbyte-ci-strip
natikgadzhi Aug 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def validate_environment(is_local: bool) -> None:
"upgrade-cdk": "pipelines.airbyte_ci.connectors.upgrade_cdk.commands.upgrade_cdk",
"up-to-date": "pipelines.airbyte_ci.connectors.up_to_date.commands.up_to_date",
"pull-request": "pipelines.airbyte_ci.connectors.pull_request.commands.pull_request",
"strip": "pipelines.airbyte_ci.connectors.strip.commands.strip"
},
)
@click.option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class CONNECTOR_TEST_STEP_ID(str, Enum):
AIRBYTE_LOGGER_MIGRATION = "migration_to_logging_logger.migration"
PULL_REQUEST_CREATE = "pull_request.create"
PULL_REQUEST_UPDATE = "pull_request.update"
STRIP_CHECK_CANDIDATE = "strip.check_candidate"
STRIP_MIGRATION = "strip.strip_migration"

def __str__(self) -> str:
return self.value
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

import asyncclick as click
from pipelines.airbyte_ci.connectors.context import ConnectorContext
from pipelines.airbyte_ci.connectors.pipeline import run_connectors_pipelines
from pipelines.airbyte_ci.connectors.strip.pipeline import run_connectors_strip_pipeline
from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand

@click.command(cls=DaggerPipelineCommand, short_help="Migrate low-code connector to manifest-only")
@click.pass_context
async def strip(ctx: click.Context) -> bool:
    """Run the strip-to-manifest-only pipeline on the selected connectors.

    One ``ConnectorContext`` is built for each connector found under
    ``ctx.obj["selected_connectors_with_modified_files"]``; the contexts are
    then handed to ``run_connectors_pipelines`` together with
    ``run_connectors_strip_pipeline``.

    Args:
        ctx: The click context; ``ctx.obj`` carries the shared pipeline settings.

    Returns:
        Always ``True`` once ``run_connectors_pipelines`` has returned.
    """
    settings = ctx.obj

    connectors_contexts = []
    for connector in settings["selected_connectors_with_modified_files"]:
        connectors_contexts.append(
            ConnectorContext(
                pipeline_name=f"Strip connector {connector.technical_name} to manifest-only",
                connector=connector,
                is_local=settings["is_local"],
                git_branch=settings["git_branch"],
                git_revision=settings["git_revision"],
                diffed_branch=settings["diffed_branch"],
                git_repo_url=settings["git_repo_url"],
                report_output_prefix=settings["report_output_prefix"],
                # Optional setting: .get() yields None when it is absent.
                pipeline_start_timestamp=settings.get("pipeline_start_timestamp"),
            )
        )

    # Positional arguments are passed in the same order as the original call.
    pipeline_args = (
        connectors_contexts,
        run_connectors_strip_pipeline,
        "Strip connector to manifest-only pipeline",
        settings["concurrency"],
        settings["dagger_logs_path"],
        settings["execute_timeout"],
        settings["git_branch"],
        settings["git_revision"],
        settings["diffed_branch"],
        settings["is_local"],
        settings["ci_context"],
        settings["git_repo_url"],
    )
    await run_connectors_pipelines(*pipeline_args)

    return True
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

from typing import Any
import git
import shutil
import yaml

from pipelines.airbyte_ci.connectors.consts import CONNECTOR_TEST_STEP_ID
from pipelines.airbyte_ci.connectors.context import ConnectorContext
from pipelines.airbyte_ci.connectors.reports import ConnectorReport
from pipelines.helpers.execution.run_steps import STEP_TREE, StepToRun, run_steps
from pipelines.models.steps import StepResult, StepStatus, Step
from connector_ops.utils import ConnectorLanguage # type: ignore
from anyio import Semaphore

## GLOBAL VARIABLES

# File names allowed inside a connector's source_<name> package for it to be
# considered a migration candidate.
VALID_FILES = ["manifest.yaml", "run.py", "__init__.py", "source.py"]

# Entry names in the connector directory that the strip step does not delete.
# Kept as a list: code elsewhere in this module appends to it.
FILES_TO_LEAVE = ["__init__.py", "manifest.yaml", "metadata.yaml", "icon.svg", "run.py", "source.py"]


class CheckIsManifestMigrationCandidate(Step):
    """Pipeline step deciding whether a connector can be migrated to manifest-only.

    The step returns SKIPPED with an explanatory message whenever the connector
    is not eligible, and SUCCESS when every check passes. The checks are:

    1. the connector is not already manifest-only and is a low-code connector;
    2. its ``source_<name>`` package contains only files listed in ``VALID_FILES``;
    3. ``source.py`` mentions ``YamlDeclarativeSource`` and does not define ``streams``.
    """

    context: ConnectorContext

    title: str = "Check if the connector is a candidate for migration to manifest-only."
    # Not read by this step; kept so the class keeps the same attributes.
    airbyte_repo: git.Repo = git.Repo(search_parent_directories=True)
    # Class-level default only. `_run` rebinds a fresh per-instance list instead
    # of appending to this shared one, so results never leak between connectors.
    invalid_files: list = []

    async def _run(self) -> StepResult:
        """Run the eligibility checks against the connector's files on disk.

        Returns:
            A ``StepResult`` whose status is SKIPPED (not eligible) or SUCCESS.
        """
        connector = self.context.connector

        # Check manifest-only first: it is also "not low-code", so testing
        # low-code first would make this more specific message unreachable.
        if connector.language == ConnectorLanguage.MANIFEST_ONLY:
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stderr="The connector is already in manifest-only format.",
            )

        if connector.language != ConnectorLanguage.LOW_CODE:
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stderr="The connector is not a low-code connector.",
            )

        connector_source_code_dir = connector.code_directory / connector.technical_name.replace("-", "_")
        if not connector_source_code_dir.is_dir():
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stdout=f"The connector source directory {connector_source_code_dir.name} does not exist.",
            )

        # Any file outside the allow-list suggests custom code that cannot be
        # expressed by the manifest alone.
        self.invalid_files = [entry.name for entry in connector_source_code_dir.iterdir() if entry.name not in VALID_FILES]
        if self.invalid_files:
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stdout=f"The connector has unrecognized source files: {self.invalid_files}",
            )

        source_py_file = connector_source_code_dir / "source.py"
        if not source_py_file.is_file():
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stdout="The connector does not have a source.py file.",
            )

        # Plain substring checks: the source must reference the declarative
        # source class and must not define its own `streams` method.
        connector_source_py = source_py_file.read_text()

        if "YamlDeclarativeSource" not in connector_source_py:
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stdout="The connector does not use the YamlDeclarativeSource class.",
            )

        if "def streams" in connector_source_py:
            return StepResult(
                step=self,
                status=StepStatus.SKIPPED,
                stdout="The connector overrides the streams method.",
            )

        return StepResult(
            step=self,
            status=StepStatus.SUCCESS,
            stdout=f"{connector.technical_name} is a valid candidate for migration.",
        )

class StripConnector(Step):
    """Pipeline step that strips a low-code connector down to manifest-only files.

    The step works directly on the connector's directory on disk:

    1. moves ``manifest.yaml`` from the ``source_<name>`` package to the connector root;
    2. deletes every root entry that is not in ``FILES_TO_LEAVE`` (the
       ``source_<name>`` package itself is kept);
    3. replaces the ``cdk:low-code`` tag with ``cdk:manifest-only`` in ``metadata.yaml``.
    """

    context: ConnectorContext

    title = "Strip the connector to manifest-only."

    async def _run(self) -> StepResult:
        """Perform the migration.

        Returns:
            A ``StepResult`` with status FAILURE when the manifest cannot be
            moved or an entry survives deletion, SUCCESS otherwise.
        """
        connector_dir = self.context.connector.code_directory
        source_package_name = self.context.connector.technical_name.replace("-", "_")

        # 1. Move manifest.yaml to the root level of the directory
        self.logger.info("Moving manifest to the root level of the directory")
        move_failure_message = "Failed to move manifest.yaml to the root level of the directory."
        try:
            manifest_file = (connector_dir / source_package_name / "manifest.yaml").rename(connector_dir / "manifest.yaml")
        except OSError as error:
            # A missing or unmovable manifest is reported as a step failure
            # rather than escaping as an unhandled exception.
            return StepResult(
                step=self,
                status=StepStatus.FAILURE,
                stdout=move_failure_message,
                stderr=str(error),
            )

        if not manifest_file.is_file():
            return StepResult(
                step=self,
                status=StepStatus.FAILURE,
                stdout=move_failure_message,
            )

        # We don't want to delete the source_<name> folder. The keep-set is
        # built locally so the module-level FILES_TO_LEAVE list is not mutated
        # (and does not grow) every time the step runs.
        entries_to_keep = {*FILES_TO_LEAVE, source_package_name}

        # 2. Delete everything that is not in the allow-list.
        # Snapshot the listing first: entries are removed while looping.
        for entry in list(connector_dir.iterdir()):
            if entry.name in entries_to_keep:
                continue

            if entry.is_dir() and not entry.is_symlink():
                self.logger.info(f"Deleting {entry.name} folder")
                shutil.rmtree(entry)
            else:
                # Regular files and symlinks (rmtree refuses symlinks).
                self.logger.info(f"Deleting {entry.name}")
                entry.unlink()

            if entry.exists():
                return StepResult(
                    step=self,
                    status=StepStatus.FAILURE,
                    stdout=f"Failed to delete {entry.name}",
                )

        # 3. Grab the cdk tag from metadata.yaml and update it
        metadata_file = connector_dir / "metadata.yaml"
        with open(metadata_file, "r", encoding="utf-8") as metadata_stream:
            metadata = yaml.safe_load(metadata_stream)

        tags = metadata["data"].get("tags")
        if tags:
            metadata["data"]["tags"] = ["cdk:manifest-only" if tag == "cdk:low-code" else tag for tag in tags]

        # Write the changes to metadata.yaml. sort_keys=False keeps the file's
        # existing key order instead of alphabetising it.
        with open(metadata_file, "w", encoding="utf-8") as metadata_stream:
            yaml.dump(metadata, metadata_stream, default_flow_style=False, sort_keys=False)

        # TODO: Add more failure checks

        return StepResult(
            step=self,
            status=StepStatus.SUCCESS,
            stdout="The connector has been successfully migrated to manifest-only.",
        )

## MAIN FUNCTION
async def run_connectors_strip_pipeline(context: ConnectorContext, semaphore: "Semaphore", *args: Any) -> ConnectorReport:
    """Run the candidate check and the strip migration for one connector.

    Args:
        context: The connector context; entered as an async context manager
            around the step run and given the resulting report.
        semaphore: Held for the whole duration of the run.
        *args: Accepted but not used by this function.

    Returns:
        The ``ConnectorReport`` built from the step results.
    """
    check_candidate = StepToRun(
        id=CONNECTOR_TEST_STEP_ID.STRIP_CHECK_CANDIDATE,
        step=CheckIsManifestMigrationCandidate(context),
    )
    # The migration declares a dependency on the candidate check.
    migrate = StepToRun(
        id=CONNECTOR_TEST_STEP_ID.STRIP_MIGRATION,
        step=StripConnector(context),
        depends_on=[CONNECTOR_TEST_STEP_ID.STRIP_CHECK_CANDIDATE],
    )
    steps_to_run: STEP_TREE = [[check_candidate], [migrate]]

    async with semaphore:
        async with context:
            result_dict = await run_steps(
                runnables=steps_to_run,
                options=context.run_step_options,
            )
            results = list(result_dict.values())
            # TODO: restore the connector code when a step failed, e.g.
            # if any(step_result.status is StepStatus.FAILURE for step_result in results):
            #     restore code.

            report = ConnectorReport(context, steps_results=results, name="STRIP MIGRATION RESULTS")
            context.report = report

    return report
Loading