Skip to content

Commit 7b65691

Browse files
fix(airbyte-ci): cleanup unused / empty schema directories after inlining schemas to manifests (#43297)
Co-authored-by: Octavia Squidington III <[email protected]>
1 parent 889cb65 commit 7b65691

File tree

5 files changed

+67
-10
lines changed

5 files changed

+67
-10
lines changed

airbyte-ci/connectors/pipelines/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,8 @@ E.G.: running Poe tasks on the modified internal packages of the current branch:
773773

774774
| Version | PR | Description |
775775
| ------- | ---------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- |
776-
| 4.28.1 | [#42972](https://github.com/airbytehq/airbyte/pull/42972) | Add airbyte-enterprise support for format command. |
776+
| 4.28.2 | [#43297](https://github.com/airbytehq/airbyte/pull/43297) | `migrate-to-inline_schemas` removes unused schema files and empty schema dirs. |
777+
| 4.28.1 | [#42972](https://github.com/airbytehq/airbyte/pull/42972) | Add airbyte-enterprise support for format command. |
777778
| 4.28.0 | [#42849](https://github.com/airbytehq/airbyte/pull/42849) | Couple selection of strict-encrypt variants (and vice versa) |
778779
| 4.27.0 | [#42574](https://github.com/airbytehq/airbyte/pull/42574) | Live tests: run from connectors test pipeline for connectors with sandbox connections |
779780
| 4.26.1 | [#42905](https://github.com/airbytehq/airbyte/pull/42905) | Rename the docker cache volume to avoid using the corrupted previous volume. |

airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/consts.py

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class CONNECTOR_TEST_STEP_ID(str, Enum):
3434
UPDATE_PULL_REQUEST = "up_to_date.pull"
3535
INLINE_CANDIDATE = "migration_to_inline_schemas.candidate"
3636
INLINE_MIGRATION = "migration_to_inline_schemas.migration"
37+
INLINE_CLEANUP = "migration_to_inline_schemas.cleanup"
3738
AIRBYTE_LOGGER_CANDIDATE = "migration_to_logging_logger.candidate"
3839
AIRBYTE_LOGGER_MIGRATION = "migration_to_logging_logger.migration"
3940
PULL_REQUEST_CREATE = "pull_request.create"

airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_inline_schemas/pipeline.py

+58-4
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ async def _run(self) -> StepResult:
4949
connector = self.context.connector
5050
manifest_path = connector.manifest_path
5151
python_path = connector.python_source_dir_path
52-
if connector.language not in [ConnectorLanguage.PYTHON, ConnectorLanguage.LOW_CODE, ConnectorLanguage.MANIFEST_ONLY]:
52+
if connector.language not in [
53+
ConnectorLanguage.PYTHON,
54+
ConnectorLanguage.LOW_CODE,
55+
ConnectorLanguage.MANIFEST_ONLY,
56+
]:
5357
return StepResult(
5458
step=self,
5559
status=StepStatus.SKIPPED,
@@ -153,7 +157,11 @@ async def _run(self) -> StepResult:
153157

154158
data = read_yaml(manifest_path)
155159
if "streams" not in data:
156-
return StepResult(step=self, status=StepStatus.SKIPPED, stderr="No manifest streams found.")
160+
return StepResult(
161+
step=self,
162+
status=StepStatus.SKIPPED,
163+
stderr="No manifest streams found.",
164+
)
157165

158166
# find the explicit ones and remove or update
159167
json_loaders = _find_json_loaders(data, [])
@@ -196,14 +204,43 @@ async def _run(self) -> StepResult:
196204
_update_inline_schema(schema_loader, json_streams, stream_name)
197205

198206
write_yaml(data, manifest_path)
199-
await format_prettier([manifest_path])
207+
await format_prettier([manifest_path], logger=logger)
200208

201209
for json_stream in json_streams.values():
202210
logger.info(f" !! JSON schema not found: {json_stream.name}")
203211

204212
return StepResult(step=self, status=StepStatus.SUCCESS)
205213

206214

215+
class RemoveUnusedJsonSchamas(Step):
216+
context: ConnectorContext
217+
218+
title = "Cleanup json schemas that are dangling but unused."
219+
220+
async def _run(self) -> StepResult:
221+
connector = self.context.connector
222+
connector_path = connector.code_directory
223+
manifest_path = connector.manifest_path
224+
python_path = connector.python_source_dir_path
225+
schemas_path = python_path / SCHEMAS_DIR_NAME
226+
logger = self.logger
227+
228+
manifest = connector.manifest_path.read_text()
229+
230+
if manifest.find("JsonFileSchemaLoader") != -1:
231+
return StepResult(
232+
step=self,
233+
status=StepStatus.SKIPPED,
234+
stderr="Skipping: the manifest is still using JSON Schema loader.",
235+
)
236+
237+
if schemas_path.exists():
238+
logger.info(f" Removing schemnas dir: {schemas_path}")
239+
shutil.rmtree(schemas_path)
240+
241+
return StepResult(step=self, status=StepStatus.SUCCESS)
242+
243+
207244
@dataclass
208245
class JsonStream:
209246
name: str
@@ -370,7 +407,14 @@ async def run_connector_migrate_to_inline_schemas_pipeline(context: ConnectorCon
370407

371408
steps_to_run: STEP_TREE = []
372409

373-
steps_to_run.append([StepToRun(id=CONNECTOR_TEST_STEP_ID.INLINE_CANDIDATE, step=CheckIsInlineCandidate(context))])
410+
steps_to_run.append(
411+
[
412+
StepToRun(
413+
id=CONNECTOR_TEST_STEP_ID.INLINE_CANDIDATE,
414+
step=CheckIsInlineCandidate(context),
415+
)
416+
]
417+
)
374418

375419
steps_to_run.append(
376420
[
@@ -382,4 +426,14 @@ async def run_connector_migrate_to_inline_schemas_pipeline(context: ConnectorCon
382426
]
383427
)
384428

429+
steps_to_run.append(
430+
[
431+
StepToRun(
432+
id=CONNECTOR_TEST_STEP_ID.INLINE_CLEANUP,
433+
step=RemoveUnusedJsonSchamas(context),
434+
depends_on=[CONNECTOR_TEST_STEP_ID.INLINE_MIGRATION],
435+
)
436+
]
437+
)
438+
385439
return await run_connector_steps(context, semaphore, steps_to_run, restore_original_state=restore_original_state)

airbyte-ci/connectors/pipelines/pipelines/helpers/connectors/format.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
22

3+
import logging
34
import subprocess
45
from pathlib import Path
56
from typing import List
67

78
from pipelines.cli.ensure_repo_root import get_airbyte_repo_path_with_fallback
89

910

10-
async def format_prettier(files: List[Path]) -> None:
11+
async def format_prettier(files: List[Path], logger: logging.Logger) -> None:
1112
if len(files) == 0:
1213
return
1314

@@ -18,13 +19,13 @@ async def format_prettier(files: List[Path]) -> None:
1819

1920
to_format = [str(file) for file in files]
2021

21-
print(f" Formatting files: npx prettier --write {' '.join(to_format)}")
22+
logger.info(f" Formatting files: npx prettier --write {' '.join(to_format)}")
2223
command = ["npx", "prettier", "--config", str(config_path), "--write"] + to_format
2324
result = subprocess.run(command, capture_output=True, text=True)
2425
if result.returncode == 0:
25-
print("Files formatted successfully.")
26+
logger.info(" Files formatted successfully.")
2627
else:
27-
print("Error formatting files.")
28+
logger.warn(" Error formatting files.")
2829

2930

3031
def verify_formatters() -> None:

airbyte-ci/connectors/pipelines/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "pipelines"
7-
version = "4.28.1"
7+
version = "4.28.2"
88
description = "Package maintained by the connector operations team to perform CI for connectors' pipelines"
99
authors = ["Airbyte <[email protected]>"]
1010

0 commit comments

Comments
 (0)