Skip to content

Commit 2b97e6d

Browse files
authored
feat(registry): add cdk version (#37809)
## What Adds the cdk version for python connectors to the registry ![Screenshot 2024-05-03 at 3.00.59 PM.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/PTsI7qAmiIMkhFQg04QF/4e9b5b92-90f4-476d-b19b-0ee404e49ef4.png) closes airbytehq/airbyte-internal-issues#7462
1 parent f029414 commit 2b97e6d

17 files changed

+172
-6
lines changed

airbyte-ci/connectors/metadata_service/lib/README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ _💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage
5050
_⚠️ Warning: It's important to know that this will remove ANY files you have in your destination buckets as it calls `gsutil rsync` with `-d` enabled._
5151

5252
```bash
53-
TARGET_BUCKET=<YOUR-DEV_BUCKET> poetry poe replicate-prod
53+
TARGET_BUCKET=<YOUR-DEV_BUCKET> poetry run poe replicate-prod
5454
```
5555

5656
### Copy specific connector version to your Development Bucket
@@ -60,7 +60,7 @@ This will copy the specified connector version to your development bucket. This
6060
_💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage/docs/gsutil) installed and have run `gsutil auth login`_
6161

6262
```bash
63-
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry poe copy-connector-from-prod
63+
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe copy-connector-from-prod
6464
```
6565

6666
### Promote Connector Version to Latest
@@ -72,5 +72,5 @@ _💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage
7272
_⚠️ Warning: It's important to know that this will remove ANY existing files in the latest folder that are not in the versioned folder as it calls `gsutil rsync` with `-d` enabled._
7373

7474
```bash
75-
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry poe promote-connector-to-latest
75+
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe promote-connector-to-latest
7676
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# generated by datamodel-codegen:
2+
# filename: ConnectorPackageInfo.yaml
3+
4+
from __future__ import annotations
5+
6+
from typing import Optional
7+
8+
from pydantic import BaseModel
9+
10+
11+
class ConnectorPackageInfo(BaseModel):
12+
cdk_version: Optional[str] = None

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorRegistryDestinationDefinition.py

+5
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ class SourceFileInfo(BaseModel):
9797
registry_entry_generated_at: Optional[str] = None
9898

9999

100+
class ConnectorPackageInfo(BaseModel):
101+
cdk_version: Optional[str] = None
102+
103+
100104
class JobTypeResourceLimit(BaseModel):
101105
class Config:
102106
extra = Extra.forbid
@@ -196,3 +200,4 @@ class Config:
196200
ab_internal: Optional[AirbyteInternal] = None
197201
supportsRefreshes: Optional[bool] = False
198202
generated: Optional[GeneratedFields] = None
203+
packageInfo: Optional[ConnectorPackageInfo] = None

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorRegistrySourceDefinition.py

+5
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ class SourceFileInfo(BaseModel):
9393
registry_entry_generated_at: Optional[str] = None
9494

9595

96+
class ConnectorPackageInfo(BaseModel):
97+
cdk_version: Optional[str] = None
98+
99+
96100
class JobTypeResourceLimit(BaseModel):
97101
class Config:
98102
extra = Extra.forbid
@@ -188,3 +192,4 @@ class Config:
188192
releases: Optional[ConnectorReleases] = None
189193
ab_internal: Optional[AirbyteInternal] = None
190194
generated: Optional[GeneratedFields] = None
195+
packageInfo: Optional[ConnectorPackageInfo] = None

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorRegistryV0.py

+6
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ class SourceFileInfo(BaseModel):
9797
registry_entry_generated_at: Optional[str] = None
9898

9999

100+
class ConnectorPackageInfo(BaseModel):
101+
cdk_version: Optional[str] = None
102+
103+
100104
class SuggestedStreams(BaseModel):
101105
class Config:
102106
extra = Extra.allow
@@ -202,6 +206,7 @@ class Config:
202206
releases: Optional[ConnectorReleases] = None
203207
ab_internal: Optional[AirbyteInternal] = None
204208
generated: Optional[GeneratedFields] = None
209+
packageInfo: Optional[ConnectorPackageInfo] = None
205210

206211

207212
class ConnectorRegistryDestinationDefinition(BaseModel):
@@ -239,6 +244,7 @@ class Config:
239244
ab_internal: Optional[AirbyteInternal] = None
240245
supportsRefreshes: Optional[bool] = False
241246
generated: Optional[GeneratedFields] = None
247+
packageInfo: Optional[ConnectorPackageInfo] = None
242248

243249

244250
class ConnectorRegistryV0(BaseModel):

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .AllowedHosts import *
55
from .ConnectorBuildOptions import *
66
from .ConnectorMetadataDefinitionV0 import *
7+
from .ConnectorPackageInfo import *
78
from .ConnectorRegistryDestinationDefinition import *
89
from .ConnectorRegistrySourceDefinition import *
910
from .ConnectorRegistryV0 import *
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
"$schema": http://json-schema.org/draft-07/schema#
3+
"$id": https://github.com/airbytehq/airbyte/airbyte-ci/connectors_ci/metadata_service/lib/models/src/ConnectorPackageInfo.yaml
4+
title: ConnectorPackageInfo
5+
description: Information about the contents of the connector image
6+
type: object
7+
properties:
8+
cdk_version:
9+
type: string

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ConnectorRegistryDestinationDefinition.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,5 @@ properties:
7777
default: false
7878
generated:
7979
"$ref": GeneratedFields.yaml
80+
packageInfo:
81+
"$ref": ConnectorPackageInfo.yaml

airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src/ConnectorRegistrySourceDefinition.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,5 @@ properties:
7676
"$ref": AirbyteInternal.yaml
7777
generated:
7878
"$ref": GeneratedFields.yaml
79+
packageInfo:
80+
"$ref": ConnectorPackageInfo.yaml

airbyte-ci/connectors/metadata_service/lib/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "metadata-service"
3-
version = "0.5.0"
3+
version = "0.6.0"
44
description = ""
55
authors = ["Ben Church <[email protected]>"]
66
readme = "README.md"

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/registry_entry.py

+20
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from metadata_service.models.transform import to_json_sanitized_dict
2121
from metadata_service.spec_cache import SpecCache
2222
from orchestrator.config import MAX_METADATA_PARTITION_RUN_REQUEST, VALID_REGISTRIES, get_public_url_for_gcs_file
23+
from orchestrator.fetcher.connector_cdk_version import get_cdk_version
2324
from orchestrator.logging import sentry
2425
from orchestrator.logging.publish_connector_lifecycle import PublishConnectorLifecycle, PublishConnectorLifecycleStage, StageStatus
2526
from orchestrator.models.metadata import LatestMetadataEntry, MetadataDefinition
@@ -171,6 +172,22 @@ def apply_generated_fields(metadata_data: dict, metadata_entry: LatestMetadataEn
171172
return generated_fields
172173

173174

175+
@deep_copy_params
def apply_package_info_fields(metadata_data: dict, metadata_entry: LatestMetadataEntry) -> dict:
    """Build the ``packageInfo`` section for a registry entry.

    Starts from whatever ``packageInfo`` mapping the metadata already carries
    (or an empty one) and sets its ``cdk_version`` key to the value resolved
    for the given metadata entry.

    Args:
        metadata_data (dict): The metadata data field.
        metadata_entry (LatestMetadataEntry): The metadata entry used to resolve the CDK version.

    Returns:
        dict: The package info fields with ``cdk_version`` applied.
    """
    existing_package_info = metadata_data.get("packageInfo")
    if not existing_package_info:
        existing_package_info = {}

    cdk_version = get_cdk_version(metadata_entry)
    return set_with(existing_package_info, "cdk_version", cdk_version, default_none_to_dict)
189+
190+
174191
@deep_copy_params
175192
@sentry_sdk.trace
176193
def metadata_to_registry_entry(metadata_entry: LatestMetadataEntry, override_registry_key: str) -> dict:
@@ -214,6 +231,9 @@ def metadata_to_registry_entry(metadata_entry: LatestMetadataEntry, override_reg
214231
# Add generated fields for source file metadata and git
215232
overridden_metadata_data["generated"] = apply_generated_fields(overridden_metadata_data, metadata_entry)
216233

234+
# Add Dependency information
235+
overridden_metadata_data["packageInfo"] = apply_package_info_fields(overridden_metadata_data, metadata_entry)
236+
217237
# if there is no supportLevel, set it to "community"
218238
if not overridden_metadata_data.get("supportLevel"):
219239
overridden_metadata_data["supportLevel"] = "community"

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/config.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import os
66
from typing import Optional
77

8+
DEFAULT_ASSET_URL = "https://storage.googleapis.com"
9+
810
VALID_REGISTRIES = ["oss", "cloud"]
911
REGISTRIES_FOLDER = "registries/v0"
1012
REPORT_FOLDER = "generated_reports"
@@ -20,6 +22,9 @@
2022
CONNECTORS_PATH = "airbyte-integrations/connectors"
2123
CONNECTOR_TEST_SUMMARY_FOLDER = "test_summary"
2224

25+
CONNECTOR_DEPENDENCY_FOLDER = "connector_dependencies"
26+
CONNECTOR_DEPENDENCY_FILE_NAME = "dependencies.json"
27+
2328
MAX_METADATA_PARTITION_RUN_REQUEST = 50
2429

2530
HIGH_QUEUE_PRIORITY = "3"
@@ -39,7 +44,7 @@ def get_public_url_for_gcs_file(bucket_name: str, file_path: str, cdn_url: Optio
3944
Returns:
4045
The public URL to the file.
4146
"""
42-
return f"{cdn_url}/{file_path}" if cdn_url else f"https://storage.googleapis.com/{bucket_name}/{file_path}"
47+
return f"{cdn_url}/{file_path}" if cdn_url else f"{DEFAULT_ASSET_URL}/{bucket_name}/{file_path}"
4348

4449

4550
def get_public_metadata_service_url(file_path: str) -> str:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#
2+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from typing import Optional
6+
7+
import requests
8+
from orchestrator.models.metadata import LatestMetadataEntry
9+
10+
GROUP_NAME = "connector_cdk_versions"
11+
12+
BASE_URL = "https://storage.googleapis.com/dev-airbyte-cloud-connector-metadata-service/"
13+
DEPENDENCY_FOLDER = "connector_dependencies"
14+
DEPENDENCY_FILE = "dependencies.json"
15+
PACKAGE_NAME = "airbyte-cdk"
16+
PYTHON_CDK_SLUG = "python"
17+
18+
# HELPERS
19+
20+
21+
def safe_get_json_from_url(url: str, timeout: float = 10.0) -> Optional[dict]:
    """Fetch ``url`` and return its decoded JSON body, or ``None`` on any failure.

    This is a best-effort helper: request errors, non-OK responses and bodies
    that cannot be decoded as JSON all result in ``None`` rather than an
    exception.

    Args:
        url (str): The URL to issue the GET request against.
        timeout (float): Seconds to wait on the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        Optional[dict]: The parsed JSON body, or None if it could not be fetched or decoded.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if not response.ok:
            return None
        return response.json()
    except (requests.exceptions.RequestException, ValueError):
        # ValueError covers JSON decode failures raised by `requests` versions
        # whose decode error is not a RequestException subclass.
        return None
30+
31+
32+
def find_package_version(dependencies_body: dict, package_name: str) -> Optional[str]:
    """Return the version listed for ``package_name`` in a dependencies document.

    Args:
        dependencies_body (dict): Parsed dependencies document; its
            ``"dependencies"`` key is read as a list of mappings with
            ``"package_name"`` and ``"version"`` keys.
        package_name (str): The package to look for.

    Returns:
        Optional[str]: The ``"version"`` of the first entry whose
        ``"package_name"`` matches, or None if there is no such entry.
    """
    # `or []` also covers a "dependencies" key that is present but null.
    for package in dependencies_body.get("dependencies") or []:
        if package.get("package_name") == package_name:
            return package.get("version")
    return None
37+
38+
39+
def get_cdk_version(
    metadata_entry: LatestMetadataEntry,
) -> Optional[str]:
    """Resolve the CDK version string for the connector behind ``metadata_entry``.

    Downloads the dependency file referenced by the entry and looks up the
    ``airbyte-cdk`` package in it.

    Args:
        metadata_entry (LatestMetadataEntry): Entry providing ``dependency_file_url``.

    Returns:
        Optional[str]: ``"python:<version>"`` when a version is found; None when
        the entry has no dependency file URL, the file cannot be fetched, or
        the package is not listed.
    """
    dependency_url = metadata_entry.dependency_file_url
    dependencies_body = safe_get_json_from_url(dependency_url) if dependency_url else None
    if not dependencies_body:
        return None

    cdk_version = find_package_version(dependencies_body, PACKAGE_NAME)
    if not cdk_version:
        return None

    # Note: Prefix the version with the python slug as the python cdk is the only one we have
    # versions available for.
    return f"{PYTHON_CDK_SLUG}:{cdk_version}"

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/hacks.py

+25
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,28 @@ def construct_registry_entry_write_path(
8484
overrode_registry_entry_version_write_path = _get_version_specific_registry_entry_file_path(registry_entry, registry_name)
8585
_check_for_invalid_write_path(overrode_registry_entry_version_write_path)
8686
return overrode_registry_entry_version_write_path
87+
88+
89+
def sanitize_docker_repo_name_for_dependency_file(docker_repo_name: str) -> str:
    """
    Remove the "airbyte/" prefix from the docker repository name.

    e.g. airbyte/source-postgres -> source-postgres

    Only a leading "airbyte/" is stripped; any later occurrence of that text in
    the name is left untouched, and names without the prefix are returned as is.

    Problem:
    The dependency file paths are based on the docker repository name without the "airbyte/" prefix whereas all other
    paths are based on the full docker repository name.

    e.g. https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/connector_dependencies/source-pokeapi/0.2.0/dependencies.json

    Long term solution:
    Move the dependency file paths to be based on the full docker repository name.

    Args:
        docker_repo_name (str): The docker repository name

    Returns:
        str: The docker repository name without the "airbyte/" prefix
    """
    prefix = "airbyte/"
    if docker_repo_name.startswith(prefix):
        return docker_repo_name[len(prefix):]
    return docker_repo_name

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/models/metadata.py

+17
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@
44

55
from typing import Any, Optional, Tuple
66

7+
import orchestrator.hacks as HACKS
78
from metadata_service.constants import METADATA_FILE_NAME
89
from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
10+
from orchestrator.config import CONNECTOR_DEPENDENCY_FILE_NAME, CONNECTOR_DEPENDENCY_FOLDER, get_public_url_for_gcs_file
911
from pydantic import BaseModel, ValidationError
12+
from pydash import get
1013

1114

1215
class PydanticDelayValidationMixin:
@@ -61,3 +64,17 @@ def is_latest_version_path(self) -> bool:
6164
"""
6265
ending_path = f"latest/{METADATA_FILE_NAME}"
6366
return self.file_path.endswith(ending_path)
67+
68+
@property
def dependency_file_url(self) -> Optional[str]:
    """URL of this connector version's dependency file, if it can be derived.

    The path is built as
    ``<dependency folder>/<repo name without "airbyte/">/<image tag>/<dependency file name>``
    inside ``self.bucket_name``.

    Returns:
        Optional[str]: The URL, or None when the bucket name, the metadata
        definition, the docker repository or the docker image tag is missing.
    """
    if not self.bucket_name or not self.metadata_definition:
        return None

    connector_technical_name = get(self.metadata_definition, "data.dockerRepository")
    connector_version = get(self.metadata_definition, "data.dockerImageTag")
    if not connector_technical_name or not connector_version:
        # Without both values the path would be malformed (or the sanitize
        # helper would fail on None), so report "no URL" instead.
        return None

    sanitized_connector_technical_name = HACKS.sanitize_docker_repo_name_for_dependency_file(connector_technical_name)

    file_path = (
        f"{CONNECTOR_DEPENDENCY_FOLDER}/{sanitized_connector_technical_name}/{connector_version}/{CONNECTOR_DEPENDENCY_FILE_NAME}"
    )
    return get_public_url_for_gcs_file(self.bucket_name, file_path)

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/utils/object_helpers.py

+3
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,8 @@ def default_none_to_dict(value, key, obj):
5050
key: The key to set in the dictionary.
5151
obj: The dictionary to set the key in.
5252
"""
53+
if obj is None:
54+
return
55+
5356
if value is None:
5457
obj[key] = {}

airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)