Skip to content

Commit d918385

Browse files
authored
cloud availability updater: automatic PR creation (#22568)
1 parent 4ab4cec commit d918385

File tree

8 files changed

+255
-126
lines changed

8 files changed

+255
-126
lines changed

.github/workflows/run-qa-engine.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ name: Run QA Engine
22

33
on:
44
workflow_dispatch:
5-
schedule:
6-
# 1pm UTC is 6am PDT.
7-
# same time as Generate Build Report
8-
- cron: "0 13 * * *"
5+
# schedule:
6+
## 1pm UTC is 6am PDT.
7+
## same time as Generate Build Report
8+
# - cron: "0 13 * * *"
99

1010
jobs:
1111
run-qa-engine:
@@ -28,5 +28,7 @@ jobs:
2828
run: pip install --quiet -e ./tools/ci_connector_ops
2929
- name: Run QA Engine
3030
env:
31-
QA_ENGINE_AIRBYTE_DATA_PROD_SA: '${{ secrets.QA_ENGINE_AIRBYTE_DATA_PROD_SA }}'
31+
LOGLEVEL: INFO
32+
QA_ENGINE_AIRBYTE_DATA_PROD_SA: "${{ secrets.QA_ENGINE_AIRBYTE_DATA_PROD_SA }}"
33+
GITHUB_API_TOKEN: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }}
3234
run: run-qa-engine
Lines changed: 88 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,58 @@
11
#
2-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

55

6-
import os
76
import logging
8-
from pathlib import Path
7+
import shutil
98
import subprocess
10-
from typing import Optional
9+
import tempfile
10+
from pathlib import Path
11+
from typing import Iterable, Optional
1112

1213
import git
14+
import requests
1315

14-
from .models import ConnectorQAReport
1516
from .constants import (
16-
AIRBYTE_CLOUD_GITHUB_REPO_URL,
17-
AIRBYTE_CLOUD_MAIN_BRANCH_NAME
17+
AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL,
18+
AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME,
19+
AIRBYTE_PLATFORM_INTERNAL_PR_ENDPOINT,
20+
AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER,
21+
GITHUB_API_COMMON_HEADERS,
1822
)
23+
from .models import ConnectorQAReport
1924

2025
logger = logging.getLogger(__name__)
21-
logger.setLevel(logging.INFO)
2226

2327

2428
def clone_airbyte_cloud_repo(local_repo_path: Path) -> git.Repo:
25-
logging.info(f"Cloning {AIRBYTE_CLOUD_GITHUB_REPO_URL} to {local_repo_path}")
26-
return git.Repo.clone_from(AIRBYTE_CLOUD_GITHUB_REPO_URL, local_repo_path, branch=AIRBYTE_CLOUD_MAIN_BRANCH_NAME)
29+
logger.info(f"Cloning {AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL} to {local_repo_path}")
30+
return git.Repo.clone_from(
31+
AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL, local_repo_path, branch=AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME
32+
)
33+
2734

2835
def get_definitions_mask_path(local_repo_path, definition_type: str) -> Path:
29-
definitions_mask_path = local_repo_path / f"cloud-config/cloud-config-seed/src/main/resources/seed/{definition_type}_definitions_mask.yaml"
36+
definitions_mask_path = (
37+
local_repo_path / f"cloud-config/cloud-config-seed/src/main/resources/seed/{definition_type}_definitions_mask.yaml"
38+
)
3039
if not definitions_mask_path.exists():
3140
raise FileNotFoundError(f"Can't find the {definition_type} definitions mask")
3241
return definitions_mask_path
3342

43+
3444
def checkout_new_branch(airbyte_cloud_repo: git.Repo, new_branch_name: str) -> git.Head:
3545
new_branch = airbyte_cloud_repo.create_head(new_branch_name)
3646
new_branch.checkout()
37-
logging.info(f"Checked out branch {new_branch_name}.")
47+
logger.info(f"Checked out branch {new_branch_name}.")
3848
return new_branch
3949

50+
4051
def update_definitions_mask(connector: ConnectorQAReport, definitions_mask_path: Path) -> Optional[Path]:
4152
with open(definitions_mask_path, "r") as definition_mask:
4253
connector_already_in_mask = connector.connector_definition_id in definition_mask.read()
4354
if connector_already_in_mask:
44-
logging.warning(f"{connector.connector_name}'s definition id is already in {definitions_mask_path}.")
55+
logger.warning(f"{connector.connector_name}'s definition id is already in {definitions_mask_path}.")
4556
return None
4657

4758
to_append = f"""# {connector.connector_name} (from cloud availability updater)
@@ -50,31 +61,64 @@ def update_definitions_mask(connector: ConnectorQAReport, definitions_mask_path:
5061

5162
with open(definitions_mask_path, "a") as f:
5263
f.write(to_append)
53-
logging.info(f"Updated {definitions_mask_path} with {connector.connector_name}'s definition id.")
64+
logger.info(f"Updated {definitions_mask_path} with {connector.connector_name}'s definition id.")
5465
return definitions_mask_path
5566

67+
5668
def run_generate_cloud_connector_catalog(airbyte_cloud_repo_path: Path) -> str:
5769
result = subprocess.check_output(
58-
f"cd {airbyte_cloud_repo_path} && ./gradlew :cloud-config:cloud-config-seed:generateCloudConnectorCatalog",
59-
shell=True
60-
)
61-
logging.info("Ran generateCloudConnectorCatalog Gradle Task")
70+
f"cd {airbyte_cloud_repo_path} && ./gradlew :cloud-config:cloud-config-seed:generateCloudConnectorCatalog", shell=True
71+
)
72+
logger.info("Ran generateCloudConnectorCatalog Gradle Task")
6273
return result.decode()
6374

75+
6476
def commit_all_files(airbyte_cloud_repo: git.Repo, commit_message: str):
65-
airbyte_cloud_repo.git.add('--all')
77+
airbyte_cloud_repo.git.add("--all")
6678
airbyte_cloud_repo.git.commit(m=commit_message)
67-
logging.info(f"Committed file changes.")
79+
logger.info("Committed file changes.")
80+
6881

69-
def push_branch(airbyte_cloud_repo: git.Repo, branch:str):
70-
airbyte_cloud_repo.git.push("--set-upstream", "origin", branch)
71-
logging.info(f"Pushed branch {branch} to origin")
82+
def push_branch(airbyte_cloud_repo: git.Repo, branch: str):
83+
airbyte_cloud_repo.git.push("--force", "--set-upstream", "origin", branch)
84+
logger.info(f"Pushed branch {branch} to origin")
7285

73-
def deploy_new_connector_to_cloud_repo(
74-
airbyte_cloud_repo_path: Path,
75-
airbyte_cloud_repo: git.Repo,
76-
connector: ConnectorQAReport
77-
):
86+
87+
def pr_already_created_for_branch(head_branch: str) -> bool:
88+
response = requests.get(
89+
AIRBYTE_PLATFORM_INTERNAL_PR_ENDPOINT,
90+
headers=GITHUB_API_COMMON_HEADERS,
91+
params={"head": f"{AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER}:{head_branch}", "state": "open"},
92+
)
93+
response.raise_for_status()
94+
return len(response.json()) > 0
95+
96+
97+
def create_pr(connector: ConnectorQAReport, branch: str) -> Optional[requests.Response]:
98+
body = f"""The Cloud Availability Updater decided that it's the right time to make {connector.connector_name} available on Cloud!
99+
- Technical name: {connector.connector_technical_name}
100+
- Version: {connector.connector_version}
101+
- Definition ID: {connector.connector_definition_id}
102+
- OSS sync success rate: {connector.sync_success_rate}
103+
- OSS number of connections: {connector.number_of_connections}
104+
"""
105+
data = {
106+
"title": f"🤖 Add {connector.connector_technical_name} to cloud",
107+
"body": body,
108+
"head": branch,
109+
"base": AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME,
110+
}
111+
if not pr_already_created_for_branch(branch):
112+
response = requests.post(AIRBYTE_PLATFORM_INTERNAL_PR_ENDPOINT, headers=GITHUB_API_COMMON_HEADERS, json=data)
113+
response.raise_for_status()
114+
pr_url = response.json().get("url")
115+
logger.info(f"A PR was opened for {connector.connector_technical_name}: {pr_url}")
116+
return response
117+
else:
118+
logger.warning(f"A PR already exists for branch {branch}")
119+
120+
121+
def deploy_new_connector_to_cloud_repo(airbyte_cloud_repo_path: Path, airbyte_cloud_repo: git.Repo, connector: ConnectorQAReport):
78122
"""Updates the local definitions mask on Airbyte cloud repo.
79123
Calls the generateCloudConnectorCatalog gradle task.
80124
Commits these changes on a new branch.
@@ -85,15 +129,22 @@ def deploy_new_connector_to_cloud_repo(
85129
airbyte_cloud_repo (git.Repo): The Airbyte Cloud repo instance.
86130
connector (ConnectorQAReport): The connector to add to a definitions mask.
87131
"""
88-
airbyte_cloud_repo.git.checkout(AIRBYTE_CLOUD_MAIN_BRANCH_NAME)
132+
airbyte_cloud_repo.git.checkout(AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME)
89133
new_branch_name = f"cloud-availability-updater/deploy-{connector.connector_technical_name}"
90134
checkout_new_branch(airbyte_cloud_repo, new_branch_name)
91135
definitions_mask_path = get_definitions_mask_path(airbyte_cloud_repo_path, connector.connector_type)
92-
update_definitions_mask(connector, definitions_mask_path)
93-
run_generate_cloud_connector_catalog(airbyte_cloud_repo_path)
94-
commit_all_files(
95-
airbyte_cloud_repo,
96-
f"🤖 Add {connector.connector_name} connector to cloud"
97-
)
98-
push_branch(airbyte_cloud_repo, new_branch_name)
99-
airbyte_cloud_repo.git.checkout(AIRBYTE_CLOUD_MAIN_BRANCH_NAME)
136+
updated_files = update_definitions_mask(connector, definitions_mask_path)
137+
if updated_files:
138+
run_generate_cloud_connector_catalog(airbyte_cloud_repo_path)
139+
commit_all_files(airbyte_cloud_repo, f"🤖 Add {connector.connector_name} connector to cloud")
140+
push_branch(airbyte_cloud_repo, new_branch_name)
141+
create_pr(connector, new_branch_name)
142+
airbyte_cloud_repo.git.checkout(AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME)
143+
144+
145+
def deploy_eligible_connectors_to_cloud_repo(eligible_connectors: Iterable):
146+
cloud_repo_path = Path(tempfile.mkdtemp())
147+
airbyte_cloud_repo = clone_airbyte_cloud_repo(cloud_repo_path)
148+
for connector in eligible_connectors:
149+
deploy_new_connector_to_cloud_repo(cloud_repo_path, airbyte_cloud_repo, connector)
150+
shutil.rmtree(cloud_repo_path)
Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
11
#
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
4+
import os
45

56
CONNECTOR_BUILD_OUTPUT_URL = "https://dnsgjos7lj2fu.cloudfront.net/tests/history/connectors"
67
CLOUD_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/cloud_catalog.json"
78
OSS_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/oss_catalog.json"
89

910
INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS = [
10-
"8be1cf83-fde1-477f-a4ad-318d23c9f3c6", # Local CSV
11-
"a625d593-bba5-4a1c-a53d-2d246268a816", # Local JSON
12-
"b76be0a6-27dc-4560-95f6-2623da0bd7b6" # Local SQL Lite
11+
"8be1cf83-fde1-477f-a4ad-318d23c9f3c6", # Local CSV
12+
"a625d593-bba5-4a1c-a53d-2d246268a816", # Local JSON
13+
"b76be0a6-27dc-4560-95f6-2623da0bd7b6", # Local SQL Lite
1314
]
1415

1516
GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json"
16-
AIRBYTE_CLOUD_GITHUB_REPO_URL = "https://github.com/airbytehq/airbyte-cloud.git"
17-
AIRBYTE_CLOUD_MAIN_BRANCH_NAME = "master"
17+
AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER = "airbytehq"
18+
AIRBYTE_PLATFORM_INTERNAL_REPO_NAME = "airbyte-platform-internal"
19+
AIRBYTE_PLATFORM_INTERNAL_GITHUB_REPO_URL = (
20+
f"https://github.com/{AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER}/{AIRBYTE_PLATFORM_INTERNAL_REPO_NAME}.git"
21+
)
22+
AIRBYTE_PLATFORM_INTERNAL_MAIN_BRANCH_NAME = "master"
23+
AIRBYTE_PLATFORM_INTERNAL_PR_ENDPOINT = (
24+
f"https://api.github.com/repos/{AIRBYTE_PLATFORM_INTERNAL_REPO_OWNER}/{AIRBYTE_PLATFORM_INTERNAL_REPO_NAME}/pulls"
25+
)
26+
GITHUB_API_TOKEN = os.environ.get("GITHUB_API_TOKEN")
27+
GITHUB_API_COMMON_HEADERS = {
28+
"Accept": "application/vnd.github+json",
29+
"X-GitHub-Api-Version": "2022-11-28",
30+
"Authorization": f"Bearer {GITHUB_API_TOKEN}",
31+
}

tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,28 @@
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

5+
import logging
56

7+
from . import cloud_availability_updater, enrichments, inputs, validations
68
from .constants import CLOUD_CATALOG_URL, OSS_CATALOG_URL
7-
from . import enrichments, inputs, validations
9+
10+
logging.basicConfig(level=logging.INFO)
11+
12+
logger = logging.getLogger(__name__)
813

914

1015
def main():
16+
logger.info("Fetch the OSS connectors catalog.")
1117
oss_catalog = inputs.fetch_remote_catalog(OSS_CATALOG_URL)
18+
logger.info("Fetch the Cloud connectors catalog.")
1219
cloud_catalog = inputs.fetch_remote_catalog(CLOUD_CATALOG_URL)
20+
logger.info("Fetch adoption metrics.")
1321
adoption_metrics_per_connector_version = inputs.fetch_adoption_metrics_per_connector_version()
14-
enriched_catalog = enrichments.get_enriched_catalog(
15-
oss_catalog,
16-
cloud_catalog,
17-
adoption_metrics_per_connector_version
18-
)
19-
validations.get_qa_report(enriched_catalog, len(oss_catalog))
22+
logger.info("Start the enriched catalog generation.")
23+
enriched_catalog = enrichments.get_enriched_catalog(oss_catalog, cloud_catalog, adoption_metrics_per_connector_version)
24+
logger.info("Start the QA report generation.")
25+
qa_report = validations.get_qa_report(enriched_catalog, len(oss_catalog))
26+
logger.info("Start the eligible connectors selection.")
27+
eligible_connectors = validations.get_connectors_eligible_for_cloud(qa_report)
28+
logger.info("Start eligible connectors deployment to Cloud.")
29+
cloud_availability_updater.deploy_eligible_connectors_to_cloud_repo(eligible_connectors)

tools/ci_connector_ops/ci_connector_ops/qa_engine/validations.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,49 @@
22
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
33
#
44

5+
import logging
56
from datetime import datetime
6-
from typing import Iterable
7+
from typing import List
78

89
import pandas as pd
910
import requests
1011

1112
from .constants import INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS
13+
from .inputs import BUILD_STATUSES, fetch_latest_build_status_for_connector_version
1214
from .models import ConnectorQAReport, QAReport
13-
from .inputs import fetch_latest_build_status_for_connector_version, BUILD_STATUSES
1415

15-
TRUTHY_COLUMNS_TO_BE_ELIGIBLE = [
16-
"documentation_is_available",
17-
"is_appropriate_for_cloud_use",
18-
"latest_build_is_successful"
19-
]
16+
logger = logging.getLogger(__name__)
17+
18+
19+
TRUTHY_COLUMNS_TO_BE_ELIGIBLE = ["documentation_is_available", "is_appropriate_for_cloud_use", "latest_build_is_successful"]
20+
2021

2122
class QAReportGenerationError(Exception):
2223
pass
2324

25+
2426
def url_is_reachable(url: str) -> bool:
2527
response = requests.get(url)
2628
return response.status_code == 200
2729

30+
2831
def is_appropriate_for_cloud_use(definition_id: str) -> bool:
2932
return definition_id not in INAPPROPRIATE_FOR_CLOUD_USE_CONNECTORS
3033

34+
3135
def is_eligible_for_promotion_to_cloud(connector_qa_data: pd.Series) -> bool:
32-
if connector_qa_data["is_on_cloud"]:
33-
return False
34-
return all([
35-
connector_qa_data[col]
36-
for col in TRUTHY_COLUMNS_TO_BE_ELIGIBLE
37-
])
36+
if connector_qa_data["is_on_cloud"]:
37+
return False
38+
return all([connector_qa_data[col] for col in TRUTHY_COLUMNS_TO_BE_ELIGIBLE])
39+
3840

3941
def latest_build_is_successful(connector_qa_data: pd.Series) -> bool:
4042
connector_technical_name = connector_qa_data["connector_technical_name"]
4143
connector_version = connector_qa_data["connector_version"]
4244
latest_build_status = fetch_latest_build_status_for_connector_version(connector_technical_name, connector_version)
4345
return latest_build_status == BUILD_STATUSES.SUCCESS
4446

47+
4548
def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd.DataFrame:
4649
"""Perform validation steps on top of the enriched catalog.
4750
Adds the following columns:
@@ -74,13 +77,15 @@ def get_qa_report(enriched_catalog: pd.DataFrame, oss_catalog_length: int) -> pd
7477
qa_report["report_generation_datetime"] = datetime.utcnow()
7578

7679
# Only select dataframe columns defined in the ConnectorQAReport model.
77-
qa_report= qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]]
80+
qa_report = qa_report[[field.name for field in ConnectorQAReport.__fields__.values()]]
7881
# Validate the report structure with pydantic QAReport model.
7982
QAReport(connectors_qa_report=qa_report.to_dict(orient="records"))
8083
if len(qa_report) != oss_catalog_length:
81-
raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.")
84+
raise QAReportGenerationError("The QA report does not contain all the connectors defined in the OSS catalog.")
8285
return qa_report
8386

84-
def get_connectors_eligible_for_cloud(qa_report: pd.DataFrame) -> Iterable[ConnectorQAReport]:
85-
for _, row in qa_report[qa_report["is_eligible_for_promotion_to_cloud"]].iterrows():
86-
yield ConnectorQAReport(**row)
87+
88+
def get_connectors_eligible_for_cloud(qa_report: pd.DataFrame) -> List[ConnectorQAReport]:
89+
eligible_connectors = [ConnectorQAReport(**row) for _, row in qa_report[qa_report["is_eligible_for_promotion_to_cloud"]].iterrows()]
90+
logger.info(f"{len(eligible_connectors)} connectors are eligible for Cloud.")
91+
return eligible_connectors

tools/ci_connector_ops/setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"pandas-gbq~=0.19.0",
1313
"pydantic~=1.10.4",
1414
"fsspec~=2023.1.0",
15-
"gcsfs~=2023.1.0"
15+
"gcsfs~=2023.1.0",
1616
]
1717

1818
TEST_REQUIREMENTS = [
@@ -21,7 +21,7 @@
2121
]
2222

2323
setup(
24-
version="0.1.10",
24+
version="0.1.11",
2525
name="ci_connector_ops",
2626
description="Package maintained by the connector operations team to perform CI for connectors",
2727
author="Airbyte",
@@ -40,7 +40,7 @@
4040
"print-mandatory-reviewers = ci_connector_ops.acceptance_test_config_checks:print_mandatory_reviewers",
4141
"allowed-hosts-checks = ci_connector_ops.allowed_hosts_checks:check_allowed_hosts",
4242
"run-qa-engine = ci_connector_ops.qa_engine.main:main",
43-
"run-qa-checks = ci_connector_ops.qa_checks:run_qa_checks"
43+
"run-qa-checks = ci_connector_ops.qa_checks:run_qa_checks",
4444
],
4545
},
4646
)

0 commit comments

Comments (0)