
Commit 4779b1e

Notify commit owner via slack message (#37803)
## What

Updates our Slack lifecycle notifications to mention the author of the metadata change on Slack.

![image.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/PTsI7qAmiIMkhFQg04QF/b20cd2d2-dc18-4a15-ae0e-0f8a218cf871.png)

Spun out of #32715 as a stack.
1 parent c0492b0 commit 4779b1e

8 files changed: +188 -21 lines

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py (+2 -1)

```diff
@@ -4,7 +4,7 @@
 from dagster import Definitions, EnvVar, ScheduleDefinition, load_assets_from_modules
 from dagster_slack import SlackResource
 from metadata_service.constants import METADATA_FILE_NAME, METADATA_FOLDER
-from orchestrator.assets import connector_test_report, github, metadata, registry, registry_entry, registry_report, specs_secrets_mask
+from orchestrator.assets import connector_test_report, github, metadata, registry, registry_entry, registry_report, specs_secrets_mask, slack
 from orchestrator.config import (
     CI_MASTER_TEST_OUTPUT_REGEX,
     CI_TEST_REPORT_PREFIX,
@@ -41,6 +41,7 @@
 
 ASSETS = load_assets_from_modules(
     [
+        slack,
         github,
         specs_secrets_mask,
         metadata,
```

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/registry_entry.py (+81 -11)

```diff
@@ -10,21 +10,22 @@
 import orchestrator.hacks as HACKS
 import pandas as pd
 import sentry_sdk
-import yaml
 from dagster import AutoMaterializePolicy, DynamicPartitionsDefinition, MetadataValue, OpExecutionContext, Output, asset
 from dagster_gcp.gcs.file_manager import GCSFileHandle, GCSFileManager
 from google.cloud import storage
 from metadata_service.constants import ICON_FILE_NAME, METADATA_FILE_NAME
 from metadata_service.models.generated.ConnectorRegistryDestinationDefinition import ConnectorRegistryDestinationDefinition
 from metadata_service.models.generated.ConnectorRegistrySourceDefinition import ConnectorRegistrySourceDefinition
+from metadata_service.models.transform import to_json_sanitized_dict
 from metadata_service.spec_cache import SpecCache
 from orchestrator.config import MAX_METADATA_PARTITION_RUN_REQUEST, VALID_REGISTRIES, get_public_url_for_gcs_file
 from orchestrator.logging import sentry
 from orchestrator.logging.publish_connector_lifecycle import PublishConnectorLifecycle, PublishConnectorLifecycleStage, StageStatus
 from orchestrator.models.metadata import LatestMetadataEntry, MetadataDefinition
+from orchestrator.utils.blob_helpers import yaml_blob_to_dict
 from orchestrator.utils.dagster_helpers import OutputDataFrame
 from orchestrator.utils.object_helpers import deep_copy_params
-from pydantic import ValidationError
+from pydantic import BaseModel, ValidationError
 from pydash.objects import get
 
 PolymorphicRegistryEntry = Union[ConnectorRegistrySourceDefinition, ConnectorRegistryDestinationDefinition]
@@ -332,25 +333,73 @@ def delete_registry_entry(registry_name, metadata_entry: LatestMetadataEntry, me
 
 
 @sentry_sdk.trace
-def safe_parse_metadata_definition(metadata_blob: storage.Blob) -> Optional[MetadataDefinition]:
+def safe_parse_metadata_definition(file_name: str, metadata_dict: dict) -> Optional[MetadataDefinition]:
     """
     Safely parse the metadata definition from the given metadata entry.
     Handles the case where the metadata definition is invalid for in old versions of the metadata.
     """
-    yaml_string = metadata_blob.download_as_string().decode("utf-8")
-    metadata_dict = yaml.safe_load(yaml_string)
     try:
         return MetadataDefinition.parse_obj(metadata_dict)
 
     except ValidationError as e:
         # only raise the error if "latest" is in the path
-        if "latest" in metadata_blob.name:
+        if "latest" in file_name:
             raise e
         else:
-            print(f"WARNING: Could not parse metadata definition for {metadata_blob.name}. Error: {e}")
+            print(f"WARNING: Could not parse metadata definition for {file_name}. Error: {e}")
             return None
 
 
+def safe_get_slack_user_identifier(airbyte_slack_users: pd.DataFrame, metadata_dict: Union[dict, BaseModel]) -> Optional[str]:
+    """
+    Safely get the slack user identifier from the given git info in the metadata file.
+    """
+    if isinstance(metadata_dict, BaseModel):
+        metadata_dict = to_json_sanitized_dict(metadata_dict)
+
+    # if the slack users is empty or none, return none
+    if airbyte_slack_users is None or airbyte_slack_users.empty:
+        return None
+
+    commit_author = get(metadata_dict, "data.generated.git.commit_author")
+    commit_author_email = get(metadata_dict, "data.generated.git.commit_author_email")
+
+    # if the commit author email is not present, return author name or none
+    if not commit_author_email:
+        return commit_author
+
+    # if the commit author email is present, try to find the user in the slack users dataframe
+    # if the user is not found, return the author name or none
+    slack_user = airbyte_slack_users[airbyte_slack_users["email"] == commit_author_email]
+    if slack_user.empty:
+        slack_user = airbyte_slack_users[airbyte_slack_users["real_name"] == commit_author]
+
+    if slack_user.empty:
+        return commit_author
+
+    # if the user is found, return the slack real_name and id e.g. "John Doe (U12345678)"
+    slack_id = slack_user["id"].iloc[0]
+    slack_real_name = slack_user["real_name"].iloc[0]
+    return f"{slack_real_name} (<@{slack_id}>)"
+
+
+def safe_get_commit_sha(metadata_dict: Union[dict, BaseModel]) -> Optional[str]:
+    """
+    Safely get the git commit sha from the given git info in the metadata file.
+    """
+    if isinstance(metadata_dict, BaseModel):
+        metadata_dict = to_json_sanitized_dict(metadata_dict)
+
+    # if the git commit sha is not present, return none
+    commit_sha = get(metadata_dict, "data.generated.git.commit_sha")
+    if not commit_sha:
+        return None
+
+    # if the git commit sha is present, return the commit sha
+    return commit_sha
+
+
 # ASSETS
 
 
@@ -362,7 +411,7 @@ def safe_parse_metadata_definition(metadata_blob: storage.Blob) -> Optional[Meta
     auto_materialize_policy=AutoMaterializePolicy.eager(max_materializations_per_minute=MAX_METADATA_PARTITION_RUN_REQUEST),
 )
 @sentry.instrument_asset_op
-def metadata_entry(context: OpExecutionContext) -> Output[Optional[LatestMetadataEntry]]:
+def metadata_entry(context: OpExecutionContext, airbyte_slack_users: pd.DataFrame) -> Output[Optional[LatestMetadataEntry]]:
     """Parse and compute the LatestMetadataEntry for the given metadata file."""
     etag = context.partition_key
     context.log.info(f"Processing metadata file with etag {etag}")
@@ -373,16 +422,22 @@ def metadata_entry(context: OpExecutionContext) -> Output[Optional[LatestMetadat
     if not matching_blob:
         raise Exception(f"Could not find blob with etag {etag}")
 
+    metadata_dict = yaml_blob_to_dict(matching_blob)
+    user_identifier = safe_get_slack_user_identifier(airbyte_slack_users, metadata_dict)
+    commit_sha = safe_get_commit_sha(metadata_dict)
+
     metadata_file_path = matching_blob.name
     PublishConnectorLifecycle.log(
         context,
         PublishConnectorLifecycleStage.METADATA_VALIDATION,
         StageStatus.IN_PROGRESS,
         f"Found metadata file with path {metadata_file_path} for etag {etag}",
+        user_identifier=user_identifier,
+        commit_sha=commit_sha,
     )
 
     # read the matching_blob into a metadata definition
-    metadata_def = safe_parse_metadata_definition(matching_blob)
+    metadata_def = safe_parse_metadata_definition(matching_blob.name, metadata_dict)
 
     dagster_metadata = {
         "bucket_name": matching_blob.bucket.name,
@@ -398,6 +453,8 @@ def metadata_entry(context: OpExecutionContext) -> Output[Optional[LatestMetadat
             PublishConnectorLifecycleStage.METADATA_VALIDATION,
             StageStatus.FAILED,
             f"Could not parse metadata definition for {metadata_file_path}, dont panic, this can be expected for old metadata files",
+            user_identifier=user_identifier,
+            commit_sha=commit_sha,
         )
         return Output(value=None, metadata=dagster_metadata)
 
@@ -422,6 +479,8 @@ def metadata_entry(context: OpExecutionContext) -> Output[Optional[LatestMetadat
         PublishConnectorLifecycleStage.METADATA_VALIDATION,
         StageStatus.SUCCESS,
         f"Successfully parsed metadata definition for {metadata_file_path}",
+        user_identifier=user_identifier,
+        commit_sha=commit_sha,
     )
 
     return Output(value=metadata_entry, metadata=dagster_metadata)
@@ -434,19 +493,26 @@ def metadata_entry(context: OpExecutionContext) -> Output[Optional[LatestMetadat
     auto_materialize_policy=AutoMaterializePolicy.eager(max_materializations_per_minute=MAX_METADATA_PARTITION_RUN_REQUEST),
 )
 @sentry.instrument_asset_op
-def registry_entry(context: OpExecutionContext, metadata_entry: Optional[LatestMetadataEntry]) -> Output[Optional[dict]]:
+def registry_entry(
+    context: OpExecutionContext, metadata_entry: Optional[LatestMetadataEntry], airbyte_slack_users: pd.DataFrame
+) -> Output[Optional[dict]]:
     """
     Generate the registry entry files from the given metadata file, and persist it to GCS.
     """
     if not metadata_entry:
         # if the metadata entry is invalid, return an empty dict
         return Output(metadata={"empty_metadata": True}, value=None)
 
+    user_identifier = safe_get_slack_user_identifier(airbyte_slack_users, metadata_entry.metadata_definition)
+    commit_sha = safe_get_commit_sha(metadata_entry.metadata_definition)
+
     PublishConnectorLifecycle.log(
         context,
         PublishConnectorLifecycleStage.REGISTRY_ENTRY_GENERATION,
         StageStatus.IN_PROGRESS,
         f"Generating registry entry for {metadata_entry.file_path}",
+        user_identifier=user_identifier,
+        commit_sha=commit_sha,
    )
 
     spec_cache = SpecCache()
@@ -488,7 +554,9 @@ def registry_entry(context: OpExecutionContext, metadata_entry: Optional[LatestM
             context,
             PublishConnectorLifecycleStage.REGISTRY_ENTRY_GENERATION,
             StageStatus.SUCCESS,
-            f"Successfully generated {registry_name} registry entry for {metadata_entry.file_path} at {registry_url}",
+            f"Successfully generated {registry_name} registry entry for {metadata_entry.file_path} at {registry_url}.\n\n*This new Connector will be available for use in the platform on the next release (1-3 min)*",
+            user_identifier=user_identifier,
+            commit_sha=commit_sha,
         )
 
     # Log the registry entries that were deleted
@@ -498,6 +566,8 @@ def registry_entry(context: OpExecutionContext, metadata_entry: Optional[LatestM
             PublishConnectorLifecycleStage.REGISTRY_ENTRY_GENERATION,
             StageStatus.SUCCESS,
             f"Successfully deleted {registry_name} registry entry for {metadata_entry.file_path}",
+            user_identifier=user_identifier,
+            commit_sha=commit_sha,
        )
 
     return Output(metadata=dagster_metadata, value=persisted_registry_entries)
```
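For reference, a minimal sketch (not part of the commit) of how the two new helpers resolve an author and commit from the metadata's generated git info; the DataFrame rows and metadata values below are illustrative placeholders, not data from this change:

```python
import pandas as pd

from orchestrator.assets.registry_entry import safe_get_commit_sha, safe_get_slack_user_identifier

# Illustrative Slack users table, shaped like the airbyte_slack_users asset output.
airbyte_slack_users = pd.DataFrame(
    [{"id": "U12345678", "real_name": "Jane Doe", "email": "jane@example.com"}]
)

# Illustrative metadata dict; only the generated git block matters here.
metadata_dict = {
    "data": {
        "generated": {
            "git": {
                "commit_author": "Jane Doe",
                "commit_author_email": "jane@example.com",
                "commit_sha": "4779b1e",
            }
        }
    }
}

# Email match found, so the identifier includes the Slack mention syntax.
print(safe_get_slack_user_identifier(airbyte_slack_users, metadata_dict))  # "Jane Doe (<@U12345678>)"
print(safe_get_commit_sha(metadata_dict))  # "4779b1e"
```

If the email is missing or no Slack user matches, the helper falls back to the plain author name (or None), so the notification degrades gracefully.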
airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/slack.py (new file, +51)

```diff
@@ -0,0 +1,51 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import os
+
+import pandas as pd
+from dagster import AutoMaterializePolicy, FreshnessPolicy, OpExecutionContext, Output, asset
+from orchestrator.utils.dagster_helpers import OutputDataFrame, output_dataframe
+
+GROUP_NAME = "slack"
+
+USER_REQUEST_CHUNK_SIZE = 2000
+MAX_REQUESTS = 5
+
+
+@asset(
+    group_name=GROUP_NAME,
+    required_resource_keys={"slack"},
+    auto_materialize_policy=AutoMaterializePolicy.eager(),
+    freshness_policy=FreshnessPolicy(maximum_lag_minutes=60 * 12),
+)
+def airbyte_slack_users(context: OpExecutionContext) -> OutputDataFrame:
+    """
+    Return a list of all users in the airbyte slack.
+    """
+    if not os.getenv("SLACK_TOKEN"):
+        context.log.info("Skipping Slack Users asset as SLACK_TOKEN is not set")
+        return None
+
+    client = context.resources.slack.get_client()
+    users_response = client.users_list(limit=2000)
+    metadata = users_response.data["response_metadata"]
+    users = users_response.data["members"]
+    requests_count = 1
+
+    while metadata["next_cursor"] and requests_count < MAX_REQUESTS:
+        users_response = client.users_list(limit=2000, cursor=metadata["next_cursor"])
+        metadata = users_response.data["response_metadata"]
+        users.extend(users_response.data["members"])
+        requests_count += 1
+
+    # Convert to a dataframe of id, real_name, and email
+    # Remove any deleted or bot profiles
+    users_df = pd.DataFrame(users)
+    users_df = users_df[users_df["deleted"] == False]
+    users_df = users_df[users_df["is_bot"] == False]
+    users_df["email"] = users_df["profile"].apply(lambda x: x.get("email", None))
+    users_df = users_df[["id", "real_name", "email"]]
+
+    return output_dataframe(users_df)
```
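A rough sketch of how this asset reaches the downstream ops: Dagster matches the `airbyte_slack_users` parameter added to `metadata_entry` and `registry_entry` against the asset of the same name, so no explicit edge is declared. The resource wiring below is a simplified assumption for illustration; the real `Definitions` in `__init__.py` also configures GCS resources, schedules, and sensors that are not shown in this diff:

```python
from dagster import Definitions, EnvVar, load_assets_from_modules
from dagster_slack import SlackResource

from orchestrator.assets import registry_entry, slack

defs = Definitions(
    # The slack module contributes the airbyte_slack_users asset; registry_entry's
    # assets pick it up by parameter name.
    assets=load_assets_from_modules([slack, registry_entry]),
    # Assumed wiring: the asset's required "slack" resource key backed by SLACK_TOKEN.
    resources={"slack": SlackResource(token=EnvVar("SLACK_TOKEN"))},
)
```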

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/config.py (+3)

```diff
@@ -46,3 +46,6 @@ def get_public_metadata_service_url(file_path: str) -> str:
     metadata_bucket = os.getenv("METADATA_BUCKET")
     metadata_cdn_url = os.getenv("METADATA_CDN_BASE_URL")
     return get_public_url_for_gcs_file(metadata_bucket, file_path, metadata_cdn_url)
+
+
+REPO_URL = "https://github.com/airbytehq/airbyte/"
```

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/logging/publish_connector_lifecycle.py (+30 -3)

```diff
@@ -6,6 +6,7 @@
 from enum import Enum
 
 from dagster import OpExecutionContext
+from orchestrator.config import REPO_URL
 from orchestrator.ops.slack import send_slack_message
 
 
@@ -56,19 +57,45 @@ def stage_to_log_level(stage_status: StageStatus) -> str:
         else:
             return "info"
 
+    def _commit_link(commit_sha: str) -> str:
+        """Create a markdown link to a commit."""
+        commit_url = f"{REPO_URL}/commit/{commit_sha}"
+        return f"\ncommit: <{commit_url}|{commit_sha}>"
+
+    def _user_mention(user_identifier: str) -> str:
+        """Create a markdown link to a user."""
+        return f"\nauthor: {user_identifier}"
+
     @staticmethod
     def create_log_message(
         lifecycle_stage: PublishConnectorLifecycleStage,
         stage_status: StageStatus,
         message: str,
+        commit_sha: str = None,
+        user_identifier: str = None,
     ) -> str:
         emoji = stage_status.to_emoji()
-        return f"*{emoji} _{lifecycle_stage}_ {stage_status}*: {message}"
+        final_message = f"*{emoji} _{lifecycle_stage}_ {stage_status}*:\n{message}"
+
+        if user_identifier:
+            final_message += PublishConnectorLifecycle._user_mention(user_identifier)
+
+        if commit_sha:
+            final_message += PublishConnectorLifecycle._commit_link(commit_sha)
+
+        return final_message
 
     @staticmethod
-    def log(context: OpExecutionContext, lifecycle_stage: PublishConnectorLifecycleStage, stage_status: StageStatus, message: str):
+    def log(
+        context: OpExecutionContext,
+        lifecycle_stage: PublishConnectorLifecycleStage,
+        stage_status: StageStatus,
+        message: str,
+        commit_sha: str = None,
+        user_identifier: str = None,
+    ):
         """Publish a connector notification log to logger and slack (if enabled)."""
-        message = PublishConnectorLifecycle.create_log_message(lifecycle_stage, stage_status, message)
+        message = PublishConnectorLifecycle.create_log_message(lifecycle_stage, stage_status, message, commit_sha, user_identifier)
 
         level = PublishConnectorLifecycle.stage_to_log_level(stage_status)
         log_method = getattr(context.log, level)
```
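To see what the enriched notification carries, here is a small sketch calling the updated `create_log_message` with placeholder author and commit values; the exact rendering of the stage and status depends on their enum definitions and `to_emoji()`, so the commented output is only approximate:

```python
from orchestrator.logging.publish_connector_lifecycle import (
    PublishConnectorLifecycle,
    PublishConnectorLifecycleStage,
    StageStatus,
)

text = PublishConnectorLifecycle.create_log_message(
    PublishConnectorLifecycleStage.METADATA_VALIDATION,
    StageStatus.SUCCESS,
    "Successfully parsed metadata definition for metadata.yaml",
    commit_sha="4779b1e",  # placeholder sha
    user_identifier="Jane Doe (<@U12345678>)",  # placeholder identifier
)
# text is roughly:
# "*<emoji> _<stage>_ <status>*:
#  Successfully parsed metadata definition for metadata.yaml
#  author: Jane Doe (<@U12345678>)
#  commit: <{REPO_URL}/commit/4779b1e|4779b1e>"
```

The author line uses Slack's `<@USER_ID>` mention syntax, which is what makes the commit owner get pinged in the channel.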
airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/utils/blob_helpers.py (new file, +12)

```diff
@@ -0,0 +1,12 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+
+import yaml
+from google.cloud import storage
+
+
+def yaml_blob_to_dict(yaml_blob: storage.Blob) -> dict:
+    """
+    Convert the given yaml blob to a dictionary.
+    """
+    yaml_string = yaml_blob.download_as_string().decode("utf-8")
+    return yaml.safe_load(yaml_string)
```
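Hypothetical standalone usage of the new helper against a metadata blob in GCS; the bucket and object names below are placeholders, not values from this commit:

```python
from google.cloud import storage

from orchestrator.utils.blob_helpers import yaml_blob_to_dict

client = storage.Client()
bucket = client.bucket("my-metadata-bucket")  # placeholder bucket name
blob = bucket.blob("metadata/some-connector/latest/metadata.yaml")  # placeholder object path
metadata_dict = yaml_blob_to_dict(blob)  # downloads and yaml-parses the blob
print(list(metadata_dict))  # top-level keys of the parsed metadata
```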

airbyte-ci/connectors/metadata_service/orchestrator/poetry.lock (+4 -4)

Some generated files are not rendered by default.

airbyte-ci/connectors/metadata_service/orchestrator/tests/test_registry.py (+5 -2)

```diff
@@ -27,6 +27,7 @@
     oss_sources_dataframe,
 )
 from orchestrator.models.metadata import LatestMetadataEntry, MetadataDefinition
+from orchestrator.utils.blob_helpers import yaml_blob_to_dict
 from pydantic import ValidationError
 
 VALID_METADATA_DICT = {
@@ -64,11 +65,13 @@ def test_safe_parse_metadata_definition(blob_name, blob_content, expected_result
     mock_blob.name = blob_name
     mock_blob.download_as_string.return_value = blob_content.encode("utf-8")
 
+    metadata_dict = yaml_blob_to_dict(mock_blob)
+
     if expected_exception:
         with pytest.raises(expected_exception):
-            safe_parse_metadata_definition(mock_blob)
+            safe_parse_metadata_definition(mock_blob.name, metadata_dict)
     else:
-        result = safe_parse_metadata_definition(mock_blob)
+        result = safe_parse_metadata_definition(mock_blob.name, metadata_dict)
         # assert the name is set correctly
         assert result == expected_result
 
```