-
Notifications
You must be signed in to change notification settings - Fork 4.5k
tech spec: demo of common base image for all python connectors #29477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
86935d9
8328c9d
9b3ff73
7083efe
16fc882
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Python base images used for connector built are declared [here](https://github.com/airbytehq/airbyte/blob/8328c9dd89f6417295a20f7c0f5b823a2f02ee8e/airbyte-ci/connectors/pipelines/pipelines/builds/base_images/python.py) | ||
|
||
# Changelog for airbyte-python-base | ||
|
||
|Version| Changelog | | ||
|-------|-------------------------------------------------------------------------------------------------| | ||
| 1.0.0 | Upgrades the base image to Python 3.10.12. | | ||
| 0.0.2 | Adds git to the base image. | | ||
| 0.0.1 |Declares the legacy base image with mandatory debian packages, pip upgrade, timezone settings ...| |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# | ||
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
import inspect | ||
import sys | ||
from abc import ABC, abstractmethod | ||
from enum import Enum | ||
from typing import Type | ||
|
||
import dagger | ||
from py_markdown_table.markdown_table import markdown_table | ||
|
||
|
||
class PythonBase(Enum):
    """Upstream Python images that the Airbyte base images are built from.

    Each value is an image reference of the form ``<name>:<tag>@sha256:<hex>``.
    """

    # Using image digest to ensure that the image is not changed
    PYTHON_3_9 = "python:3.9@sha256:0596c508fdfdf28fd3b98e170f7e3d4708d01df6e6d4bffa981fd6dd22dbd1a5"
    # The digest algorithm and hex value are separated by ':' ("sha256:<hex>"), not '-'.
    PYTHON_3_10_12 = "python:3.10.12@sha256:527cc6f230cf7de1f972fbb0ffc850035e91fb4a52058b44906ea706b3018bb6"
|
||
|
||
class VersionError(Exception):
    """Raised when a base image class name does not encode an 'x.y.z' version."""


class AirbytePythonBase(ABC):
    """Abstract declaration of one version of the Airbyte Python base image.

    The version is derived from the subclass name: the name is split on ``_``
    and everything after the first segment is joined with dots, so a class
    named ``_1_2_3`` declares version ``1.2.3``.

    Subclasses must provide ``python_base_image``, ``changelog`` and
    ``container``.
    """

    name = "airbyte-python-base"

    TIMEZONE = "Etc/UTC"

    @property
    @abstractmethod
    def python_base_image(cls) -> PythonBase:
        """The upstream Python image this version is built from."""
        raise NotImplementedError("Subclasses must define a 'python_base_image'.")

    @property
    @abstractmethod
    def changelog(cls) -> str:
        """Human readable description of what this version changes."""
        raise NotImplementedError("Subclasses must define a 'changelog' attribute.")

    def __init__(self, dagger_client: dagger.Client):
        """Store the dagger client and check that the class name encodes a valid version.

        Raises:
            VersionError: if the version derived from the class name is not 'x.y.z' with numeric parts.
        """
        self.dagger_client = dagger_client
        self.validate_version()

    @classmethod
    def name_and_version(cls) -> str:
        """Return ``"<name>:<version>"`` for this class."""
        return f"{cls.name}:{cls.version()}"

    @classmethod
    def version(cls) -> str:
        """Return the dotted version encoded in the class name (``_1_2_3`` -> ``"1.2.3"``)."""
        return ".".join(cls.__name__.split("_")[1:])

    @property
    def base(self) -> dagger.Container:
        """Container started from ``python_base_image`` with BASE_IMAGE set to ``name_and_version()``."""
        return (
            self.dagger_client.container()
            .from_(self.python_base_image.value)
            .with_env_variable("BASE_IMAGE", self.name_and_version())
        )

    @property
    @abstractmethod
    def container(self):
        """The fully built container for this base image version."""
        raise NotImplementedError("Subclasses must define a 'container' property.")

    def validate_version(self) -> None:
        """Check that ``version()`` has exactly three parts and that each part is made of digits.

        Raises:
            VersionError: if the version does not match that format.
        """
        version_parts = self.version().split(".")
        # Both conditions must hold. The previous `not a == 3 and all(...)` negated only the
        # length check, so versions such as "1.0.x" or "a.b" were accepted.
        if len(version_parts) != 3 or not all(part.isdigit() for part in version_parts):
            raise VersionError("Version must be in the format 'x.y.z' and each part must be a digit.")
|
||
|
||
class _0_0_1(AirbytePythonBase):
    """Base image 0.0.1, built from PythonBase.PYTHON_3_9.

    The container points /etc/localtime at TIMEZONE, installs ``apt_packages``
    and upgrades pip.
    """

    python_base_image = PythonBase.PYTHON_3_9

    changelog = "Declares the legacy base image with mandatory debian packages, pip upgrade, timezone settings ..."

    # Debian packages installed with apt-get in the container.
    apt_packages = [
        "curl",
        "bash",
        "build-essential",
        "cmake",
        "g++",
        "libffi-dev",
        "libstdc++6",
    ]

    @property
    def container(self) -> dagger.Container:
        """Return the base container with timezone, apt packages and pip upgrade applied, in that order."""
        zoneinfo_path = f"/usr/share/zoneinfo/{self.TIMEZONE}"
        with_timezone = self.base.with_exec(["ln", "-snf", zoneinfo_path, "/etc/localtime"])
        with_apt_index = with_timezone.with_exec(["apt-get", "update"])
        with_apt_packages = with_apt_index.with_exec(["apt-get", "install", "-y", *self.apt_packages])
        return with_apt_packages.with_exec(["pip", "install", "--upgrade", "pip"])
|
||
|
||
class _0_0_2(_0_0_1):
    """Base image 0.0.2: the 0.0.1 container with git installed on top."""

    python_base_image = PythonBase.PYTHON_3_9

    changelog = "Adds git to the base image."

    @property
    def container(self) -> dagger.Container:
        """Return the parent version's container with git installed through apt-get."""
        # `with_exec` is a method of the container, not of the parent class: go through the
        # parent's `container` property before chaining the extra step.
        return super().container.with_exec(["apt-get", "install", "-y", "git"])
|
||
|
||
class _1_0_0(_0_0_2):
    """Base image 1.0.0: same build steps as 0.0.2, started from PythonBase.PYTHON_3_10_12."""

    # Only the upstream image changes; `container` is inherited unchanged.
    python_base_image = PythonBase.PYTHON_3_10_12

    changelog = "Upgrades the base image to Python 3.10.12."
|
||
|
||
def get_all_base_images() -> dict[str, Type[AirbytePythonBase]]:
    """Discover every base image version declared in this module.

    This is sugar that saves hardcoding the list of base image versions:
    implementing a new AirbytePythonBase subclass in this module is the only
    step needed to make a new base version available.

    Returns:
        A mapping of ``name_and_version()`` to the class, ordered so that the
        latest version comes first.
    """

    def version_key(base_cls: Type[AirbytePythonBase]) -> tuple[int, ...]:
        # Compare versions numerically: ordering by class name would rank "_10_0_0" below "_2_0_0".
        # Non-numeric parts sort lowest instead of raising here, at import time.
        return tuple(int(part) if part.isdigit() else -1 for part in base_cls.version().split("."))

    base_image_classes = [
        cls_member
        for _, cls_member in inspect.getmembers(sys.modules[__name__], inspect.isclass)
        # Test the class itself rather than its metaclass, and leave out the abstract base.
        if issubclass(cls_member, AirbytePythonBase) and cls_member is not AirbytePythonBase
    ]
    return {
        base_cls.name_and_version(): base_cls
        for base_cls in sorted(base_image_classes, key=version_key, reverse=True)
    }
|
||
|
||
# Registry of the base image classes declared in this module, keyed by "<name>:<version>".
# It is computed once, when the module is imported.
ALL_BASE_IMAGES = get_all_base_images()
|
||
|
||
def write_changelog_file():
    """Write PYTHON_BASE_IMAGES_CHANGELOG.md with one table row per entry of ALL_BASE_IMAGES.

    The file is opened with a relative path, so it is created (or overwritten)
    in the current working directory.
    """
    rows = [{"Version": image_cls.version(), "Changelog": image_cls.changelog} for image_cls in ALL_BASE_IMAGES.values()]
    table = markdown_table(rows).set_params(row_sep="markdown", quote=False).get_markdown()
    intro = "Python base images used for connector built are declared [here](https://github.com/airbytehq/airbyte/blob/8328c9dd89f6417295a20f7c0f5b823a2f02ee8e/airbyte-ci/connectors/pipelines/pipelines/builds/base_images/python.py)\n\n"
    heading = f"# Changelog for {AirbytePythonBase.name}\n\n"
    with open("PYTHON_BASE_IMAGES_CHANGELOG.md", "w") as changelog_file:
        changelog_file.write(intro + heading + table)
|
||
|
||
# NOTE(review): this call sits at module level, so it runs on every import of this module
# and rewrites PYTHON_BASE_IMAGES_CHANGELOG.md in the current working directory each time.
# Consider moving it behind an explicit entry point; confirm the intent first.
write_changelog_file()
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,9 +2,13 @@ | |
# Copyright (c) 2023 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from dagger import QueryError | ||
from pipelines.actions.environments import with_airbyte_python_connector | ||
import importlib.util | ||
from pathlib import Path | ||
|
||
from dagger import CacheVolume, Container, QueryError | ||
from pipelines.actions.environments import find_local_python_dependencies | ||
from pipelines.bases import StepResult, StepStatus | ||
from pipelines.builds.base_images.python import ALL_BASE_IMAGES | ||
from pipelines.builds.common import BuildConnectorImageBase, BuildConnectorImageForAllPlatformsBase | ||
from pipelines.contexts import ConnectorContext | ||
|
||
|
@@ -15,13 +19,102 @@ class BuildConnectorImage(BuildConnectorImageBase): | |
A spec command is run on the container to validate it was built successfully. | ||
""" | ||
|
||
entrypoint = ["python", "/airbyte/integration_code/main.py"] | ||
|
||
async def _run(self) -> StepResult:
    """Build the connector image on top of its base image and validate it by running `spec`.

    Returns:
        A FAILURE StepResult when the base image is not in ALL_BASE_IMAGES or when the
        `spec` execution raises a QueryError; otherwise the value of `get_step_result`
        for the `spec` execution.
    """
    # TODO: read the base image from the connector metadata ("connectorBaseImage").
    # Hardcode the base image for demo purposes
    connector_base_image = "airbyte-python-base:0.0.1"

    if connector_base_image not in ALL_BASE_IMAGES:
        supported_base_images = ", ".join(ALL_BASE_IMAGES)
        # Pass the message by keyword, like the QueryError branch below, so it cannot land in
        # whichever StepResult field happens to be third.
        return StepResult(
            self,
            StepStatus.FAILURE,
            stderr=(
                f"Connector base image {connector_base_image} does not exist. "
                f"Supported connector base images are: {supported_base_images}"
            ),
        )
    base: Container = ALL_BASE_IMAGES[connector_base_image](self.dagger_client).container
    connector: Container = await self._build_from_base(base)
    try:
        return await self.get_step_result(connector.with_exec(["spec"]))
    except QueryError as e:
        return StepResult(self, StepStatus.FAILURE, stderr=str(e))
|
||
async def _build_from_base(self, base: Container):
    """Assemble the connector container on top of ``base``.

    Steps, in order: set DAGGER_BUILD and the workdir, mount the pip cache, add setup.py,
    mount the local dependencies found from setup.py, pip-install the package, add main.py
    and the connector package directory, set the entrypoint and the io.airbyte labels,
    then hand the container to ``finalize_build``.

    Args:
        base: container of the base image to build on.

    Returns:
        The container returned by ``finalize_build``.
    """
    context = self.context
    package_dir_name = context.connector.technical_name.replace("-", "_")
    pip_cache_volume: CacheVolume = context.dagger_client.cache_volume("pip_cache")

    local_dependency_paths = await find_local_python_dependencies(
        context,
        str(context.connector.code_directory),
        search_dependencies_in_setup_py=True,
        search_dependencies_in_requirements_txt=False,
    )

    setup_py_file = (await context.get_connector_dir(include="setup.py")).file("setup.py")
    container: Container = (
        base.with_env_variable("DAGGER_BUILD", "True")
        .with_workdir("/airbyte/integration_code")
        .with_mounted_cache("/root/.cache/pip", pip_cache_volume)
        .with_file("setup.py", setup_py_file)
    )

    # Each local dependency is mounted under /local_dependencies/<directory name>.
    for local_dependency_path in local_dependency_paths:
        mount_point = f"/local_dependencies/{Path(local_dependency_path).name}"
        container = container.with_mounted_directory(mount_point, context.get_repo_dir(local_dependency_path))

    container = container.with_exec(["pip", "install", "--prefix=/usr/local", "."])

    main_py_file = (await context.get_connector_dir()).file("main.py")
    container = container.with_file("main.py", main_py_file)

    package_dir = (await context.get_connector_dir(include=package_dir_name)).directory(package_dir_name)
    container = container.with_directory(package_dir_name, package_dir)

    entrypoint_as_string = " ".join(self.entrypoint)
    container = (
        container.with_env_variable("AIRBYTE_ENTRYPOINT", entrypoint_as_string)
        .with_entrypoint(self.entrypoint)
        .with_label("io.airbyte.version", context.metadata["dockerImageTag"])
        .with_label("io.airbyte.name", context.metadata["dockerRepository"])
    )
    return await self.finalize_build(context, container)
|
||
@staticmethod
async def finalize_build(context: ConnectorContext, connector_container: Container) -> Container:
    """Finalize build by running finalize_build.sh or finalize_build.py if present in the connector directory.

    Args:
        context: connector context; used to read the connector directory and to log.
        connector_container: the built connector container to finalize.

    Returns:
        ``connector_container`` unchanged when no finalize script exists; otherwise the
        finalized container with its original entrypoint restored.

    Raises:
        Exception: if the connector has both scripts, or if finalize_build.py does not
            define a ``finalize_build`` function.
    """
    connector_dir_with_finalize_script = await context.get_connector_dir(include=["finalize_build.sh", "finalize_build.py"])
    finalize_scripts = await connector_dir_with_finalize_script.entries()
    if not finalize_scripts:
        return connector_container

    # We don't want finalize scripts to override the entrypoint so we keep it in memory to reset it after finalization
    original_entrypoint = await connector_container.entrypoint()

    has_finalize_bash_script = "finalize_build.sh" in finalize_scripts
    has_finalize_python_script = "finalize_build.py" in finalize_scripts
    if has_finalize_python_script and has_finalize_bash_script:
        raise Exception("Connector has both finalize_build.sh and finalize_build.py, please remove one of them")

    if has_finalize_python_script:
        context.logger.info(f"{context.connector.technical_name} has a finalize_build.py script, running it to finalize build...")
        module_path = context.connector.code_directory / "finalize_build.py"
        connector_finalize_module_spec = importlib.util.spec_from_file_location(
            f"{context.connector.code_directory.name}_finalize", module_path
        )
        connector_finalize_module = importlib.util.module_from_spec(connector_finalize_module_spec)
        connector_finalize_module_spec.loader.exec_module(connector_finalize_module)
        # Look the hook up before calling it. Wrapping the call itself in `except AttributeError`
        # would also catch AttributeErrors raised inside the connector's script and report them
        # as a missing function.
        finalize_hook = getattr(connector_finalize_module, "finalize_build", None)
        if finalize_hook is None:
            raise Exception("Connector has a finalize_build.py script but it doesn't have a finalize_build function.")
        connector_container = await finalize_hook(context, connector_container)

    if has_finalize_bash_script:
        context.logger.info(f"{context.connector.technical_name} has finalize_build.sh script, running it to finalize build...")
        connector_container = (
            connector_container.with_file("/tmp/finalize_build.sh", connector_dir_with_finalize_script.file("finalize_build.sh"))
            # The entrypoint is an argument list, like the one restored below.
            .with_entrypoint(["sh"])
            .with_exec(["/tmp/finalize_build.sh"])
        )

    return connector_container.with_entrypoint(original_entrypoint)
|
||
|
||
class BuildConnectorImageForAllPlatforms(BuildConnectorImageForAllPlatformsBase): | ||
"""Build a Python connector image for all platforms.""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm showcasing here how inheritance can help in patching a previous version with maximal code reuse.
Nothing in the pattern I implemented forces a new version to inherit from the previous one. In other words: the `container` property can be re-implemented from scratch on an AirbytePythonBase subclass if we don't want to build on top of the previous version.