diff --git a/.github/workflows/publish-cdk-command-manually.yml b/.github/workflows/publish-cdk-command-manually.yml index afce576b72d0c..4f206f609bab4 100644 --- a/.github/workflows/publish-cdk-command-manually.yml +++ b/.github/workflows/publish-cdk-command-manually.yml @@ -73,7 +73,7 @@ jobs: repository: ${{ github.event.inputs.repo }} ref: ${{ github.event.inputs.gitref }} - name: Build CDK Package - run: ./gradlew --no-daemon --no-build-cache :airbyte-cdk:python:build + run: (cd airbyte-cdk/python; ./gradlew --no-daemon --no-build-cache :build) - name: Post failure to Slack channel dev-connectors-extensibility if: ${{ failure() }} uses: slackapi/slack-github-action@v1.23.0 diff --git a/airbyte-cdk/python/bin/build_code_generator_image.sh b/airbyte-cdk/python/bin/build_code_generator_image.sh new file mode 100755 index 0000000000000..f73c318317c5a --- /dev/null +++ b/airbyte-cdk/python/bin/build_code_generator_image.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +DOCKER_BUILD_ARCH="${DOCKER_BUILD_ARCH:-amd64}" +# https://docs.docker.com/develop/develop-images/build_enhancements/ +export DOCKER_BUILDKIT=1 + +CODE_GENERATOR_DOCKERFILE="$(dirname $0)/../code-generator/Dockerfile" +test -f $CODE_GENERATOR_DOCKERFILE +docker build --build-arg DOCKER_BUILD_ARCH="$DOCKER_BUILD_ARCH" -t "airbyte/code-generator:dev" - < $CODE_GENERATOR_DOCKERFILE diff --git a/airbyte-cdk/python/build.gradle b/airbyte-cdk/python/build.gradle index 63cc9992a73b6..61f355742382a 100644 --- a/airbyte-cdk/python/build.gradle +++ b/airbyte-cdk/python/build.gradle @@ -1,25 +1,134 @@ +import ru.vyarus.gradle.plugin.python.task.PythonTask + plugins { - id 'airbyte-python' - id 'airbyte-docker-legacy' + id 'base' + id 'ru.vyarus.use-python' version '2.3.0' } +def generateCodeGeneratorImage = tasks.register('generateCodeGeneratorImage', Exec) { + commandLine 'bin/build_code_generator_image.sh' +} def generateComponentManifestClassFiles = tasks.register('generateComponentManifestClassFiles', Exec) { - environment 'ROOT_DIR', rootDir.absolutePath + environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath commandLine 'bin/generate-component-manifest-files.sh' } generateComponentManifestClassFiles.configure { - dependsOn project(':tools:code-generator').tasks.named('assemble') + dependsOn generateCodeGeneratorImage } tasks.register('generate').configure { dependsOn generateComponentManifestClassFiles } tasks.register('validateSourceYamlManifest', Exec) { - environment 'ROOT_DIR', rootDir.absolutePath + environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath commandLine 'bin/validate-yaml-schema.sh' } tasks.register('runLowCodeConnectorUnitTests', Exec) { - environment 'ROOT_DIR', rootDir.absolutePath + environment 'ROOT_DIR', rootDir.parentFile.parentFile.absolutePath commandLine 'bin/low-code-unit-tests.sh' } + +def venvDirectoryName = '.venv' + +// Add a task that allows cleaning up venvs to every python project +def cleanPythonVenv = tasks.register('cleanPythonVenv', Exec) { + commandLine 'rm' + args '-rf', "${projectDir.absolutePath}/${venvDirectoryName}" +} + +tasks.named('clean').configure { + dependsOn cleanPythonVenv +} + +// Configure gradle python plugin. +python { + envPath = venvDirectoryName + minPythonVersion '3.10' + + // Amazon Linux support. + // The airbyte-ci tool runs gradle tasks in AL2023-based containers. + // In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly. + // See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details. + try { + if ("python3.11 --version".execute().waitFor() == 0) { + // python3.11 definitely exists at this point, use it instead of 'python3'. + pythonBinary "python3.11" + } + } catch (IOException _) { + // Swallow exception if python3.11 is not installed. + } + // Pyenv support. + try { + def pyenvRoot = "pyenv root".execute() + def pyenvLatest = "pyenv latest ${minPythonVersion}".execute() + // Pyenv definitely exists at this point: use 'python' instead of 'python3' in all cases. + pythonBinary "python" + if (pyenvRoot.waitFor() == 0 && pyenvLatest.waitFor() == 0) { + pythonPath "${pyenvRoot.text.trim()}/versions/${pyenvLatest.text.trim()}/bin" + } + } catch (IOException _) { + // Swallow exception if pyenv is not installed. + } + + scope 'VIRTUALENV' + installVirtualenv = true + pip 'pip:23.2.1' + pip 'mccabe:0.6.1' + // https://github.com/csachs/pyproject-flake8/issues/13 + pip 'flake8:4.0.1' + // flake8 doesn't support pyproject.toml files + // and thus there is the wrapper "pyproject-flake8" for this + pip 'pyproject-flake8:0.0.1a2' + pip 'pytest:6.2.5' + pip 'coverage[toml]:6.3.1' +} + +def installLocalReqs = tasks.register('installLocalReqs', PythonTask) { + module = "pip" + command = "install .[dev,tests]" + inputs.file('setup.py') + outputs.file('build/installedlocalreqs.txt') +} + +def flakeCheck = tasks.register('flakeCheck', PythonTask) { + module = "pflake8" + command = "--config pyproject.toml ./" +} + +def installReqs = tasks.register('installReqs', PythonTask) { + module = "pip" + command = "install .[main]" + inputs.file('setup.py') + outputs.file('build/installedreqs.txt') +} +installReqs.configure { + dependsOn installLocalReqs +} + +tasks.named('check').configure { + dependsOn installReqs + dependsOn flakeCheck +} + +def installTestReqs = tasks.register('installTestReqs', PythonTask) { + module = "pip" + command = "install .[tests]" + inputs.file('setup.py') + outputs.file('build/installedtestreqs.txt') +} +installTestReqs.configure { + dependsOn installReqs +} + +def testTask = tasks.register('testPython', PythonTask) { + module = "coverage" + command = "run --data-file=unit_tests/.coverage.testPython --rcfile=pyproject.toml -m pytest -s unit_tests -c pytest.ini" +} +testTask.configure { + dependsOn installTestReqs +} + +tasks.named('check').configure { + dependsOn testTask +} diff --git a/tools/code-generator/Dockerfile b/airbyte-cdk/python/code-generator/Dockerfile similarity index 100% rename from tools/code-generator/Dockerfile rename to airbyte-cdk/python/code-generator/Dockerfile diff --git a/airbyte-cdk/python/gradle.properties b/airbyte-cdk/python/gradle.properties new file mode 100644 index 0000000000000..a458cfe27eb92 --- /dev/null +++ b/airbyte-cdk/python/gradle.properties @@ -0,0 +1,11 @@ +# NOTE: some of these values are overwritten in CI! +# NOTE: if you want to override this for your local machine, set overrides in ~/.gradle/gradle.properties + +org.gradle.parallel=true +org.gradle.caching=true + +# Note, this might have issues on the normal Github runner. +org.gradle.vfs.watch=true + +# Tune # of cores Gradle uses. +# org.gradle.workers.max=3 diff --git a/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.jar b/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000..7f93135c49b76 Binary files /dev/null and b/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.jar differ diff --git a/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.properties b/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000000..a80b22ce5cffe --- /dev/null +++ b/airbyte-cdk/python/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/airbyte-cdk/python/gradlew b/airbyte-cdk/python/gradlew new file mode 100755 index 0000000000000..1aa94a4269074 --- /dev/null +++ b/airbyte-cdk/python/gradlew @@ -0,0 +1,249 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/airbyte-cdk/python/gradlew.bat b/airbyte-cdk/python/gradlew.bat new file mode 100644 index 0000000000000..6689b85beecde --- /dev/null +++ b/airbyte-cdk/python/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/airbyte-cdk/python/pyproject.toml b/airbyte-cdk/python/pyproject.toml index b5e8c83ca37b3..f03d6cbcbe012 100644 --- a/airbyte-cdk/python/pyproject.toml +++ b/airbyte-cdk/python/pyproject.toml @@ -6,3 +6,39 @@ requires = [ ] build-backend = "setuptools.build_meta" + +[tool.coverage.report] +fail_under = 0 +skip_empty = true +sort = "-cover" +omit = [ + ".venv/*", + "main.py", + "setup.py", + "unit_tests/*", + "integration_tests/*", + "**/generated/*", +] + +[tool.flake8] +extend-exclude = [ + "*/lib/*/site-packages", + ".venv", + "build", + "models", + ".eggs", + "airbyte-cdk/python/airbyte_cdk/models/__init__.py", + "airbyte-cdk/python/airbyte_cdk/sources/declarative/models/__init__.py", + ".tox", + "airbyte_api_client", + "**/generated/*", +] +max-complexity = 20 +max-line-length = 140 + +extend-ignore = [ + "E203", # whitespace before ':' (conflicts with Black) + "E231", # Bad trailing comma (conflicts with Black) + "E501", # line too long (conflicts with Black) + "W503", # line break before binary operator (conflicts with Black) +] \ No newline at end of file diff --git a/airbyte-cdk/python/settings.gradle b/airbyte-cdk/python/settings.gradle new file mode 100644 index 0000000000000..02e3dd9a67242 --- /dev/null +++ b/airbyte-cdk/python/settings.gradle @@ -0,0 +1,29 @@ +import com.gradle.scan.plugin.PublishedBuildScan + +pluginManagement { + repositories { + // # Gradle looks for dependency artifacts in repositories listed in 'repositories' blocks in descending order. + gradlePluginPortal() + } +} + +// Configure the gradle enterprise plugin to enable build scans. Enabling the plugin at the top of the settings file allows the build scan to record +// as much information as possible. +plugins { + id "com.gradle.enterprise" version "3.15.1" +} + +ext.isCiServer = System.getenv().containsKey("CI") + +gradleEnterprise { + buildScan { + termsOfServiceUrl = "https://gradle.com/terms-of-service" + termsOfServiceAgree = "yes" + uploadInBackground = !isCiServer // Disable in CI or scan URLs may not work. + buildScanPublished { PublishedBuildScan scan -> + file("scan-journal.log") << "${new Date()} - ${scan.buildScanId} - ${scan.buildScanUri}\n" + } + } +} + +rootProject.name = 'airbyte-cdk-python' diff --git a/airbyte-integrations/connectors-performance/destination-harness/build.gradle b/airbyte-integrations/connectors-performance/destination-harness/build.gradle index e5a3c4ca264ae..fd6bb54d0aed3 100644 --- a/airbyte-integrations/connectors-performance/destination-harness/build.gradle +++ b/airbyte-integrations/connectors-performance/destination-harness/build.gradle @@ -1,6 +1,5 @@ plugins { id 'application' - id 'airbyte-docker-legacy' } application { diff --git a/airbyte-integrations/connectors-performance/source-harness/build.gradle b/airbyte-integrations/connectors-performance/source-harness/build.gradle index abfeaa794b418..667e5af0a9a5f 100644 --- a/airbyte-integrations/connectors-performance/source-harness/build.gradle +++ b/airbyte-integrations/connectors-performance/source-harness/build.gradle @@ -1,6 +1,5 @@ plugins { id 'application' - id 'airbyte-docker-legacy' } application { diff --git a/buildSrc/src/main/groovy/DockerHelpers.groovy b/buildSrc/src/main/groovy/DockerHelpers.groovy deleted file mode 100644 index 875f6320de9e2..0000000000000 --- a/buildSrc/src/main/groovy/DockerHelpers.groovy +++ /dev/null @@ -1,23 +0,0 @@ -import java.nio.file.Paths - -class DockerHelpers { - static String extractLabelValue(String dockerFile, String labelName) { - def file = dockerFile instanceof File ? dockerFile : new File(dockerFile) - return file.readLines() - .grep({ it.startsWith('LABEL') && it.contains(labelName) }) - .get(0) - .split('=')[1] - } - - static String extractImageName(String dockerFile) { - return extractLabelValue(dockerFile, "io.airbyte.name") - } - - static String extractImageVersion(String dockerFile) { - return extractLabelValue(dockerFile, "io.airbyte.version") - } - - static String getDevTaggedImage(projectDir, dockerfileName) { - return "${extractImageName(Paths.get(projectDir.absolutePath, dockerfileName).toString())}:dev" - } -} diff --git a/buildSrc/src/main/groovy/airbyte-docker-legacy.gradle b/buildSrc/src/main/groovy/airbyte-docker-legacy.gradle deleted file mode 100644 index e323cf7c95cc5..0000000000000 --- a/buildSrc/src/main/groovy/airbyte-docker-legacy.gradle +++ /dev/null @@ -1,331 +0,0 @@ -import java.nio.file.Paths -import java.security.MessageDigest -import java.util.concurrent.ConcurrentHashMap -import org.apache.commons.text.StringSubstitutor -import org.gradle.api.DefaultTask -import org.gradle.api.GradleException -import org.gradle.api.Plugin -import org.gradle.api.Project -import org.gradle.api.file.ConfigurableFileTree -import org.gradle.api.file.FileCollection -import org.gradle.api.tasks.CacheableTask -import org.gradle.api.tasks.Input -import org.gradle.api.tasks.InputFile -import org.gradle.api.tasks.InputFiles -import org.gradle.api.tasks.OutputFile -import org.gradle.api.tasks.PathSensitive -import org.gradle.api.tasks.PathSensitivity -import org.gradle.api.tasks.TaskAction - -/** - * AirbyteDockerLegacyTask is the task which builds a docker image based on a Dockerfile. - * - * It and the other classes in this file have "Legacy" in their name because we want to get rid of this plugin in favor - * of dagger-pipeline-based tooling like `airbyte-ci`. As of the time of this writing this is already the case for - * connectors. There are still a few remaining usages outside of connectors and they are useful to support a smooth - * local java-centric development experience with gradle, especially around integration tests. - * - * Issue https://github.com/airbytehq/airbyte/issues/30708 tracks the complete removal of this plugin. - */ -@CacheableTask -abstract class AirbyteDockerLegacyTask extends DefaultTask { - - @InputFiles - @PathSensitive(PathSensitivity.RELATIVE) - FileCollection filesInDockerImage - - @Input - Map baseImageHashes - - @InputFile - @PathSensitive(PathSensitivity.RELATIVE) - File dockerFile - - @OutputFile - File idFileOutput - - @InputFile - @PathSensitive(PathSensitivity.RELATIVE) - File buildScript = project.rootProject.file('tools/bin/build_image.sh') - - @TaskAction - def dockerTask() { - project.exec { - commandLine( - buildScript.absolutePath, - project.rootDir.absolutePath, - project.projectDir.absolutePath, - dockerFile.name, - DockerHelpers.getDevTaggedImage(project.projectDir, dockerFile.name), - idFileOutput.absolutePath, - ) - } - } -} - -/** - * AirbyteDockerLegacyTaskFactory is a convenience object to avoid passing the current project around. - */ -class AirbyteDockerLegacyTaskFactory { - - private AirbyteDockerLegacyTaskFactory() {} - - Project project - String dockerFileName - - File dockerFile() { - return project.file(dockerFileName) - } - - // This hash of the full path to the Dockerfile is the name of the task's output file. - String dockerfilePathHash() { - return MessageDigest.getInstance("MD5") - .digest(dockerFile().absolutePath.getBytes()) - .encodeHex() - .toString() - } - - // A superset of the files which are COPYed into the image, defined as the project file set - // with the .dockerignore rules applied to it. - // We could be more precise by parsing the Dockerfile but this is good enough in practice. - FileCollection filteredProjectFiles() { - ConfigurableFileTree files = project.fileTree(project.projectDir) - def dockerignore = project.file('.dockerignore') - if (!dockerignore.exists()) { - return files.filter { - file -> !file.toString().contains(".venv") - } - } - for (def rule : dockerignore.readLines()) { - if (rule.startsWith("#")) { - continue - } - rule = rule.trim() - files = (rule.startsWith("!") ? files.include(rule.substring(1)) : files.exclude(rule)) as ConfigurableFileTree - } - return files - } - - // Queries docker for all images and their hashes. - static synchronized Map collectKnownImageHashes(Project project) { - def stdout = new ByteArrayOutputStream() - project.rootProject.exec { - commandLine "docker", "images", "--no-trunc", "-f", "dangling=false", "--format", "{{.Repository}}:{{.Tag}} {{.ID}}" - standardOutput = stdout - } - Map map = [:] - stdout.toString().eachLine {line -> - def splits = line.split() - map.put(splits[0], splits[1].trim()) - } - return map - } - - // Query all docker images at most once for all tasks, at task creation time. - static def lazyImageHashesAtTaskCreationTime = new LazyImageHashesCache() - - static class LazyImageHashesCache { - private Map lazyValue - - synchronized Map get(Project project) { - if (lazyValue == null) { - lazyValue = collectKnownImageHashes(project) - } - return lazyValue - } - } - - // Global mapping of tagged image name to gradle project. - // This is populated at configuration time and accessed at task creation time. - // All keys verify isTaggedImageOwnedByThisRepo. - static def taggedImageToProject = new ConcurrentHashMap() - - static boolean isTaggedImageOwnedByThisRepo(String taggedImage) { - if (!taggedImage.startsWith("airbyte/")) { - // Airbyte's docker images are all prefixed like this. - // Anything not with this prefix is therefore not owned by this repo. - return false - } - if (taggedImage.startsWith("airbyte/base-airbyte-protocol-python:")) { - // Special case: this image is not built by this repo. - return false - } - if (!taggedImage.endsWith(":dev")) { - // Special case: this image is owned by this repo but built separate. e.g. source-file-secure - return false - } - // Otherwise, assume the image is built by this repo. - return true - } - - // Returns a mapping of each base image referenced in the Dockerfile to the corresponding hash - // in the results of collectKnownImageHashes(). If no hash was found, map to "???" instead. - Map baseTaggedImagesAndHashes(Map allKnownImageHashes) { - def taggedImages = new HashSet() - - // Look for "FROM foo AS bar" directives, and add them to the map with .put("bar", "foo") - Map imageAliases = [:] - dockerFile().eachLine { line -> - def parts = line.split() - if (parts.length >= 4 && parts[0].equals("FROM") && parts[parts.length - 2].equals("AS")) { - imageAliases.put(parts[parts.length - 1], parts[1]) - } - } - - dockerFile().eachLine { line -> - if (line.startsWith("FROM ")) { - def image = line.split()[1] - assert !image.isEmpty() - taggedImages.add(image) - } else if (line.startsWith("COPY --from=")) { - def image = line.substring("COPY --from=".length()).split()[0] - assert !image.isEmpty() - if (imageAliases[image] != null) { - taggedImages.add(imageAliases[image]) - } else { - taggedImages.add(image) - } - } - } - - Map result = [:] - for (def taggedImage : taggedImages) { - // Some image tags rely on environment variables (e.g. "FROM amazoncorretto:${JDK_VERSION}"). - taggedImage = new StringSubstitutor(System.getenv()).replace(taggedImage).trim() - result.put(taggedImage, allKnownImageHashes.getOrDefault(taggedImage, "???")) - } - return result - } - - // Create the task lazily: we shouldn't invoke 'docker' unless the task is created as part of the build. - def createTask(String taskName) { - if (!dockerFile().exists()) { - // This might not actually be necessary. It doesn't seem harmful either. - return project.tasks.register(taskName) { - logger.info "Skipping ${taskName} because ${dockerFile()} does not exist." - } - } - - // Tagged name of the image to be built by this task. - def taggedImage = DockerHelpers.getDevTaggedImage(project.projectDir, dockerFileName) - // Map this project to the tagged name of the image built by this task. - taggedImageToProject.put(taggedImage, project) - // Path to the ID file to be generated by this task. - // The ID file contains the hash of the image. - def idFilePath = Paths.get(project.rootProject.rootDir.absolutePath, '.dockerversions', dockerfilePathHash()) - // Register the task (lazy creation). - def airbyteDockerTask = project.tasks.register(taskName, AirbyteDockerLegacyTask) { task -> - // Set inputs. - task.filesInDockerImage = filteredProjectFiles() - task.dockerFile = this.dockerFile() - task.baseImageHashes = baseTaggedImagesAndHashes(lazyImageHashesAtTaskCreationTime.get(project)) - // Set dependencies on base images built by this repo. - for (String taggedImageDependency : task.baseImageHashes.keySet()) { - if (isTaggedImageOwnedByThisRepo(taggedImageDependency)) { - task.logger.info("adding airbyteDocker task dependency: image ${taggedImage} is based on ${taggedImageDependency}") - def dependentProject = taggedImageToProject.get(taggedImageDependency) - if (dependentProject == null) { - throw new GradleException("no known project for image ${taggedImageDependency}") - } - // Depend on 'assemble' instead of 'airbyteDocker' or 'airbyteDockerTest', it's simpler that way. - task.dependsOn(dependentProject.tasks.named('assemble')) - } - } - // Set outputs. - task.idFileOutput = idFilePath.toFile() - task.outputs.upToDateWhen { - // Because the baseImageHashes is computed at task creation time, it may be stale - // at task execution time. Let's double-check. - - // Missing dependency declarations in the gradle build may result in the airbyteDocker tasks - // to be created in the wrong order. Not worth breaking the build over. - for (Map.Entry e : task.baseImageHashes) { - if (isTaggedImageOwnedByThisRepo(e.key) && e.value == "???") { - task.logger.info "Not up to date: missing at least one airbyte base image in docker" - return false - } - } - // Fetch the hashes of the required based images anew. - def allImageHashes = collectKnownImageHashes(task.project) - // If the image to be built by this task doesn't exist in docker, then it definitely should - // be built regardless of the status of the ID file. - // For instance, it's possible that a `docker image rm` occurred between consecutive - // identical gradle builds: the ID file remains untouched but the image still needs to be rebuilt. - if (!allImageHashes.containsKey(taggedImage)) { - task.logger.info "Not up to date: ID file exists but target image not found in docker" - return false - } - // If the depended-upon base images have changed in the meantime, then it follows that the target - // image needs to be rebuilt regardless of the status of the ID file. - def currentBaseImageHashes = baseTaggedImagesAndHashes(allImageHashes) - if (!task.baseImageHashes.equals(currentBaseImageHashes)) { - task.logger.info "Not up to date: at last one base image has changed in docker since task creation" - return false - } - // In all other cases, if the ID file hasn't been touched, then the task can be skipped. - return true - } - } - - airbyteDockerTask.configure { - // Images for java projects always rely on the distribution tarball. - dependsOn project.tasks.matching { it.name == 'distTar' } - // Ensure that all files exist beforehand. - dependsOn project.tasks.matching { it.name == 'generate' } - } - project.tasks.named('assemble').configure { - // We may revisit the dependency on assemble but the dependency should always be on a base task. - dependsOn airbyteDockerTask - } - // Add a task to clean up when doing a gradle clean. - // Don't actually mess with docker, just delete the output file. - def airbyteDockerCleanTask = project.tasks.register(taskName + "Clean", Delete) { - delete idFilePath - } - project.tasks.named('clean').configure { - dependsOn airbyteDockerCleanTask - } - return airbyteDockerTask - } - - static def build(Project project, String taskName, String dockerFileName) { - def f = new AirbyteDockerLegacyTaskFactory() - f.project = project - f.dockerFileName = dockerFileName - f.createTask(taskName) - } -} - -/** - * AirbyteDockerLegacyPlugin creates an airbyteDocker task for the project when a Dockerfile is present. - * - * Following the same logic, it creates airbyteDockerTest when Dockerfile.test is present, though - * that behavior is not used anywhere except in the source-mongo connector and is therefore deprecated - * through the use of airbyte-ci. - */ -class AirbyteDockerLegacyPlugin implements Plugin { - - void apply(Project project) { - AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDocker', 'Dockerfile') - - // Used only for source-mongodb. Consider removing entirely. - if (project.name.endsWith('source-mongodb')) { - AirbyteDockerLegacyTaskFactory.build(project, 'airbyteDockerTest', 'Dockerfile.test') - } - - // Used for base-normalization. - if (project.name.endsWith('base-normalization')) { - ['airbyteDockerMSSql' : 'mssql', - 'airbyteDockerMySql' : 'mysql', - 'airbyteDockerOracle' : 'oracle', - 'airbyteDockerClickhouse': 'clickhouse', - 'airbyteDockerSnowflake' : 'snowflake', - 'airbyteDockerRedshift' : 'redshift', - 'airbyteDockerTiDB' : 'tidb', - 'airbyteDockerDuckDB' : 'duckdb' - ].forEach {taskName, customConnector -> - AirbyteDockerLegacyTaskFactory.build(project, taskName, "${customConnector}.Dockerfile") - } - } - } -} diff --git a/buildSrc/src/main/groovy/airbyte-python.gradle b/buildSrc/src/main/groovy/airbyte-python.gradle deleted file mode 100644 index 59f14890c75b2..0000000000000 --- a/buildSrc/src/main/groovy/airbyte-python.gradle +++ /dev/null @@ -1,185 +0,0 @@ -import groovy.io.FileType -import groovy.io.FileVisitResult -import org.gradle.api.GradleException -import org.gradle.api.Plugin -import org.gradle.api.Project -import org.gradle.api.tasks.Exec -import ru.vyarus.gradle.plugin.python.task.PythonTask - -class Helpers { - static addTestTaskIfTestFilesFound(Project project, String testFilesDirectory, String taskName, taskDependencies) { - """ - This method verifies if there are test files in a directory before adding the pytest task to run tests on that directory. This is needed - because if there are no tests in that dir and we run pytest on it, it exits with exit code 5 which gradle takes to mean that the process - failed, since it's non-zero. This means that if a module doesn't need a unit or integration test, it still needs to add a dummy test file - like: - - ``` - def make_ci_pass_test(): - assert True - ``` - - So we use this method to leverage pytest's test discovery rules (https://docs.pytest.org/en/6.2.x/goodpractices.html#conventions-for-python-test-discovery) - to selectively run pytest based on whether there seem to be test files in that directory. - Namely, if the directory contains a file whose name is test_*.py or *_test.py then it's a test. - - See https://github.com/airbytehq/airbyte/issues/4979 for original context - """ - - boolean requiresTasks = false - if (project.file(testFilesDirectory).exists()) { - def testDir = project.projectDir.toPath().resolve(testFilesDirectory) - testDir.traverse(type: FileType.FILES, nameFilter: ~/(^test_.*|.*_test)\.py$/) {file -> - requiresTasks = true - // If a file is found, terminate the traversal, thus causing this task to be declared at most once - return FileVisitResult.TERMINATE - } - } - if (!requiresTasks) { - return - } - - def coverageTask = project.tasks.register(taskName, PythonTask) { - def dataFile = "${testFilesDirectory}/.coverage.${taskName}" - def rcFile = project.rootProject.file('pyproject.toml').absolutePath - def testConfig = project.file('pytest.ini').exists() ? 'pytest.ini' : project.rootProject.file('pyproject.toml').absolutePath - - module = "coverage" - command = "run --data-file=${dataFile} --rcfile=${rcFile} -m pytest -s ${testFilesDirectory} -c ${testConfig}" - } - coverageTask.configure { - dependsOn taskDependencies - } - } -} - -class AirbytePythonPlugin implements Plugin { - - void apply(Project project) { - - def venvDirectoryName = '.venv' - - // Add a task that allows cleaning up venvs to every python project - def cleanPythonVenv = project.tasks.register('cleanPythonVenv', Exec) { - commandLine 'rm' - args '-rf', "${project.projectDir.absolutePath}/${venvDirectoryName}" - } - project.tasks.named('clean').configure { - dependsOn cleanPythonVenv - } - - project.plugins.apply 'ru.vyarus.use-python' - - // Configure gradle python plugin. - project.python { - envPath = venvDirectoryName - minPythonVersion '3.10' - - // Amazon Linux support. - // The airbyte-ci tool runs gradle tasks in AL2023-based containers. - // In AL2023, `python3` is necessarily v3.9, and later pythons need to be installed and named explicitly. - // See https://github.com/amazonlinux/amazon-linux-2023/issues/459 for details. - try { - if ("python3.11 --version".execute().waitFor() == 0) { - // python3.11 definitely exists at this point, use it instead of 'python3'. - pythonBinary "python3.11" - } - } catch (IOException _) { - // Swallow exception if python3.11 is not installed. - } - // Pyenv support. - try { - def pyenvRoot = "pyenv root".execute() - def pyenvLatest = "pyenv latest ${minPythonVersion}".execute() - // Pyenv definitely exists at this point: use 'python' instead of 'python3' in all cases. - pythonBinary "python" - if (pyenvRoot.waitFor() == 0 && pyenvLatest.waitFor() == 0) { - pythonPath "${pyenvRoot.text.trim()}/versions/${pyenvLatest.text.trim()}/bin" - } - } catch (IOException _) { - // Swallow exception if pyenv is not installed. - } - - scope 'VIRTUALENV' - installVirtualenv = true - pip 'pip:23.2.1' - pip 'mccabe:0.6.1' - // https://github.com/csachs/pyproject-flake8/issues/13 - pip 'flake8:4.0.1' - // flake8 doesn't support pyproject.toml files - // and thus there is the wrapper "pyproject-flake8" for this - pip 'pyproject-flake8:0.0.1a2' - pip 'pytest:6.2.5' - pip 'coverage[toml]:6.3.1' - } - - // Attempt to install anything in requirements.txt. - // By convention this should only be dependencies whose source is located in the project. - if (project.file('requirements.txt').exists()) { - project.tasks.register('installLocalReqs', PythonTask) { - module = "pip" - command = "install -r requirements.txt" - inputs.file('requirements.txt') - outputs.file('build/installedlocalreqs.txt') - } - } else if (project.file('setup.py').exists()) { - // If requirements.txt does not exists, install from setup.py instead, assume a dev or "tests" profile exists. - // In this case, there is no need to depend on the base python modules since everything should be contained in the setup.py. - project.tasks.register('installLocalReqs', PythonTask) { - module = "pip" - command = "install .[dev,tests]" - inputs.file('setup.py') - outputs.file('build/installedlocalreqs.txt') - } - } else { - return - } - - def installLocalReqs = project.tasks.named('installLocalReqs') - - def flakeCheck = project.tasks.register('flakeCheck', PythonTask) { - module = "pflake8" - command = "--config ${project.rootProject.file('pyproject.toml').absolutePath} ./" - } - - def installReqs = project.tasks.register('installReqs', PythonTask) { - module = "pip" - command = "install .[main]" - inputs.file('setup.py') - outputs.file('build/installedreqs.txt') - } - installReqs.configure { - dependsOn installLocalReqs - } - - project.tasks.named('check').configure { - dependsOn installReqs - dependsOn flakeCheck - } - - def installTestReqs = project.tasks.register('installTestReqs', PythonTask) { - module = "pip" - command = "install .[tests]" - inputs.file('setup.py') - outputs.file('build/installedtestreqs.txt') - } - installTestReqs.configure { - dependsOn installReqs - } - - Helpers.addTestTaskIfTestFilesFound(project, 'unit_tests', 'testPython', installTestReqs) - project.tasks.named('check').configure { - dependsOn project.tasks.matching { it.name == 'testPython' } - } - - Helpers.addTestTaskIfTestFilesFound(project, 'integration_tests', 'integrationTestPython', installTestReqs) - def integrationTestTasks = project.tasks.matching { it.name == 'integrationTestPython' } - integrationTestTasks.configureEach { - dependsOn project.tasks.named('assemble') - mustRunAfter project.tasks.named('check') - } - project.tasks.named('build').configure { - dependsOn integrationTestTasks - } - } -} diff --git a/settings.gradle b/settings.gradle index 4754bee55575a..fb56a14df9073 100644 --- a/settings.gradle +++ b/settings.gradle @@ -159,9 +159,6 @@ if (isCiServer || isAirbyteCI) { rootProject.name = 'airbyte' -include ':tools:code-generator' - -include ':airbyte-cdk:python' include ':airbyte-cdk:java:airbyte-cdk' include ':airbyte-cdk:java:airbyte-cdk:dependencies' include ':airbyte-cdk:java:airbyte-cdk:core' @@ -175,9 +172,6 @@ include ':airbyte-cdk:java:airbyte-cdk:datastore-bigquery' include ':airbyte-cdk:java:airbyte-cdk:datastore-mongo' include ':airbyte-cdk:java:airbyte-cdk:datastore-postgres' -include ':airbyte-integrations:bases:base' -include ':airbyte-integrations:bases:base-java' -include ':airbyte-integrations:bases:base-normalization' include ':airbyte-integrations:connector-templates:generator' include ':airbyte-integrations:connectors-performance:source-harness' include ':airbyte-integrations:connectors-performance:destination-harness' diff --git a/tools/code-generator/build.gradle b/tools/code-generator/build.gradle deleted file mode 100644 index 0c2de175e2cc9..0000000000000 --- a/tools/code-generator/build.gradle +++ /dev/null @@ -1,3 +0,0 @@ -plugins { - id 'airbyte-docker-legacy' -}