diff --git a/.dockerignore b/.dockerignore index 9b6cdec50d7ef..6f5c41ab75fdc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,4 @@ .git -.gradle .idea **/build **/node_modules \ No newline at end of file diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index d9343f6e7b406..ee21f95294a67 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -10,31 +10,8 @@ jobs: steps: - name: Checkout Dataline uses: actions/checkout@v2 + - name: Build + run: ./tools/app/build.sh - # Impact is fairly limited, might actually need to cache node_modules - - name: Cache node modules - uses: actions/cache@v2 - with: - path: ~/.npm - key: npm-${{ hashFiles('**/package-lock.json') }} - restore-keys: | - npm- + - - name: Cache java deps - uses: actions/cache@v2 - with: - path: ~/.gradle/caches - key: gradle-${{ hashFiles('**/*.gradle') }} - restore-keys: | - gradle- - - - name: Set up JDK 14 - uses: actions/setup-java@v1 - with: - java-version: 14 - - - name: Build with Gradle - run: ./gradlew build --no-daemon - - - name: Ensure no file change - run: git diff-index --quiet HEAD diff --git a/build.gradle b/build.gradle index adfe945240923..fc8d9008cbcb2 100644 --- a/build.gradle +++ b/build.gradle @@ -5,7 +5,7 @@ plugins { Properties env = new Properties() File envFile = new File('.env') -envFile.withInputStream {env.load(it) } +envFile.withInputStream { env.load(it) } if (!env.containsKey("VERSION")) { throw new Exception("Version not specified in .env file...") @@ -39,6 +39,10 @@ subprojects { test { useJUnitPlatform() + testLogging() { + events "failed" + exceptionFormat "full" + } } dependencies { diff --git a/dataline-workers/build.gradle b/dataline-workers/build.gradle index 483b84910a3f3..b00b6e655c63c 100644 --- a/dataline-workers/build.gradle +++ b/dataline-workers/build.gradle @@ -2,3 +2,15 @@ plugins { id 'java-library' } +configurations { + jdbc +} + +dependencies { + testImplementation "com.fasterxml.jackson.core:jackson-databind:2.9.8" + + testImplementation "org.postgresql:postgresql:42.1.4" + testImplementation "org.testcontainers:testcontainers:1.14.3" + testImplementation "org.testcontainers:postgresql:1.14.3" +} + diff --git a/dataline-workers/src/main/java/io/dataline/workers/singer/BaseSingerWorker.java b/dataline-workers/src/main/java/io/dataline/workers/singer/BaseSingerWorker.java index e9435bb756f95..05d9ecaf1d89b 100644 --- a/dataline-workers/src/main/java/io/dataline/workers/singer/BaseSingerWorker.java +++ b/dataline-workers/src/main/java/io/dataline/workers/singer/BaseSingerWorker.java @@ -1,6 +1,7 @@ package io.dataline.workers.singer; import io.dataline.workers.JobStatus; +import io.dataline.workers.OutputAndStatus; import io.dataline.workers.Worker; import java.io.BufferedReader; import java.io.FileReader; @@ -10,6 +11,7 @@ import java.nio.file.Paths; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -19,16 +21,33 @@ public abstract class BaseSingerWorker implements Worker protected JobStatus jobStatus; protected String workerId; protected Process workerProcess; + protected final Path workspacePath; - private final String workspaceRoot; private final String singerRoot; protected BaseSingerWorker(String workerId, String workspaceRoot, String singerRoot) { this.workerId = workerId; - this.workspaceRoot = workspaceRoot; + this.workspacePath = Path.of(workspaceRoot, workerId); this.singerRoot = singerRoot; } + @Override + public OutputAndStatus run() { + createWorkspace(); + return runInternal(); + } + + public abstract OutputAndStatus runInternal(); + + private void createWorkspace() { + try { + FileUtils.forceMkdir(workspacePath.toFile()); + } catch (IOException e) { + LOGGER.error("Unable to create workspace for worker {} due to exception {} ", workerId, e); + throw new RuntimeException(e); + } + } + @Override public void cancel() { try { @@ -44,7 +63,7 @@ public void cancel() { } protected Path getWorkspacePath() { - return Paths.get(workspaceRoot, workerId); + return workspacePath; } protected String readFileFromWorkspace(String fileName) { diff --git a/dataline-workers/src/main/java/io/dataline/workers/singer/SingerDiscoveryWorker.java b/dataline-workers/src/main/java/io/dataline/workers/singer/SingerDiscoveryWorker.java index 367cc3748630d..f386044bb66a6 100644 --- a/dataline-workers/src/main/java/io/dataline/workers/singer/SingerDiscoveryWorker.java +++ b/dataline-workers/src/main/java/io/dataline/workers/singer/SingerDiscoveryWorker.java @@ -19,7 +19,6 @@ public class SingerDiscoveryWorker extends BaseSingerWorker { private final String configDotJson; private final SingerTap tap; - private DiscoveryOutput output; public SingerDiscoveryWorker( String workerId, @@ -33,7 +32,7 @@ public SingerDiscoveryWorker( } @Override - public OutputAndStatus run() { + public OutputAndStatus runInternal() { // TODO use format converter here // write config.json to disk String configPath = writeFileToWorkspace(CONFIG_JSON_FILENAME, configDotJson); @@ -46,16 +45,28 @@ public OutputAndStatus run() { getWorkspacePath().resolve(ERROR_LOG_FILENAME).toAbsolutePath().toString(); // exec try { + + String[] cmd = {tapPath, "--config", configPath, "--discover"}; + Process workerProcess = - new ProcessBuilder(tapPath, "--config " + configPath, "--discover") + new ProcessBuilder(cmd) .redirectError(new File(errorLogPath)) .redirectOutput(new File(catalogDotJsonPath)) .start(); - workerProcess.wait(); - if (workerProcess.exitValue() == 0) { + + // TODO will need to wrap this synchronize in a while loop and timeout to prevent contention + // coming from + // cancellations + synchronized (workerProcess) { + workerProcess.wait(); + } + int exitCode = workerProcess.exitValue(); + if (exitCode == 0) { String catalog = readFileFromWorkspace(CATALOG_JSON_FILENAME); return new OutputAndStatus<>(SUCCESSFUL, new DiscoveryOutput(catalog)); } else { + LOGGER.debug( + "Discovery worker {} subprocess finished with exit code {}", workerId, exitCode); return new OutputAndStatus<>(FAILED); } } catch (IOException | InterruptedException e) { diff --git a/dataline-workers/src/main/java/io/dataline/workers/singer/SingerTap.java b/dataline-workers/src/main/java/io/dataline/workers/singer/SingerTap.java index c408646bf2bbe..55c10fc229765 100644 --- a/dataline-workers/src/main/java/io/dataline/workers/singer/SingerTap.java +++ b/dataline-workers/src/main/java/io/dataline/workers/singer/SingerTap.java @@ -3,7 +3,7 @@ public enum SingerTap implements SingerConnector { // TODO S3_CSV("", ""), - POSTGRES("", ""), + POSTGRES("tap-postgres", "tap-postgres"), STRIPE("", ""); private final String getPythonVirtualEnvName; diff --git a/dataline-workers/src/test/java/io/dataline/workers/BaseWorkerTestCase.java b/dataline-workers/src/test/java/io/dataline/workers/BaseWorkerTestCase.java index 2a860dd00613d..dcdcdd860b542 100644 --- a/dataline-workers/src/test/java/io/dataline/workers/BaseWorkerTestCase.java +++ b/dataline-workers/src/test/java/io/dataline/workers/BaseWorkerTestCase.java @@ -1,10 +1,8 @@ package io.dataline.workers; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.UUID; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.TestInstance; @@ -13,34 +11,11 @@ public abstract class BaseWorkerTestCase { private Path workspaceDirectory; @BeforeAll - public void init() { - createTestWorkspace(); - deleteWorkspaceUponJvmExit(); + public void init() throws IOException { + workspaceDirectory = Files.createTempDirectory("dataline"); } protected Path getWorkspacePath() { return workspaceDirectory; } - - private void createTestWorkspace() { - try { - workspaceDirectory = Paths.get("/tmp/tests/dataline-" + UUID.randomUUID().toString()); - FileUtils.forceMkdir(workspaceDirectory.toFile()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private void deleteWorkspaceUponJvmExit() { - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - try { - FileUtils.deleteDirectory(workspaceDirectory.toFile()); - } catch (IOException e) { - throw new RuntimeException(e); - } - })); - } } diff --git a/dataline-workers/src/test/java/io/dataline/workers/singer/TestSingerDiscoveryWorker.java b/dataline-workers/src/test/java/io/dataline/workers/singer/TestSingerDiscoveryWorker.java index c452110ef937a..1bb7d45921778 100644 --- a/dataline-workers/src/test/java/io/dataline/workers/singer/TestSingerDiscoveryWorker.java +++ b/dataline-workers/src/test/java/io/dataline/workers/singer/TestSingerDiscoveryWorker.java @@ -1,5 +1,78 @@ package io.dataline.workers.singer; -public class TestSingerDiscoveryWorker { - // TODO pending installing singer binaries into the workspace +import static io.dataline.workers.JobStatus.SUCCESSFUL; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; +import com.google.common.io.Resources; +import io.dataline.workers.BaseWorkerTestCase; +import io.dataline.workers.DiscoveryOutput; +import io.dataline.workers.OutputAndStatus; +import java.io.IOException; +import java.net.URL; +import java.nio.charset.Charset; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.Map; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.PostgreSQLContainer; + +public class TestSingerDiscoveryWorker extends BaseWorkerTestCase { + + @Test + public void testPostgresDiscovery() throws SQLException, IOException { + PostgreSQLContainer db = new PostgreSQLContainer(); + db.start(); + Connection con = + DriverManager.getConnection(db.getJdbcUrl(), db.getUsername(), db.getPassword()); + con.createStatement().execute("CREATE TABLE id_and_name (id integer, name VARCHAR(200));"); + + String postgresCreds = getPostgresConfigJson(db); + SingerDiscoveryWorker worker = + new SingerDiscoveryWorker( + "1", + postgresCreds, + SingerTap.POSTGRES, + getWorkspacePath().toAbsolutePath().toString(), + "/usr/local/lib/singer/"); // TODO inject as env variable + + System.out.println(getWorkspacePath().toAbsolutePath().toString()); + System.out.println(postgresCreds); + OutputAndStatus run = worker.run(); + assertEquals(SUCCESSFUL, run.status); + + String expectedCatalog = readResource("simple_postgres_catalog.json"); + assertTrue(run.output.isPresent()); + assertJsonEquals(expectedCatalog, run.output.get().catalog); + } + + private String readResource(String name) { + URL resource = Resources.getResource(name); + try { + return Resources.toString(resource, Charset.defaultCharset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void assertJsonEquals(String s1, String s2) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + assertTrue(mapper.readTree(s1).equals(mapper.readTree(s2))); + } + + private String getPostgresConfigJson(PostgreSQLContainer psqlContainer) + throws JsonProcessingException { + Map props = Maps.newHashMap(); + props.put("dbname", psqlContainer.getDatabaseName()); + props.put("user", psqlContainer.getUsername()); + props.put("password", psqlContainer.getPassword()); + props.put("host", psqlContainer.getHost()); + props.put("port", String.valueOf(psqlContainer.getFirstMappedPort())); + + return new ObjectMapper().writeValueAsString(props); + } } diff --git a/dataline-workers/src/test/resources/simple_postgres_catalog.json b/dataline-workers/src/test/resources/simple_postgres_catalog.json new file mode 100644 index 0000000000000..1fd0ded153c13 --- /dev/null +++ b/dataline-workers/src/test/resources/simple_postgres_catalog.json @@ -0,0 +1,126 @@ +{ + "streams": [ + { + "table_name": "id_and_name", + "stream": "id_and_name", + "metadata": [ + { + "breadcrumb": [], + "metadata": { + "table-key-properties": [], + "schema-name": "public", + "database-name": "test", + "row-count": 0, + "is-view": false + } + }, + { + "breadcrumb": [ + "properties", + "id" + ], + "metadata": { + "sql-datatype": "integer", + "inclusion": "available", + "selected-by-default": true + } + }, + { + "breadcrumb": [ + "properties", + "name" + ], + "metadata": { + "sql-datatype": "character varying", + "inclusion": "available", + "selected-by-default": true + } + } + ], + "tap_stream_id": "test-public-id_and_name", + "schema": { + "type": "object", + "properties": { + "id": { + "type": [ + "null", + "integer" + ], + "minimum": -2147483648, + "maximum": 2147483647 + }, + "name": { + "type": [ + "null", + "string" + ], + "maxLength": 200 + } + }, + "definitions": { + "sdc_recursive_integer_array": { + "type": [ + "null", + "integer", + "array" + ], + "items": { + "$ref": "#/definitions/sdc_recursive_integer_array" + } + }, + "sdc_recursive_number_array": { + "type": [ + "null", + "number", + "array" + ], + "items": { + "$ref": "#/definitions/sdc_recursive_number_array" + } + }, + "sdc_recursive_string_array": { + "type": [ + "null", + "string", + "array" + ], + "items": { + "$ref": "#/definitions/sdc_recursive_string_array" + } + }, + "sdc_recursive_boolean_array": { + "type": [ + "null", + "boolean", + "array" + ], + "items": { + "$ref": "#/definitions/sdc_recursive_boolean_array" + } + }, + "sdc_recursive_timestamp_array": { + "type": [ + "null", + "string", + "array" + ], + "format": "date-time", + "items": { + "$ref": "#/definitions/sdc_recursive_timestamp_array" + } + }, + "sdc_recursive_object_array": { + "type": [ + "null", + "object", + "array" + ], + "items": { + "$ref": "#/definitions/sdc_recursive_object_array" + } + } + } + } + } + ] +} diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml index 5075b446eedbf..0b662a5b67791 100644 --- a/docker-compose.dev.yaml +++ b/docker-compose.dev.yaml @@ -13,7 +13,7 @@ services: server: build: context: . - dockerfile: server.Dockerfile + dockerfile: server_dist.Dockerfile image: server:dev container_name: dataline-server environment: diff --git a/server.Dockerfile b/server.Dockerfile deleted file mode 100644 index 309a357387930..0000000000000 --- a/server.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -# Prepare gradle dependency cache -FROM gradle:jdk14 AS cache - -WORKDIR /code - -# for i in **/*.gradle; do echo COPY ./$i $(dirname $i)/; done -COPY ./.env ./ -COPY ./build.gradle ./ -COPY ./dataline-api/build.gradle dataline-api/ -COPY ./dataline-commons/build.gradle dataline-commons/ -COPY ./dataline-server/build.gradle dataline-server/ -COPY ./settings.gradle ./ - -RUN gradle --gradle-user-home=/tmp/gradle_cache clean dependencies --no-daemon - -# Build artifact -FROM gradle:jdk14 AS build - -WORKDIR /code - -COPY --from=cache /tmp/gradle_cache /home/gradle/.gradle -COPY . /code -RUN gradle clean distTar --no-daemon -RUN ls /code/dataline-server/build/distributions/ - -# Build final image -FROM openjdk:14.0.2-slim - -EXPOSE 8000 - -WORKDIR /app/dataline-server - -# TODO: add data mount instead -RUN mkdir data - -COPY --from=build /code/dataline-server/build/distributions/*.tar dataline-server.tar -RUN tar xf dataline-server.tar --strip-components=1 - -# add docker-compose-wait tool -ENV WAIT_VERSION 2.7.2 -ADD https://github.com/ufoscout/docker-compose-wait/releases/download/$WAIT_VERSION/wait wait -RUN chmod +x wait - -# wait for postgres to become available before starting server -CMD ./wait && bin/dataline-server diff --git a/server_base.Dockerfile b/server_base.Dockerfile new file mode 100644 index 0000000000000..5a0e02f536e86 --- /dev/null +++ b/server_base.Dockerfile @@ -0,0 +1,48 @@ +# Prepare gradle dependency cache +FROM openjdk:14.0.2-slim AS cache + +WORKDIR /code + +# for i in **/*.gradle; do echo COPY ./$i $(dirname $i)/; done +COPY ./build.gradle ./ +COPY ./dataline-api/build.gradle dataline-api/ +COPY ./dataline-commons/build.gradle dataline-commons/ +COPY ./dataline-config-persistence/build.gradle dataline-config-persistence/ +COPY ./dataline-config/build.gradle dataline-config/ +COPY ./dataline-db/build.gradle dataline-db/ +COPY ./dataline-server/build.gradle dataline-server/ +COPY ./dataline-workers/build.gradle dataline-workers/ +COPY ./settings.gradle ./ +COPY ./.env ./ +# Since we're not inheriting the gradle image, easiest way to run gradle is via the wrapper. +COPY ./gradlew ./ +COPY ./gradle ./gradle + +RUN ./gradlew --gradle-user-home=/tmp/gradle_cache clean dependencies --no-daemon + +# Build artifact +FROM openjdk:14.0.2-slim + +WORKDIR /code + +# Setup singer environment. Since this is an expensive operation, we run it as early as possible in the build stage. +COPY ./.env ./ +COPY ./.root ./ +COPY ./tools/singer ./tools/singer +COPY ./tools/lib ./tools/lib +RUN mkdir -p /usr/local/lib/singer +RUN ./tools/singer/setup_singer_env.buster.sh /usr/local/lib/singer + +# Install Node. While the UI is not going to be served from this container, running UI tests is part of the build. +RUN apt-get update \ + && apt-get install -y curl \ + && curl -sL https://deb.nodesource.com/setup_14.x | bash - \ + && apt-get install -y nodejs + +COPY --from=cache /tmp/gradle_cache /home/gradle/.gradle +COPY . /code + +# Create distributions, but don't run tests just yet +RUN ./gradlew clean distTar --no-daemon --console rich +ENTRYPOINT ["./gradlew", "build", "--no-daemon", "--console", "rich"] + diff --git a/server_dist.Dockerfile b/server_dist.Dockerfile new file mode 100644 index 0000000000000..97e4c088591d6 --- /dev/null +++ b/server_dist.Dockerfile @@ -0,0 +1,15 @@ +# Build final image +FROM dataline/server/base:latest + +EXPOSE 8000 + +WORKDIR /app/dataline-server + +# TODO: add data mount instead +RUN mkdir data + +RUN cp /code/dataline-server/build/distributions/*.tar dataline-server.tar \ + && rm -rf /code +RUN tar xf dataline-server.tar --strip-components=1 + +CMD bin/dataline-server diff --git a/tools/app/build.sh b/tools/app/build.sh new file mode 100755 index 0000000000000..6257ef7c515e3 --- /dev/null +++ b/tools/app/build.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env sh + +set -e + +. tools/lib/lib.sh + +BASE_IMAGE_DOCKERFILE=server_base.Dockerfile +BASE_IMAGE_NAME=dataline/server/base + +DIST_IMAGE_DOCKERFILE=server_dist.Dockerfile +DIST_IMAGE_NAME=dataline/server + +main() { + assert_root + docker build -f "$BASE_IMAGE_DOCKERFILE" . -t "$BASE_IMAGE_NAME" + # The base image may launch docker containers, so mount the docker socket as a volume to allow that + docker run -t --rm -v /var/run/docker.sock:/var/run/docker.sock "$BASE_IMAGE_NAME" + docker build -f "$DIST_IMAGE_DOCKERFILE" . -t "$DIST_IMAGE_NAME" +} + +main "$@" diff --git a/tools/singer/install_all_connectors.sh b/tools/singer/install_all_connectors.sh new file mode 100755 index 0000000000000..65f6c8ca9221e --- /dev/null +++ b/tools/singer/install_all_connectors.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -e + +. tools/lib/lib.sh + +assert_root + +USAGE="./tools/singer/$(basename "$0") " + +[ -z "$1" ] && echo "singer root not provided" && error "$USAGE" +export SINGER_ROOT=$1 + +pip3 install psycopg2-binary +./tools/singer/install_connector.sh tap-postgres tap-postgres 0.1.0 +./tools/singer/install_connector.sh target-postgres singer-target-postgres 0.2.4 diff --git a/tools/singer/install_connector.sh b/tools/singer/install_connector.sh new file mode 100755 index 0000000000000..69441198ba430 --- /dev/null +++ b/tools/singer/install_connector.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -e + +. tools/lib/lib.sh + +_python() { + python3.7 "$@" +} + +USAGE="./tools/singer/$(basename "$0") +" + +[ -z "$1" ] && echo "Venv not provided" && error "$USAGE" +[ -z "$2" ] && echo "pip package name not provided" && error "$USAGE" +[ -z "$3" ] && echo "pip package version not provided" && error "$USAGE" +[ -z "$4" ] && [[ -z $SINGER_ROOT ]] && echo "singer root not provided" && error "$USAGE" + +VENV_NAME=$1 && shift +PIP_PACKAGE_NAME=$1 && shift +PIP_PACKAGE_VERSION=$1 && shift +[[ -z "$SINGER_ROOT" ]] && SINGER_ROOT=$1 && shift + +echo "Creating Virutal Environment for $PIP_PACKAGE_NAME v$PIP_PACKAGE_VERSION in $SINGER_ROOT/" +cd "$SINGER_ROOT" +# Create virutal env directory +_python -m venv "$VENV_NAME" +. "$VENV_NAME/bin/activate" +_python -m pip install --upgrade pip +_python -m pip install "$PIP_PACKAGE_NAME==$PIP_PACKAGE_VERSION" +_python -m pip check && echo "No package conflicts" || exit 1 +deactivate +cd - diff --git a/tools/singer/setup_singer_env.buster.sh b/tools/singer/setup_singer_env.buster.sh new file mode 100755 index 0000000000000..ebfdf60bcaaf3 --- /dev/null +++ b/tools/singer/setup_singer_env.buster.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Sets up the Singer environment on a Debian (Buster) distro + +set -e + +. tools/lib/lib.sh + +assert_root +SINGER_ROOT=$1 +[ -z "$SINGER_ROOT" ] && error "singer_root is required" + +apt-get clean +apt-get update +apt-get -y install libpq-dev=11.7-0+deb10u1 \ + python3.7=3.7.3-2+deb10u2 \ + python3-venv=3.7.3-1 \ + python3-pip=18.1-5 + +./tools/singer/install_all_connectors.sh "$SINGER_ROOT"