Skip to content

Commit cd2e86a

Browse files
lmossmanbenmoriceau
authored andcommitted
✨ Migrate OSS to temporal scheduler (#12757)
* Migrate OSS to temporal scheduler * add comment about migration being performed in server * add comments about removing migration logic * formatting and add tests for migration logic * rm duplicated test * remove more duplicated build task * remove retry * disable acceptance tests that call temporal directly when on kube * set NEW_SCHEDULER and CONTAINER_ORCHESTRATOR_ENABLED env vars to true to be consistent * set default value of container orchestrator enabled to true * Revert "set default value of container orchestrator enabled to true" This reverts commit 21b3670. * Revert "set NEW_SCHEDULER and CONTAINER_ORCHESTRATOR_ENABLED env vars to true to be consistent" This reverts commit 6dd2ec0. * Revert "Revert "set NEW_SCHEDULER and CONTAINER_ORCHESTRATOR_ENABLED env vars to true to be consistent"" This reverts commit 2f40f9d. * Revert "Revert "set default value of container orchestrator enabled to true"" This reverts commit 26068d5. * fix sync workflow test * remove defunct cancellation tests due to internal temporal error * format - remove unused imports * revert changes that set container orchestrator enabled to true everywhere * remove NEW_SCHEDULER feature flag from .env files, and set CONTAINER_ORCHESTRATOR_ENABLED flag to true for kube .env files Co-authored-by: Benoit Moriceau <[email protected]>
1 parent af5cf8a commit cd2e86a

File tree

14 files changed

+153
-291
lines changed

14 files changed

+153
-291
lines changed

.env

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ MAX_DISCOVER_WORKERS=5
9090

9191

9292
### FEATURE FLAGS ###
93-
NEW_SCHEDULER=false
9493
AUTO_DISABLE_FAILING_CONNECTIONS=false
9594
EXPOSE_SECRETS_IN_EXPORT=false
9695
FORCE_MIGRATE_SECRET_STORE=false

.github/workflows/gradle.yml

Lines changed: 0 additions & 281 deletions
Original file line numberDiff line numberDiff line change
@@ -518,283 +518,6 @@ jobs:
518518
label: ${{ needs.start-platform-build-runner.outputs.label }}
519519
ec2-instance-id: ${{ needs.start-platform-build-runner.outputs.ec2-instance-id }}
520520

521-
# Scheduler V2
522-
# In case of self-hosted EC2 errors, remove this block.
523-
start-platform-new-scheduler-acceptance-runner:
524-
name: "Platform: Start Docker w/ Scheduler v2 Test Runner"
525-
needs:
526-
- changes
527-
- find_valid_pat
528-
# Because scheduled builds on master require us to skip the changes job. Use always() to force this to run on master.
529-
if: needs.changes.outputs.backend == 'true' || needs.changes.outputs.build == 'true' || (always() && github.ref == 'refs/heads/master')
530-
timeout-minutes: 10
531-
runs-on: ubuntu-latest
532-
outputs:
533-
label: ${{ steps.start-ec2-runner.outputs.label }}
534-
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
535-
steps:
536-
- name: Checkout Airbyte
537-
uses: actions/checkout@v2
538-
- name: Start AWS Runner
539-
id: start-ec2-runner
540-
uses: ./.github/actions/start-aws-runner
541-
with:
542-
github-token: ${{ needs.find_valid_pat.outputs.pat }}
543-
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
544-
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
545-
platform-new-scheduler-acceptance:
546-
# In case of self-hosted EC2 errors, remove the next two lines and uncomment the currently commented out `runs-on` line.
547-
needs: start-platform-new-scheduler-acceptance-runner # required to start the main job when the runner is ready
548-
runs-on: ${{ needs.start-platform-new-scheduler-acceptance-runner.outputs.label }} # run the job on the newly created runner
549-
name: "Platform: Docker w/ Scheduler v2 Acceptance Tests"
550-
timeout-minutes: 90
551-
steps:
552-
- name: Checkout Airbyte
553-
uses: actions/checkout@v2
554-
555-
- name: Npm Caching
556-
uses: actions/cache@v2
557-
with:
558-
path: |
559-
~/.npm
560-
key: ${{ secrets.CACHE_VERSION }}-npm-${{ runner.os }}-${{ hashFiles('**/package-lock.json') }}
561-
restore-keys: |
562-
${{ secrets.CACHE_VERSION }}-npm-${{ runner.os }}-
563-
564-
# this intentionally does not use restore-keys so we don't mess with gradle caching
565-
- name: Gradle Caching
566-
uses: actions/cache@v2
567-
with:
568-
path: |
569-
~/.gradle/caches
570-
~/.gradle/wrapper
571-
**/.venv
572-
key: ${{ secrets.CACHE_VERSION }}-${{ runner.os }}-${{ hashFiles('**/*.gradle*') }}-${{ hashFiles('**/package-lock.json') }}
573-
574-
- uses: actions/setup-java@v1
575-
with:
576-
java-version: "17"
577-
578-
- uses: actions/setup-node@v1
579-
with:
580-
node-version: "16.13.0"
581-
582-
- name: Set up CI Gradle Properties
583-
run: |
584-
mkdir -p ~/.gradle/
585-
cat > ~/.gradle/gradle.properties <<EOF
586-
org.gradle.jvmargs=-Xmx8g -Xss4m --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
587-
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
588-
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
589-
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
590-
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
591-
org.gradle.workers.max=8
592-
org.gradle.vfs.watch=false
593-
EOF
594-
595-
- name: Build
596-
run: |
597-
SUB_BUILD=PLATFORM ./gradlew build javadoc --scan
598-
599-
- name: Run End-to-End Acceptance Tests with the new scheduler
600-
run: ./tools/bin/acceptance_test_with_new_scheduler.sh
601-
602-
# In case of self-hosted EC2 errors, remove this block.
603-
stop-platform-new-scheduler-acceptance-runner:
604-
name: "Platform: Stop Docker w/ Scheduler v2 Test Runner"
605-
timeout-minutes: 10
606-
needs:
607-
- start-platform-new-scheduler-acceptance-runner # required to get output from the start-runner job
608-
- platform-new-scheduler-acceptance # required to wait when the main job is done
609-
- find_valid_pat
610-
runs-on: ubuntu-latest
611-
# Always is required to stop the runner even if the previous job has errors. However always() runs even if the previous step is skipped.
612-
# Thus, we check for skipped here.
613-
if: ${{ always() && needs.start-platform-new-scheduler-acceptance-runner.result != 'skipped'}}
614-
steps:
615-
- name: Configure AWS credentials
616-
uses: aws-actions/configure-aws-credentials@v1
617-
with:
618-
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
619-
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
620-
aws-region: us-east-2
621-
- name: Stop EC2 runner
622-
uses: supertopher/[email protected]
623-
with:
624-
mode: stop
625-
label: ${{ needs.start-platform-new-scheduler-acceptance-runner.outputs.label }}
626-
github-token: ${{ needs.find_valid_pat.outputs.pat }}
627-
ec2-instance-id: ${{ needs.start-platform-new-scheduler-acceptance-runner.outputs.ec2-instance-id }}
628-
629-
## Kube Acceptance Tests
630-
# Docker acceptance tests run as part of the build job.
631-
# In case of self-hosted EC2 errors, remove this block.
632-
start-kube-acceptance-test-runner:
633-
name: "Platform: Start Kube Acceptance Test EC2 Runner"
634-
needs:
635-
- changes
636-
- find_valid_pat
637-
# Because scheduled builds on master require us to skip the changes job. Use always() to force this to run on master.
638-
if: needs.changes.outputs.backend == 'true' || needs.changes.outputs.build == 'true' || (always() && github.ref == 'refs/heads/master')
639-
timeout-minutes: 10
640-
runs-on: ubuntu-latest
641-
outputs:
642-
label: ${{ steps.start-ec2-runner.outputs.label }}
643-
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
644-
steps:
645-
- name: Checkout Airbyte
646-
uses: actions/checkout@v2
647-
- name: Start AWS Runner
648-
id: start-ec2-runner
649-
uses: ./.github/actions/start-aws-runner
650-
with:
651-
# github-self-hosted-runner-ubuntu-20-with-150gdisk-docker-20.10.7-and-socat
652-
ec2-image-id: ami-0c1a9bc22624339d8
653-
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
654-
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
655-
github-token: ${{ needs.find_valid_pat.outputs.pat }}
656-
kube-acceptance-test:
657-
name: "Platform: Acceptance Tests (Kube)"
658-
# In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest.
659-
needs: start-kube-acceptance-test-runner # required to start the main job when the runner is ready
660-
runs-on: ${{ needs.start-kube-acceptance-test-runner.outputs.label }} # run the job on the newly created runner
661-
environment: more-secrets
662-
timeout-minutes: 90
663-
steps:
664-
- name: Checkout Airbyte
665-
uses: actions/checkout@v2
666-
667-
- name: Cache Build Artifacts
668-
uses: ./.github/actions/cache-build-artifacts
669-
with:
670-
cache-key: ${{ secrets.CACHE_VERSION }}
671-
cache-python: "false"
672-
673-
- uses: actions/setup-java@v1
674-
with:
675-
java-version: "17"
676-
677-
- uses: actions/setup-node@v1
678-
with:
679-
node-version: "16.13.0"
680-
681-
- name: Fix EC-2 Runner
682-
run: |
683-
mkdir -p /home/runner
684-
685-
- name: Set up CI Gradle Properties
686-
run: |
687-
mkdir -p ~/.gradle/
688-
cat > ~/.gradle/gradle.properties <<EOF
689-
org.gradle.jvmargs=-Xmx8g -Xss4m --add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
690-
--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
691-
--add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
692-
--add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
693-
--add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
694-
org.gradle.workers.max=8
695-
org.gradle.vfs.watch=false
696-
EOF
697-
698-
- name: Create cluster config file
699-
run: |
700-
cat > /tmp/kind-config.yaml <<EOF
701-
kind: Cluster
702-
apiVersion: kind.x-k8s.io/v1alpha4
703-
nodes:
704-
- role: control-plane
705-
- role: worker
706-
EOF
707-
708-
- name: Check Docker System Info
709-
run: docker system info
710-
711-
- name: KIND Kubernetes Cluster Setup
712-
uses: helm/[email protected]
713-
with:
714-
node_image: kindest/node:v1.21.2
715-
config: /tmp/kind-config.yaml
716-
# In case of self-hosted EC2 errors, remove this env block.
717-
env:
718-
USER: root
719-
HOME: /home/runner
720-
CHANGE_MINIKUBE_NONE_USER: true
721-
722-
- name: Describe kube nodes
723-
run: kubectl describe nodes
724-
env:
725-
USER: root
726-
HOME: /home/runner
727-
728-
- name: Build Platform Docker Images
729-
run: SUB_BUILD=PLATFORM ./gradlew assemble -x test --scan
730-
731-
- name: Run Logging Tests
732-
run: ./tools/bin/cloud_storage_logging_test.sh
733-
env:
734-
# AWS_S3_INTEGRATION_TEST_CREDS can be found in LastPass as AWS_S3_INTEGRATION_TEST_CREDS
735-
AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }}
736-
# GOOGLE_CLOUD_STORAGE_TEST_CREDS can be found in LastPass as "google cloud storage ( gcs ) test creds"
737-
GOOGLE_CLOUD_STORAGE_TEST_CREDS: ${{ secrets.GOOGLE_CLOUD_STORAGE_TEST_CREDS }}
738-
739-
- name: Run Kubernetes End-to-End Acceptance Tests
740-
env:
741-
USER: root
742-
HOME: /home/runner
743-
# AWS_S3_INTEGRATION_TEST_CREDS can be found in LastPass as AWS_S3_INTEGRATION_TEST_CREDS
744-
AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }}
745-
SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }}
746-
SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }}
747-
run: |
748-
CI=true IS_MINIKUBE=true ./tools/bin/acceptance_test_kube.sh
749-
750-
- uses: actions/upload-artifact@v2
751-
if: failure()
752-
with:
753-
name: Kubernetes Logs
754-
path: /tmp/kubernetes_logs/*
755-
756-
- name: Show Disk Usage
757-
run: |
758-
df -h
759-
docker system df
760-
761-
- name: Run AWS/GCP Cloud Integration Tests
762-
env:
763-
USER: root
764-
HOME: /home/runner
765-
# AWS_S3_INTEGRATION_TEST_CREDS can be found in LastPass as AWS_S3_INTEGRATION_TEST_CREDS
766-
AWS_S3_INTEGRATION_TEST_CREDS: ${{ secrets.AWS_S3_INTEGRATION_TEST_CREDS }}
767-
SECRET_STORE_GCP_CREDENTIALS: ${{ secrets.SECRET_STORE_GCP_CREDENTIALS }}
768-
SECRET_STORE_GCP_PROJECT_ID: ${{ secrets.SECRET_STORE_GCP_PROJECT_ID }}
769-
run: |
770-
CI=true ./tools/bin/cloud_integration_tests.sh
771-
# In case of self-hosted EC2 errors, remove this block.
772-
stop-kube-acceptance-test-runner:
773-
name: "Platform: Stop Kube Acceptance Test EC2 Runner"
774-
timeout-minutes: 10
775-
needs:
776-
- start-kube-acceptance-test-runner # required to get output from the start-runner job
777-
- kube-acceptance-test # required to wait when the main job is done
778-
- find_valid_pat
779-
runs-on: ubuntu-latest
780-
# Always is required to stop the runner even if the previous job has errors. However always() runs even if the previous step is skipped.
781-
# Thus, we check for skipped here.
782-
if: ${{ always() && needs.start-kube-acceptance-test-runner.result != 'skipped'}}
783-
steps:
784-
- name: Configure AWS credentials
785-
uses: aws-actions/configure-aws-credentials@v1
786-
with:
787-
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
788-
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
789-
aws-region: us-east-2
790-
- name: Stop EC2 runner
791-
uses: supertopher/[email protected]
792-
with:
793-
mode: stop
794-
github-token: ${{ needs.find_valid_pat.outputs.pat }}
795-
label: ${{ needs.start-kube-acceptance-test-runner.outputs.label }}
796-
ec2-instance-id: ${{ needs.start-kube-acceptance-test-runner.outputs.ec2-instance-id }}
797-
798521
## Kube Acceptance Tests (with scheduler v2 - both temporal changes and container orchestrator)
799522
# Docker acceptance tests run as part of the build job.
800523
# In case of self-hosted EC2 errors, remove this block.
@@ -939,8 +662,6 @@ jobs:
939662
- frontend-build
940663
- octavia-cli-build
941664
- platform-build
942-
- platform-new-scheduler-acceptance
943-
- kube-acceptance-test
944665
- kube-acceptance-test-v2
945666
if: ${{ failure() && github.ref == 'refs/heads/master' }}
946667
steps:
@@ -965,8 +686,6 @@ jobs:
965686
- frontend-build
966687
- octavia-cli-build
967688
- platform-build
968-
- platform-new-scheduler-acceptance
969-
- kube-acceptance-test
970689
- kube-acceptance-test-v2
971690
if: success()
972691
steps:

airbyte-commons/src/main/java/io/airbyte/commons/features/EnvVariableFeatureFlags.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ public class EnvVariableFeatureFlags implements FeatureFlags {
1111

1212
@Override
1313
public boolean usesNewScheduler() {
14-
log.info("New Scheduler: " + Boolean.parseBoolean(System.getenv("NEW_SCHEDULER")));
14+
// TODO: sweep this method along with the scheduler
15+
log.info("New Scheduler: true (post-migration)");
1516

16-
return Boolean.parseBoolean(System.getenv("NEW_SCHEDULER"));
17+
// After migrating all OSS users onto the new temporal scheduler, this should always return true.
18+
return true;
1719
}
1820

1921
@Override

airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,32 @@ public void setSecretMigrationDone() throws IOException {
574574
true)));
575575
}
576576

577+
private final String SCHEDULER_MIGRATION_STATUS = "schedulerMigration";
578+
579+
@Override
580+
public boolean isSchedulerMigrated() throws IOException {
581+
final Result<Record> result = jobDatabase.query(ctx -> ctx.select()
582+
.from(AIRBYTE_METADATA_TABLE)
583+
.where(DSL.field(METADATA_KEY_COL).eq(SCHEDULER_MIGRATION_STATUS))
584+
.fetch());
585+
586+
return result.stream().count() == 1;
587+
}
588+
589+
@Override
590+
public void setSchedulerMigrationDone() throws IOException {
591+
jobDatabase.query(ctx -> ctx.execute(String.format(
592+
"INSERT INTO %s(%s, %s) VALUES('%s', '%s') ON CONFLICT (%s) DO UPDATE SET %s = '%s'",
593+
AIRBYTE_METADATA_TABLE,
594+
METADATA_KEY_COL,
595+
METADATA_VAL_COL,
596+
SCHEDULER_MIGRATION_STATUS,
597+
true,
598+
METADATA_KEY_COL,
599+
METADATA_VAL_COL,
600+
true)));
601+
}
602+
577603
@Override
578604
public Optional<String> getVersion() throws IOException {
579605
final Result<Record> result = jobDatabase.query(ctx -> ctx.select()

airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,4 +248,20 @@ List<JobWithStatusAndTimestamp> listJobStatusAndTimestampWithConnection(UUID con
248248
*/
249249
void setSecretMigrationDone() throws IOException;
250250

251+
/**
252+
* Check if the scheduler has been migrated to temporal.
253+
*
254+
* TODO (https://github.com/airbytehq/airbyte/issues/12823): remove this method after the next
255+
* "major" version bump as it will no longer be needed.
256+
*/
257+
boolean isSchedulerMigrated() throws IOException;
258+
259+
/**
260+
* Set that the scheduler migration has been performed.
261+
*
262+
* TODO (https://github.com/airbytehq/airbyte/issues/12823): remove this method after the next
263+
* "major" version bump as it will no longer be needed.
264+
*/
265+
void setSchedulerMigrationDone() throws IOException;
266+
251267
}

0 commit comments

Comments
 (0)