Skip to content

Commit fc3391c

Browse files
author
Daniel Hegeman
authored
Merge branch 'main' into mim/5916-get-tombstoned
2 parents 66b9cb0 + c5075ea commit fc3391c

File tree

204 files changed

+4282
-6317
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

204 files changed

+4282
-6317
lines changed

.github/workflows/build-images-and-create-deployment.yml

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@ on:
44
push:
55
branches:
66
- main
7-
- staging
87
- prod
98
repository_dispatch:
10-
types: [build-images]
9+
types: [build-images-for-staging]
1110
env:
1211
# Force using BuildKit instead of normal Docker, required so that metadata
1312
# is written/read to allow us to use layers of previous builds as cache.
@@ -16,11 +15,6 @@ env:
1615
DOCKER_REPO: ${{ secrets.ECR_REPO }}/
1716
GITHUB_BRANCH: ${{ github.event.client_payload.ref || github.ref }}
1817

19-
# add a concurrency group to prevent multiple builds from running at the same time
20-
concurrency:
21-
group: ${{ github.event.client_payload.ref || github.ref }}-environment
22-
cancel-in-progress: false
23-
2418
permissions:
2519
id-token: write
2620
contents: read

.github/workflows/deploy-happy-stack.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@ name: Deploy Happy
22

33
on: deployment
44

5-
# add a concurrency group to prevent multiple builds from running at the same time
6-
concurrency:
7-
group: ${{ github.ref }}-environment
8-
cancel-in-progress: false
9-
105
env:
116
DOCKER_BUILDKIT: 1
127
COMPOSE_DOCKER_CLI_BUILD: 1

.github/workflows/push-tests.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@ on:
88
branches:
99
- "main"
1010

11-
# add a concurrency group to prevent multiple builds from running at the same time
12-
concurrency:
13-
group: ${{ github.ref }}-environment
14-
cancel-in-progress: false
15-
1611
env:
1712
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
1813
DEPLOYMENT_STAGE: test

.github/workflows/test-receiving-repository-dispatch.yml

Lines changed: 0 additions & 21 deletions
This file was deleted.

.github/workflows/test-sending-repository-dispatch.yml

Lines changed: 0 additions & 19 deletions
This file was deleted.

.github/workflows/test-workflow-run-after-push-tests-pass.yml

Lines changed: 0 additions & 25 deletions
This file was deleted.

.github/workflows/trigger-release-candidate-build-and-deploy.yml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
name: Trigger release candidate build and deploy
22

33
on:
4-
push:
4+
workflow_run:
5+
workflows:
6+
- "Push Tests"
7+
types:
8+
- completed
59
branches:
610
- main
711

812
jobs:
9-
continuous_deploy_to_staging:
13+
deploy_to_stage_env_on_test_pass_on_main:
1014
runs-on: ubuntu-latest
15+
# deployment to staging is only triggered if dependent workflow ("Push Tests")
16+
# passes successfully
17+
if: ${{ github.event.workflow_run.conclusion == 'success' }}
1118
steps:
1219
- name: Checkout main branch
1320
uses: actions/checkout@v2
@@ -44,5 +51,15 @@ jobs:
4451
uses: peter-evans/repository-dispatch@v2
4552
with:
4653
token: ${{ secrets.GITHUB_TOKEN }}
47-
event-type: build-images
54+
event-type: build-images-for-staging
4855
client-payload: '{"ref": "refs/heads/staging"}'
56+
57+
- name: Send slack notification if main not merged into staging
58+
if: failure()
59+
uses: 8398a7/action-slack@v3
60+
with:
61+
status: ${{ job.status }}
62+
fields: repo,commit,author,eventName,workflow,job,mention
63+
mention: "here"
64+
env:
65+
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}

.happy/terraform/envs/dev/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ module stack {
1616
batch_container_memory_limit = 28000
1717
wmg_batch_container_memory_limit = 248000
1818
wmg_desired_vcpus = 128
19-
cg_desired_vcpus = 48
20-
cg_batch_container_memory_limit = 92000
19+
cg_desired_vcpus = 128
20+
cg_batch_container_memory_limit = 248000
2121
backend_memory = 8192
2222
frontend_memory = 4096
2323
backend_instance_count = 4

.happy/terraform/envs/prod/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ module stack {
1616
batch_container_memory_limit = 63500
1717
wmg_batch_container_memory_limit = 248000
1818
wmg_desired_vcpus = 128
19-
cg_desired_vcpus = 48
20-
cg_batch_container_memory_limit = 92000
19+
cg_desired_vcpus = 128
20+
cg_batch_container_memory_limit = 248000
2121
backend_memory = 30 * 1024
2222
frontend_memory = 4096
2323
backend_instance_count = 6

.happy/terraform/envs/stage/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ module stack {
1616
batch_container_memory_limit = 63500
1717
wmg_batch_container_memory_limit = 248000
1818
wmg_desired_vcpus = 128
19-
cg_batch_container_memory_limit = 92000
20-
cg_desired_vcpus = 48
19+
cg_batch_container_memory_limit = 248000
20+
cg_desired_vcpus = 128
2121
backend_memory = 8192
2222
frontend_memory = 4096
2323
backend_instance_count = 4

.happy/terraform/modules/schema_migration/main.tf

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,59 @@ resource aws_batch_job_definition schema_migrations {
113113
})
114114
}
115115

116+
resource aws_batch_job_definition publish_revisions {
117+
type = "container"
118+
name = "dp-${var.deployment_stage}-${var.custom_stack_name}-${local.name}-publish-revisions"
119+
container_properties = jsonencode({
120+
command = ["python3",
121+
"-m",
122+
"backend.layers.processing.publish_revisions",
123+
],
124+
jobRoleArn= var.batch_role_arn,
125+
image= var.image,
126+
environment= [
127+
{
128+
name= "ARTIFACT_BUCKET",
129+
value= var.artifact_bucket
130+
},
131+
{
132+
name= "DEPLOYMENT_STAGE",
133+
value= var.deployment_stage
134+
},
135+
{
136+
name= "AWS_DEFAULT_REGION",
137+
value= data.aws_region.current.name
138+
},
139+
{
140+
name= "REMOTE_DEV_PREFIX",
141+
value= var.remote_dev_prefix
142+
},
143+
{
144+
name= "DATASETS_BUCKET",
145+
value= var.datasets_bucket
146+
},
147+
],
148+
resourceRequirements = [
149+
{
150+
type= "VCPU",
151+
value = "2"
152+
},
153+
{
154+
type = "MEMORY",
155+
value = "4096"
156+
}
157+
]
158+
logConfiguration= {
159+
logDriver= "awslogs",
160+
options= {
161+
awslogs-group= aws_cloudwatch_log_group.batch_cloud_watch_logs_group.id,
162+
awslogs-region= data.aws_region.current.name
163+
}
164+
}
165+
})
166+
}
167+
168+
116169
resource aws_sfn_state_machine sfn_schema_migration {
117170
name = "dp-${var.deployment_stage}-${var.custom_stack_name}-${local.name}-sfn"
118171
role_arn = var.sfn_role_arn

.happy/terraform/modules/sfn/main.tf

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,11 @@ resource "aws_sfn_state_machine" "state_machine" {
5858
},
5959
{
6060
"Name": "DATASET_ID",
61-
"Value.$": "$.dataset_id"
61+
"Value.$": "$.dataset_id"
62+
},
63+
{
64+
"Name": "COLLECTION_ID",
65+
"Value.$": "$.collection_id"
6266
},
6367
{
6468
"Name": "STEP_NAME",

Dockerfile.wmg_pipeline

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ ADD backend/wmg/__init__.py backend/wmg/__init__.py
1919
ADD backend/wmg/config.py backend/wmg/config.py
2020
ADD backend/wmg/data backend/wmg/data
2121
ADD backend/wmg/pipeline backend/wmg/pipeline
22+
ADD backend/wmg/api backend/wmg/api
23+
ADD backend/cellguide/pipeline backend/cellguide/pipeline
2224
ADD backend/layers backend/layers
2325
ADD backend/common backend/common
2426

@@ -29,4 +31,4 @@ LABEL commit=${HAPPY_COMMIT}
2931
ENV COMMIT_SHA=${HAPPY_COMMIT}
3032
ENV COMMIT_BRANCH=${HAPPY_BRANCH}
3133

32-
CMD ["python3", "-m", "backend.wmg.pipeline.cube_pipeline"]
34+
CMD ["python3", "-m", "backend.wmg.pipeline"]

backend/cellguide/pipeline/computational_marker_genes/__init__.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import logging
22

3-
from backend.cellguide.pipeline.computational_marker_genes.computational_markers import MarkerGenesCalculator
4-
from backend.cellguide.pipeline.computational_marker_genes.constants import MARKER_SCORE_THRESHOLD
3+
from backend.cellguide.pipeline.computational_marker_genes.computational_markers import (
4+
MARKER_SCORE_THRESHOLD,
5+
MarkerGenesCalculator,
6+
)
57
from backend.cellguide.pipeline.constants import COMPUTATIONAL_MARKER_GENES_FOLDERNAME, MARKER_GENE_PRESENCE_FILENAME
68
from backend.cellguide.pipeline.ontology_tree import get_ontology_tree_builder
79
from backend.cellguide.pipeline.ontology_tree.tree_builder import OntologyTreeBuilder
@@ -61,6 +63,29 @@ def get_computational_marker_genes(*, snapshot: WmgSnapshot, ontology_tree: Onto
6163
else:
6264
marker_genes[key] = marker_genes_per_tissue[key]
6365

66+
# convert all groupby_dims IDs to labels as required by CellGuide
67+
organism_id_to_name = {k: v for d in snapshot.primary_filter_dimensions["organism_terms"] for k, v in d.items()}
68+
tissue_id_to_name = {
69+
k: v
70+
for organism in snapshot.primary_filter_dimensions["tissue_terms"]
71+
for i in snapshot.primary_filter_dimensions["tissue_terms"][organism]
72+
for k, v in i.items()
73+
}
74+
for _, marker_gene_stats_list in marker_genes.items():
75+
for marker_gene_stats in marker_gene_stats_list:
76+
groupby_dims = marker_gene_stats.groupby_dims
77+
groupby_terms = list(groupby_dims.keys())
78+
groupby_term_labels = [term.rsplit("_", 1)[0] + "_label" for term in groupby_terms]
79+
groupby_dims_new = dict(zip(groupby_term_labels, (groupby_dims[term] for term in groupby_terms)))
80+
81+
for key in groupby_dims_new:
82+
if key == "tissue_ontology_term_label":
83+
groupby_dims_new[key] = tissue_id_to_name.get(groupby_dims_new[key], groupby_dims_new[key])
84+
elif key == "organism_ontology_term_label":
85+
groupby_dims_new[key] = organism_id_to_name.get(groupby_dims_new[key], groupby_dims_new[key])
86+
87+
marker_gene_stats.groupby_dims = groupby_dims_new
88+
6489
reformatted_marker_genes = {}
6590
for cell_type_id, marker_gene_stats_list in marker_genes.items():
6691
for marker_gene_stats in marker_gene_stats_list:
@@ -87,11 +112,11 @@ def get_computational_marker_genes(*, snapshot: WmgSnapshot, ontology_tree: Onto
87112
)
88113
reformatted_marker_genes[symbol][organism][tissue].append(data)
89114

90-
# assert that cell types do not appear multiple times in each gene, tissue, organism
91-
for symbol in reformatted_marker_genes:
92-
for organism in reformatted_marker_genes[symbol]:
93-
for tissue in reformatted_marker_genes[symbol][organism]:
94-
cell_type_ids = [i["cell_type_id"] for i in reformatted_marker_genes[symbol][organism][tissue]]
95-
assert len(cell_type_ids) == len(list(set(cell_type_ids)))
115+
# # assert that cell types do not appear multiple times in each gene, tissue, organism
116+
# for symbol in reformatted_marker_genes:
117+
# for organism in reformatted_marker_genes[symbol]:
118+
# for tissue in reformatted_marker_genes[symbol][organism]:
119+
# cell_type_ids = [i["cell_type_id"] for i in reformatted_marker_genes[symbol][organism][tissue]]
120+
# assert len(cell_type_ids) == len(list(set(cell_type_ids)))
96121

97122
return marker_genes, reformatted_marker_genes

0 commit comments

Comments
 (0)