Add a workflow manager to support running hpc apps in k8s cluster

linsword13 · linsword13 · commit 8f77f27f9b03 · 2025-04-03T11:59:25.000-07:00
It uses: * `mpi-operator` from `kubeflow` to manage the worker pods * `kustomize` to port locally generated assets into the pods Example workflow for running HPL: * Ramble workspace config: (the `container_image` points to an image containing the HPL dependencies, courtesy of @akiki-liang0) ```yaml ramble: variants: package_manager: None workflow_manager: gke-mpi applications: hpl: workloads: calculator: experiments: gketest: env_vars: set: OMP_NUM_THREADS: '{n_threads}' OMPI_ALLOW_RUN_AS_ROOT: 1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1 LD_LIBRARY_PATH: /usr/local/lib/openmpi:/openBLAS:/root/hpl:/root/hpl/bin:/root/hpl/bin/linux:/usr/local/lib OMP_PLACES: 'core' OMP_PROC_BIND: 'true' variables: mpi_command: mpirun -np {n_ranks} --npernode {processes_per_node} -v --allow-run-as-root --bind-to core -x PATH -x LD_LIBRARY_PATH --mca btl tcp,self processes_per_node: 192 n_nodes: 2 memory_per_node: 762 array_size: 6400000000 N-NBMINs: 1 NBMINs: 4 NPFACTs: 1 PFACTs: 1 N-RFACTs: 1 RFACTs: 1 extra_config_files: | '{experiment_run_dir}/HPL.dat' container_image: '<container_image_uri>' gke_run_dir: '/root/hpl/bin/linux' ``` * Run `ramble workspace setup` to set up the `kustomize` templates * Optionally, run `ramble on --executor '{batch_print_deployment}'` to inspect the generated deployment template * Run `ramble on` to submit the job to an existing GKE cluster * During the run, use `ramble on --executor '{batch_query}'` to look at the job info and get the launcher log * Run `ramble workspace analyze` upon completion; this fetches the launcher log (HPL isn't optimized here, so ignore the perf result): ``` $ ramble workspace analyze -p From Workspace: gke-hpl (hash: 1f96b0b9cf8fe9b0c67e29740404175ae559bae9ec75757150e2a715eec2e0ae) Experiment hpl.calculator.gketest figures of merit: Status = SUCCESS Tags = ['benchmark', 'benchmark-app', 'linpack'] N-NB-P-Q = 405504-384-16-24 context figures of merit: Time = 2832.29 s GFlops = 1.5695e+04 GFLOP/s ```
* At the end, run `ramble on --executor '{batch_cancel}'` to delete the launcher pod. This pod is intentionally configured to stay running after the job completes, so that its logs can be fetched. That means that, for now, it needs to be manually cleaned up afterwards.
diff --git a/lib/ramble/ramble/test/workflow_manager_functionality/gke_mpi_workflow_manager.py b/lib/ramble/ramble/test/workflow_manager_functionality/gke_mpi_workflow_manager.py
@@ -0,0 +1,95 @@
+# Copyright 2022-2025 The Ramble Authors
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import os
+
+import pytest
+
+import ramble.workspace
+from ramble.main import RambleCommand
+
+workspace = RambleCommand("workspace")
+
+pytestmark = pytest.mark.usefixtures(
+    "mutable_config",
+    "mutable_mock_workspace_path",
+)
+
+
+def test_gke_mpi_workflow(request):
+    workspace_name = request.node.name
+    test_config = """
+ramble:
+  env_vars:
+    set:
+      OMP_NUM_THREADS: '{n_threads}'
+  variants:
+    workflow_manager: gke-mpi
+  variables:
+    mpi_command: mpirun -n {n_ranks}
+    processes_per_node: 1
+    n_nodes: 2
+    container_image: docker.pkg.dev/myproject/myimage
+    extra_metadata: |
+      a: 1
+      b: 2
+    extra_container_config_files: |
+      {experiment_run_dir}/app_config.txt
+  applications:
+    hostname:
+      workloads:
+        parallel:
+          experiments:
+            generated: {}
+"""
+    with ramble.workspace.create(workspace_name) as ws:
+        ws.write()
+        config_path = os.path.join(ws.config_dir, ramble.workspace.config_file_name)
+        with open(config_path, "w+") as f:
+            f.write(test_config)
+        ws._re_read()
+        workspace("setup", "--dry-run", global_args=["-D", ws.root])
+
+        run_path = os.path.join(ws.experiment_dir, "hostname", "parallel", "generated")
+        files = [f for f in os.listdir(run_path) if os.path.isfile(os.path.join(run_path, f))]
+        assert "batch_submit" in files
+        assert "batch_query" in files
+        assert "batch_cancel" in files
+        assert "gke_mpi.yaml" in files
+        assert "kustomization.yaml" in files
+        assert "launcher_execute_script" in files
+        assert "worker_execute_script" in files
+        assert "batch_print_deployment" in files
+        with open(os.path.join(run_path, "batch_submit")) as f:
+            content = f.read()
+            assert f"kubectl apply --kustomize {run_path}" in content
+        with open(os.path.join(run_path, "batch_query")) as f:
+            content = f.read()
+            assert "kubectl describe mpijobs hostname-parallel-generated" in content
+        with open(os.path.join(run_path, "batch_cancel")) as f:
+            content = f.read()
+            assert "kubectl delete mpijobs hostname-parallel-generated" in content
+        with open(os.path.join(run_path, "gke_mpi.yaml")) as f:
+            content = f.read()
+            assert "kind: MPIJob" in content
+            assert "name: hostname-parallel-generated" in content
+            assert "replicas: 2" in content
+            assert "image: docker.pkg.dev/myproject/myimage" in content
+        with open(os.path.join(run_path, "kustomization.yaml")) as f:
+            content = f.read()
+            assert "files:" in content
+            assert os.path.join(run_path, "app_config.txt") in content
+        with open(os.path.join(run_path, "launcher_execute_script")) as f:
+            content = f.read()
+            assert "hostname" in content
+        with open(os.path.join(run_path, "worker_execute_script")) as f:
+            content = f.read()
+            assert "sshd" in content
+        with open(os.path.join(run_path, "batch_print_deployment")) as f:
+            content = f.read()
+            assert "kubectl kustomize" in content
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_cancel.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_cancel.tpl
@@ -0,0 +1,2 @@
+#!/bin/bash
+kubectl delete mpijobs {job_name}
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_print_deployment.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_print_deployment.tpl
@@ -0,0 +1,2 @@
+#!/bin/bash
+kubectl kustomize {experiment_run_dir}
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_query.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_query.tpl
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Show the status of the MPIJob and, when a launcher pod is found, print its
+# log and keep a copy as launcher.log in the experiment run directory.
+echo "========================"
+echo "Print out mpi job status"
+echo "========================"
+echo ""
+kubectl describe mpijobs {job_name}
+
+# Look up the launcher pod by name in the pod listing (first column)
+lname=$(kubectl get pods | grep '{job_name}-launcher' | awk '{print $1}')
+# Skip the log section when no launcher pod matched
+if [ ! -z "$lname" ]; then
+    echo " "
+    echo "=========================="
+    echo "Print out the launcher log"
+    echo "=========================="
+    echo " "
+    # tee keeps the log on screen while also saving it for later analysis
+    kubectl logs $lname | tee {experiment_run_dir}/launcher.log
+fi
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_submit.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/batch_submit.tpl
@@ -0,0 +1,2 @@
+#!/bin/bash
+kubectl apply --kustomize {experiment_run_dir}
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/gke_mpi.yaml.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/gke_mpi.yaml.tpl
@@ -0,0 +1,48 @@
+# Template of the MPIJob (kubeflow mpi-operator) that runs a single experiment.
+# It defines one launcher pod and a set of worker pods sharing the same image.
+apiVersion: kubeflow.org/v2beta1
+kind: MPIJob
+metadata:
+  name: {job_name}
+# Optional extra metadata entries, already indented by two spaces (may be empty)
+{extra_metadata_section}
+spec:
+  slotsPerWorker: {cores_per_node}
+  runPolicy:
+    # NOTE(review): presumably what keeps the finished launcher pod around so
+    # that its log can still be fetched - confirm against the mpi-operator docs
+    cleanPodPolicy: Running
+  mpiReplicaSpecs:
+    Launcher:
+      replicas: 1
+      template:
+        spec:
+          hostPID: true
+          hostIPC: true
+          dnsPolicy: ClusterFirstWithHostNet
+          volumes:
+          # Config map produced by the configMapGenerator in kustomization.yaml
+          - name: config
+            configMap:
+              name: gke-mpi-config
+          containers:
+          - image: {container_image}
+            name: mpi-launcher
+            volumeMounts:
+            - name: config
+              mountPath: /config
+            # The launcher execute script, as mounted under /config
+            command: ["bash", "{launcher_script_path}"]
+            securityContext:
+              privileged: true
+    Worker:
+      # The number of worker pods follows the n_nodes variable
+      replicas: {n_nodes}
+      template:
+        spec:
+          containers:
+          - image: {container_image}
+            name: mpi-worker
+            securityContext:
+              privileged: true
+            volumeMounts:
+            - name: config
+              mountPath: /config
+            # The worker execute script, as mounted under /config
+            command: ["bash", "{worker_script_path}"]
+          volumes:
+          # Same generated config map as the one mounted into the launcher
+          - name: config
+            configMap:
+              name: gke-mpi-config
+            
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/kustomization.yaml.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/kustomization.yaml.tpl
@@ -0,0 +1,3 @@
+# Kustomization that ties the rendered MPIJob definition to the config map
+# carrying the locally generated files.
+resources:
+- {gke_mpi_yaml}
+# configMapGenerator and generatorOptions sections, built by the workflow manager
+{config_map_gen_section}
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/launcher_execute_script.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/launcher_execute_script.tpl
@@ -0,0 +1,6 @@
+#!/bin/bash
+mkdir -p {container_work_dir} && cd {container_work_dir}
+# important to resolve symlink
+cp --remove-destination -r -L /config/* .
+
+{unformatted_command_without_logs}
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/worker_execute_script.tpl b/var/ramble/repos/builtin/workflow_managers/gke-mpi/worker_execute_script.tpl
@@ -0,0 +1,5 @@
+#!/bin/bash
+mkdir -p {container_work_dir} && cd {container_work_dir}
+# important to resolve symlink
+cp --remove-destination -r -L /config/* .
+/usr/sbin/sshd -De -f /etc/ssh/sshd_config
diff --git a/var/ramble/repos/builtin/workflow_managers/gke-mpi/workflow_manager.py b/var/ramble/repos/builtin/workflow_managers/gke-mpi/workflow_manager.py
@@ -0,0 +1,186 @@
+# Copyright 2022-2025 The Ramble Authors
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import os
+import shutil
+import textwrap
+
+from ramble.wmkit import *
+
+from spack.util.executable import Executable, ProcessError
+
+
+class GkeMpi(WorkflowManagerBase):
+    """GKE workflow manager that uses the MPI operator"""
+
+    name = "gke-mpi"
+
+    maintainers("linsword13")
+
+    tags("workflow", "gke", "mpi")
+
+    workflow_manager_variable(
+        name="job_name",
+        default="{application_name}-{workload_name}-{experiment_name}",
+        description="GKE job name",
+    )
+
+    workflow_manager_variable(
+        name="extra_metadata",
+        default="",
+        description="Extra line-separated key:val pairs for the metadata section",
+    )
+
+    workflow_manager_variable(
+        name="cores_per_node",
+        default="{processes_per_node}",
+        description="Cores per node",
+    )
+
+    workflow_manager_variable(
+        name="container_image",
+        default="",
+        description="url to the container image",
+    )
+
+    workflow_manager_variable(
+        name="extra_container_config_files",
+        default="",
+        description="extra line-separated list of config files to be mapped to containers",
+    )
+
+    workflow_manager_variable(
+        name="container_work_dir",
+        default="/config",
+        description="working directory inside the container",
+    )
+
+    workflow_manager_variable(
+        name="launcher_execute_script_template",
+        default="launcher_execute_script.tpl",
+        description="execute script template for the launcher",
+    )
+
+    register_template(
+        name="launcher_execute_script",
+        src_path="{launcher_execute_script_template}",
+    )
+
+    workflow_manager_variable(
+        name="worker_execute_script_template",
+        default="worker_execute_script.tpl",
+        description="execute script template for the workers",
+    )
+
+    register_template(
+        name="worker_execute_script",
+        src_path="{worker_execute_script_template}",
+    )
+
+    register_template(
+        name="gke_mpi_yaml",
+        src_path="gke_mpi.yaml.tpl",
+        dest_path="gke_mpi.yaml",
+        extra_vars_func="gke_mpi_yaml_vars",
+    )
+
+    def _gke_mpi_yaml_vars(self):
+        expander = self.app_inst.expander
+        extra_metadata_str = expander.expand_var_name("extra_metadata")
+        launcher_script = expander.expand_var_name("launcher_execute_script")
+        worker_script = expander.expand_var_name("worker_execute_script")
+        if extra_metadata_str:
+            extra_metadata_section = textwrap.indent(
+                extra_metadata_str, " " * 2
+            )
+        else:
+            extra_metadata_section = ""
+
+        return {
+            "extra_metadata_section": extra_metadata_section,
+            "launcher_script_path": os.path.join(
+                "/config", os.path.basename(launcher_script)
+            ),
+            "worker_script_path": os.path.join(
+                "/config", os.path.basename(worker_script)
+            ),
+        }
+
+    register_template(
+        name="kustomization.yaml",
+        src_path="kustomization.yaml.tpl",
+        dest_path="kustomization.yaml",
+        extra_vars_func="kustomization_yaml_vars",
+    )
+
+    def _kustomization_yaml_vars(self):
+        files = ["{launcher_execute_script}", "{worker_execute_script}"]
+        expander = self.app_inst.expander
+        extra_files_str = expander.expand_var_name(
+            "extra_container_config_files"
+        )
+        if extra_files_str:
+            files.extend(extra_files_str.split("\n"))
+        file_lines = "\n".join(
+            [expander.expand_var(f.lstrip("- ")) for f in files]
+        )
+        lines = [
+            "configMapGenerator:",
+            "- name: gke-mpi-config",
+            "  files:",
+            textwrap.indent(file_lines, "  - "),
+            "generatorOptions:",
+            # For some reason kustomization does not apply the generated name properly.
+            # So disable the suffix as a workaround.
+            "  disableNameSuffixHash: true",
+        ]
+        config_map_gen_section = "\n".join(lines)
+        return {
+            "config_map_gen_section": config_map_gen_section,
+        }
+
+    register_template(
+        name="batch_submit",
+        src_path="batch_submit.tpl",
+        dest_path="batch_submit",
+    )
+
+    register_template(
+        name="batch_query",
+        src_path="batch_query.tpl",
+        dest_path="batch_query",
+    )
+
+    register_template(
+        name="batch_cancel",
+        src_path="batch_cancel.tpl",
+        dest_path="batch_cancel",
+    )
+
+    # A convenience for printing the deployment config
+    register_template(
+        name="batch_print_deployment",
+        src_path="batch_print_deployment.tpl",
+        dest_path="batch_print_deployment",
+    )
+
+    def _prepare_analysis(self, workspace):
+        if workspace.dry_run:
+            return
+        expander = self.app_inst.expander
+        query_script = expander.expand_var_name("batch_query")
+        query_cmd = Executable(query_script)
+        try:
+            query_cmd(output=os.devnull)
+        except ProcessError as e:
+            logger.warn(f"batch_query returns error {e}")
+        run_dir = expander.expand_var_name("experiment_run_dir")
+        launcher_log = os.path.join(run_dir, "launcher.log")
+        if os.path.exists(launcher_log):
+            log_file = expander.expand_var_name("log_file")
+            shutil.copy2(launcher_log, log_file)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#!/bin/bash`
	`2`	`+kubectl delete mpijobs {job_name}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#!/bin/bash`
	`2`	`+kubectl kustomize {experiment_run_dir}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+#!/bin/bash`
	`2`	`+kubectl apply --kustomize {experiment_run_dir}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+resources:`
	`2`	`+- {gke_mpi_yaml}`
	`3`	`+{config_map_gen_section}`