Skip to content

Commit 2c8758b

Browse files
authored
Allow running examples on Apple Silicon M1 and fix image build errors for arm64 (#1898)
1 parent 170647d commit 2c8758b

File tree

9 files changed

+46
-15
lines changed

9 files changed

+46
-15
lines changed

cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,13 @@ ADD ./pkg/ ${TARGET_DIR}/pkg/
77
ADD ./${METRICS_COLLECTOR_DIR}/ ${TARGET_DIR}/${METRICS_COLLECTOR_DIR}/
88
WORKDIR ${TARGET_DIR}/${METRICS_COLLECTOR_DIR}
99

10+
RUN if [ "$(uname -m)" = "aarch64" ]; then \
11+
apt-get -y update && \
12+
apt-get -y install gfortran libpcre3 libpcre3-dev && \
13+
apt-get clean && \
14+
rm -rf /var/lib/apt/lists/*; \
15+
fi
16+
1017
RUN pip install --no-cache-dir -r requirements.txt
1118

1219
RUN chgrp -R 0 ${TARGET_DIR} \

cmd/suggestion/chocolate/v1beta1/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ ENV SUGGESTION_DIR cmd/suggestion/chocolate/v1beta1
1616
RUN apt-get -y update && \
1717
apt-get -y install git && \
1818
if [ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "aarch64" ]; then \
19-
apt-get -y install gfortran libopenblas-dev liblapack-dev; \
19+
apt-get -y install gfortran libopenblas-dev liblapack-dev g++; \
2020
fi && \
2121
apt-get clean && \
2222
rm -rf /var/lib/apt/lists/*

examples/v1beta1/kind-cluster/deploy.sh

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,8 @@ if [ -z "$(command -v kubectl)" ]; then
3636
exit 1
3737
fi
3838

39-
# Step 1. Create Kind cluster with Kubernetes v1.22.9
40-
kind create cluster --image kindest/node:v1.22.9
39+
# Step 1. Create Kind cluster with Kubernetes v1.23.6
40+
kind create cluster --image kindest/node:v1.23.6
4141
echo -e "\nKind cluster has been created\n"
4242

4343
# Step 2. Set context for kubectl
@@ -53,6 +53,12 @@ kubectl get nodes
5353
echo -e "\nDeploying Katib components\n"
5454
kubectl apply -k "github.com/kubeflow/katib.git/manifests/v1beta1/installs/katib-standalone?ref=master"
5555

56+
# If the local machine's CPU architecture is arm64, rewrite mysql image.
57+
if [ "$(uname -m)" = "arm64" ]; then
58+
kubectl patch deployments -n kubeflow katib-mysql --type json -p \
59+
'[{"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "arm64v8/mysql:8.0.29-oracle"}]'
60+
fi
61+
5662
# Wait until all Katib pods are running.
5763
kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in (controller,db-manager,mysql,ui)" -n kubeflow pod
5864

examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,14 @@ WORKDIR ${TARGET_DIR}
77

88
ENV PYTHONPATH ${TARGET_DIR}
99

10+
RUN if [ "$(uname -m)" = "aarch64" ]; then \
11+
apt-get -y update && \
12+
apt-get -y install gfortran libpcre3 libpcre3-dev && \
13+
apt-get clean && \
14+
rm -rf /var/lib/apt/lists/*; \
15+
fi
16+
1017
RUN pip install --no-cache-dir -r requirements.txt
11-
RUN pip install --no-cache-dir tensorflow==2.9.1
1218
RUN chgrp -R 0 ${TARGET_DIR} \
1319
&& chmod -R g+rwX ${TARGET_DIR}
1420

examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ WORKDIR ${TARGET_DIR}
77

88
ENV PYTHONPATH ${TARGET_DIR}
99

10-
RUN pip install --no-cache-dir -r requirements.txt
10+
RUN pip install --no-cache-dir scipy==1.8.1
1111
RUN chgrp -R 0 ${TARGET_DIR} \
1212
&& chmod -R g+rwX ${TARGET_DIR}
1313

examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,3 @@
11
scipy>=1.7.2
2+
tensorflow==2.9.1; platform_machine=="x86_64"
3+
tensorflow-aarch64==2.9.1; platform_machine=="aarch64"

examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,14 @@ FROM python:3.9-slim
33
ADD examples/v1beta1/trial-images/tf-mnist-with-summaries /opt/tf-mnist-with-summaries
44
WORKDIR /opt/tf-mnist-with-summaries
55

6-
RUN pip install --no-cache-dir tensorflow==2.9.1
6+
RUN if [ "$(uname -m)" = "aarch64" ]; then \
7+
apt-get -y update && \
8+
apt-get -y install gfortran libpcre3 libpcre3-dev && \
9+
apt-get clean && \
10+
rm -rf /var/lib/apt/lists/*; \
11+
fi
12+
13+
RUN pip install --no-cache-dir -r requirements.txt
714
RUN chgrp -R 0 /opt/tf-mnist-with-summaries \
815
&& chmod -R g+rwX /opt/tf-mnist-with-summaries
916

examples/v1beta1/trial-images/tf-mnist-with-summaries/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
tensorflow==2.9.1; platform_machine=="x86_64"
2+
tensorflow-aarch64==2.9.1; platform_machine=="aarch64"

scripts/v1beta1/build.sh

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -112,32 +112,33 @@ echo -e "\nBuilding median stopping rule...\n"
112112
docker build --platform "linux/$ARCH" -t "${REGISTRY}/earlystopping-medianstop:${TAG}" -f ${CMD_PREFIX}/earlystopping/medianstop/${VERSION}/Dockerfile .
113113

114114
# Training container images
115+
echo -e "\nBuilding training container images..."
116+
115117
if [ ! "$ARCH" = "amd64" ]; then
116-
echo -e "\nTraining container images are supported only amd64."
118+
echo -e "\nSome training container images are supported only amd64."
117119
else
118120

119-
echo -e "\nBuilding training container images..."
120-
121121
echo -e "\nBuilding mxnet mnist training container example...\n"
122122
docker build --platform linux/amd64 -t "${REGISTRY}/mxnet-mnist:${TAG}" -f examples/${VERSION}/trial-images/mxnet-mnist/Dockerfile .
123123

124-
echo -e "\nBuilding Tensorflow with summaries mnist training container example...\n"
125-
docker build --platform linux/amd64 -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile .
126-
127124
echo -e "\nBuilding PyTorch mnist training container example...\n"
128125
docker build --platform linux/amd64 -t "${REGISTRY}/pytorch-mnist:${TAG}" -f examples/${VERSION}/trial-images/pytorch-mnist/Dockerfile .
129126

130127
echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with GPU support...\n"
131128
docker build --platform linux/amd64 -t "${REGISTRY}/enas-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.gpu .
132129

133-
echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n"
134-
docker build --platform linux/amd64 -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu .
135-
136130
echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with CPU support...\n"
137131
docker build --platform linux/amd64 -t "${REGISTRY}/darts-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.cpu .
138132

139133
echo -e "\nBuilding PyTorch CIFAR-10 CNN training container example for DARTS with GPU support...\n"
140134
docker build --platform linux/amd64 -t "${REGISTRY}/darts-cnn-cifar10-gpu:${TAG}" -f examples/${VERSION}/trial-images/darts-cnn-cifar10/Dockerfile.gpu .
135+
141136
fi
142137

138+
echo -e "\nBuilding Tensorflow with summaries mnist training container example...\n"
139+
docker build --platform "linux/$ARCH" -t "${REGISTRY}/tf-mnist-with-summaries:${TAG}" -f examples/${VERSION}/trial-images/tf-mnist-with-summaries/Dockerfile .
140+
141+
echo -e "\nBuilding Keras CIFAR-10 CNN training container example for ENAS with CPU support...\n"
142+
docker build --platform "linux/$ARCH" -t "${REGISTRY}/enas-cnn-cifar10-cpu:${TAG}" -f examples/${VERSION}/trial-images/enas-cnn-cifar10/Dockerfile.cpu .
143+
143144
echo -e "\nAll Katib images with ${TAG} tag have been built successfully!\n"

0 commit comments

Comments
 (0)