Skip to content

Commit 504d3c4

Browse files
committed
modify script to build container image
1 parent 2392b88 commit 504d3c4

File tree

13 files changed

+359
-48
lines changed

13 files changed

+359
-48
lines changed

Makefile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
HAS_LINT := $(shell command -v golangci-lint;)
22
COMMIT := v1beta1-$(shell git rev-parse --short=7 HEAD)
33
KATIB_REGISTRY := docker.io/kubeflowkatib
4+
CPU_ARCH ?= amd64
45

56
# Run tests
67
.PHONY: test
@@ -49,10 +50,10 @@ endif
4950

5051
# Build images for the Katib v1beta1 components.
5152
build: generate
52-
ifeq ($(and $(REGISTRY),$(TAG)),)
53-
$(error REGISTRY and TAG must be set. Usage: make build REGISTRY=<registry> TAG=<tag>)
53+
ifeq ($(and $(REGISTRY),$(TAG),$(CPU_ARCH)),)
54+
$(error REGISTRY, TAG and CPU_ARCH must be set. Usage: make build REGISTRY=<registry> TAG=<tag> CPU_ARCH=<cpu-architecture>)
5455
endif
55-
bash scripts/v1beta1/build.sh $(REGISTRY) $(TAG)
56+
bash scripts/v1beta1/build.sh $(REGISTRY) $(TAG) $(CPU_ARCH)
5657

5758
# Build and push Katib images from the latest master commit.
5859
push-latest: generate

cmd/suggestion/chocolate/v1beta1/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \
2323
ADD ./pkg/ ${TARGET_DIR}/pkg/
2424
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
2525
WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}
26+
RUN if [ "$(uname -m)" = "aarch64" ]; then \
27+
sed -i -e '$a git+https://github.com/fmder/ghalton@master' -e '/^ghalton/d' requirements.txt; \
28+
fi;
2629
RUN pip install --no-cache-dir -r requirements.txt
2730

2831
RUN chgrp -R 0 ${TARGET_DIR} \

docs/developer-guide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ see the following user guides:
1313
## Requirements
1414

1515
- [Go](https://golang.org/) (1.17 or later)
16-
- [Docker](https://docs.docker.com/) (17.05 or later)
16+
- [Docker](https://docs.docker.com/) (20.10 or later)
1717
- [Java](https://docs.oracle.com/javase/8/docs/technotes/guides/install/install_overview.html) (8 or later)
1818
- [Python](https://www.python.org/) (3.9 or later)
1919
- [kustomize](https://kustomize.io/) (4.0.5 or later)
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
apiVersion: kubeflow.org/v1beta1
2+
kind: Experiment
3+
metadata:
4+
namespace: kubeflow
5+
name: tfevent-metrics-collector
6+
spec:
7+
parallelTrialCount: 3
8+
maxTrialCount: 12
9+
maxFailedTrialCount: 3
10+
objective:
11+
type: maximize
12+
goal: 0.99
13+
objectiveMetricName: accuracy_1
14+
algorithm:
15+
algorithmName: random
16+
metricsCollectorSpec:
17+
source:
18+
fileSystemPath:
19+
path: /train
20+
kind: Directory
21+
collector:
22+
kind: TensorFlowEvent
23+
parameters:
24+
- name: learning_rate
25+
parameterType: double
26+
feasibleSpace:
27+
min: "0.01"
28+
max: "0.05"
29+
trialTemplate:
30+
primaryContainerName: training-container
31+
trialParameters:
32+
- name: learningRate
33+
description: Learning rate for the training model
34+
reference: learning_rate
35+
trialSpec:
36+
apiVersion: batch/v1
37+
kind: Job
38+
spec:
39+
template:
40+
spec:
41+
containers:
42+
- name: training-container
43+
image: docker.io/kubeflowkatib/tensorflow-mnist:latest
44+
command:
45+
- "python3"
46+
- "/opt/tensorflow-mnist/mnist_with_summaries.py"
47+
- "--log_dir=/train/metrics"
48+
- "--learning_rate=${trialParameters.learningRate}"
49+
restartPolicy: Never

examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
FROM tensorflow/tensorflow:1.15.4-py3
1+
FROM tensorflow/tensorflow:2.7.0
22

33
ENV TARGET_DIR /opt/enas-cnn-cifar10
44

55
ADD examples/v1beta1/trial-images/enas-cnn-cifar10 ${TARGET_DIR}
66
WORKDIR ${TARGET_DIR}
77

8-
RUN pip3 install --upgrade pip
9-
RUN pip3 install --upgrade -r requirements.txt
108
ENV PYTHONPATH ${TARGET_DIR}
119

1210
RUN chgrp -R 0 ${TARGET_DIR} \

examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
FROM tensorflow/tensorflow:1.15.4-gpu-py3
1+
FROM tensorflow/tensorflow:2.7.0-gpu
22

33
ENV TARGET_DIR /opt/enas-cnn-cifar10
44

55
ADD examples/v1beta1/trial-images/enas-cnn-cifar10 ${TARGET_DIR}
66
WORKDIR ${TARGET_DIR}
77

8-
RUN pip3 install --upgrade pip
9-
RUN pip3 install --upgrade -r requirements.txt
108
ENV PYTHONPATH ${TARGET_DIR}
119

1210
RUN chgrp -R 0 ${TARGET_DIR} \

examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import numpy as np
33
from keras.datasets import cifar10
44
from ModelConstructor import ModelConstructor
5-
from keras.utils import to_categorical
6-
from keras.utils import multi_gpu_model
5+
from tensorflow.keras.utils import to_categorical
6+
from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
77
from keras.preprocessing.image import ImageDataGenerator
88
import argparse
99
import time

examples/v1beta1/trial-images/enas-cnn-cifar10/requirements.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
FROM tensorflow/tensorflow:2.7.0
2+
3+
ADD examples/v1beta1/trial-images/tensorflow-mnist /opt/tensorflow-mnist
4+
WORKDIR /opt/tensorflow-mnist
5+
6+
# Add folder for the logs.
7+
RUN mkdir /katib
8+
9+
RUN chgrp -R 0 /opt/tensorflow-mnist \
10+
&& chmod -R g+rwX /opt/tensorflow-mnist \
11+
&& chgrp -R 0 /katib \
12+
&& chmod -R g+rwX /katib
13+
14+
ENTRYPOINT ["python3", "/opt/tensorflow-mnist/mnist_with_summaries.py"]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# TensorFlow MNIST Classification With Summaries Example
2+
3+
This is a TensorFlow MNIST image classification training container that outputs TF summaries.
4+
It uses a convolutional neural network to train the model.
5+
6+
If you want to read more about this example, visit the official
7+
[tensorflow](https://github.com/tensorflow/tensorflow/blob/7462dcaae1e8cfe1dfd0c62dd6083f9749a9d827/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py)
8+
GitHub repository.
9+
10+
Katib uses this training container in some Experiments, for instance in the
11+
[TF Event Metrics Collector](../../metrics-collector/tfevent-metrics-collector.yaml#L55-L64).

0 commit comments

Comments
 (0)