Commit 6fa3915

fix example for enas
1 parent 67d3e50 commit 6fa3915

File tree

6 files changed: +14 -15 lines changed


cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile.ppc64le

Lines changed: 0 additions & 1 deletion

@@ -1,5 +1,4 @@
 FROM ibmcom/tensorflow-ppc64le:2.2.0-py3
-RUN pip install rfc3339 grpcio googleapis-common-protos
 ADD . /usr/src/app/github.com/kubeflow/katib
 WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/metricscollector/v1beta1/tfevent-metricscollector/
 RUN pip install --no-cache-dir -r requirements.txt
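
The dropped line looks like a redundant install step: rfc3339, grpcio, and googleapis-common-protos are presumably already covered by the requirements.txt that the final RUN installs, so the separate pip call only duplicated work during the image build.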

examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu

Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@ ENV TARGET_DIR /opt/enas-cnn-cifar10
 ADD examples/v1beta1/trial-images/enas-cnn-cifar10 ${TARGET_DIR}
 WORKDIR ${TARGET_DIR}

+RUN pip3 install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH ${TARGET_DIR}

 RUN chgrp -R 0 ${TARGET_DIR} \

examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py

Lines changed: 9 additions & 11 deletions

@@ -1,12 +1,10 @@
-import keras
-import numpy as np
+from tensorflow import keras
 from keras.datasets import cifar10
 from ModelConstructor import ModelConstructor
 from tensorflow.keras.utils import to_categorical
 from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
 from keras.preprocessing.image import ImageDataGenerator
 import argparse
-import time

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='TrainingContainer')
@@ -46,7 +44,7 @@

     test_model.summary()
     test_model.compile(loss=keras.losses.categorical_crossentropy,
-                       optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
+                       optimizer=keras.optimizers.Adam(learning_rate=1e-3, decay=1e-4),
                        metrics=['accuracy'])

     (x_train, y_train), (x_test, y_test) = cifar10.load_data()
@@ -67,12 +65,12 @@

     print(">>> Data Loaded. Training starts.")
     for e in range(num_epochs):
-        print("\nTotal Epoch {}/{}".format(e+1, num_epochs))
-        history = test_model.fit_generator(generator=aug_data_flow,
-                                           steps_per_epoch=int(len(x_train)/128)+1,
-                                           epochs=1, verbose=1,
-                                           validation_data=(x_test, y_test))
-        print("Training-Accuracy={}".format(history.history['acc'][-1]))
+        print("\nTotal Epoch {}/{}".format(e + 1, num_epochs))
+        history = test_model.fit(aug_data_flow,
+                                 steps_per_epoch=int(len(x_train) / 128) + 1,
+                                 epochs=1, verbose=1,
+                                 validation_data=(x_test, y_test))
+        print("Training-Accuracy={}".format(history.history['accuracy'][-1]))
         print("Training-Loss={}".format(history.history['loss'][-1]))
-        print("Validation-Accuracy={}".format(history.history['val_acc'][-1]))
+        print("Validation-Accuracy={}".format(history.history['val_accuracy'][-1]))
         print("Validation-Loss={}".format(history.history['val_loss'][-1]))

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+scipy>=1.7.2
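
This entry presumably belongs to the requirements.txt that the updated Dockerfile.cpu now installs: Keras's ImageDataGenerator hands affine augmentations such as shifts, rotations, and zooms to scipy.ndimage and raises an ImportError when SciPy is missing. A quick illustration, with arbitrary augmentation parameters:

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rotation and shift transforms go through scipy.ndimage under the hood;
# without SciPy installed, drawing a batch raises an ImportError asking for it.
datagen = ImageDataGenerator(rotation_range=15, width_shift_range=0.1)
batch = next(datagen.flow(np.random.rand(4, 32, 32, 3), batch_size=4))
print(batch.shape)  # (4, 32, 32, 3)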

examples/v1beta1/trial-images/tf-mnist-with-summaries/README.md

Lines changed: 1 addition & 1 deletion

@@ -8,4 +8,4 @@ If you want to read more about this example, visit the official
 GitHub repository.

 Katib uses this training container in some Experiments, for instance in the
-[TF Event Metrics Collector](../../metrics-collector/tfevent-metrics-collector.yaml#L55-L64).
+[TF Event Metrics Collector](../../metrics-collector/tfevent-metrics-collector.yaml#L42-L49).

test/e2e/v1beta1/argo_workflow.py

Lines changed: 2 additions & 2 deletions

@@ -59,7 +59,7 @@
     "earlystopping-medianstop": "cmd/earlystopping/medianstop/v1beta1/Dockerfile",
     "trial-mxnet-mnist": "examples/v1beta1/trial-images/mxnet-mnist/Dockerfile",
     "trial-pytorch-mnist": "examples/v1beta1/trial-images/pytorch-mnist/Dockerfile",
-    # "trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile",
+    "trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile",
     "trial-enas-cnn-cifar10-gpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu",
     "trial-enas-cnn-cifar10-cpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu",
     "trial-darts-cnn-cifar10": "examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile",
@@ -80,7 +80,7 @@
     "pytorchjob": "examples/v1beta1/kubeflow-training-operator/pytorchjob-mnist.yaml",
     "tfjob": "examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml",
     "file-metricscollector": "examples/v1beta1/metrics-collector/file-metrics-collector.yaml",
-    # "tfevent-metricscollector": "examples/v1beta1/metrics-collector/tfevent-metrics-collector.yaml",
+    "tfevent-metricscollector": "examples/v1beta1/metrics-collector/tfevent-metrics-collector.yaml",
     "never-resume": "examples/v1beta1/resume-experiment/never-resume.yaml",
     "from-volume-resume": "examples/v1beta1/resume-experiment/from-volume-resume.yaml",
     "median-stop": "examples/v1beta1/early-stopping/median-stop.yaml"
