Skip to content

Commit a925b29

Browse files
Update to Kind 1.27 and fix the integration tests (#2637) (#2639)
* update to Kind 1.27
* update katib test
* update katib test
* increase test pipeline timeout from 60 to 120 seconds

---------

Signed-off-by: juliusvonkohout <[email protected]>
1 parent 0fe0065 commit a925b29

File tree

5 files changed

+43
-92
lines changed

5 files changed

+43
-92
lines changed

.github/workflows/pipeline_kind_test.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ jobs:
4343
kubectl apply -f tests/e2e/yamls
4444
python3 ./tests/gh-actions/kf-objects/test_pipeline.py
4545
./tests/gh-actions/install_argo_cli.sh
46-
argo wait @latest -n kubeflow-user-example-com --request-timeout 60
46+
argo wait @latest -n kubeflow-user-example-com --request-timeout 120
+36-58
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,56 @@
11
---
2+
---
23
apiVersion: kubeflow.org/v1beta1
34
kind: Experiment
45
metadata:
5-
name: grid-example
66
namespace: kubeflow-user
7+
name: grid
78
spec:
9+
objective:
10+
type: minimize
11+
goal: 0.1
12+
objectiveMetricName: loss
13+
algorithm:
14+
algorithmName: grid
15+
parallelTrialCount: 2
16+
maxTrialCount: 2
17+
maxFailedTrialCount: 2
818
parameters:
919
- name: lr
1020
parameterType: double
1121
feasibleSpace:
12-
max: '0.01'
13-
min: '0.001'
14-
step: '0.001'
15-
- name: num-layers
16-
parameterType: int
17-
feasibleSpace:
18-
max: '3'
19-
min: '2'
20-
- name: optimizer
21-
parameterType: categorical
22+
min: "0.01"
23+
step: "0.005"
24+
max: "0.05"
25+
- name: momentum
26+
parameterType: double
2227
feasibleSpace:
23-
list:
24-
- adam
25-
objective:
26-
type: maximize
27-
goal: 0.80
28-
objectiveMetricName: Validation-accuracy
29-
additionalMetricNames:
30-
- Train-accuracy
31-
metricStrategies:
32-
- name: Validation-accuracy
33-
value: max
34-
- name: Train-accuracy
35-
value: max
36-
algorithm:
37-
algorithmName: grid
28+
min: "0.5"
29+
step: "0.1"
30+
max: "0.9"
3831
trialTemplate:
32+
primaryContainerName: training-container
33+
trialParameters:
34+
- name: learningRate
35+
description: Learning rate for the training model
36+
reference: lr
37+
- name: momentum
38+
description: Momentum for the training model
39+
reference: momentum
3940
trialSpec:
4041
apiVersion: batch/v1
4142
kind: Job
4243
spec:
4344
template:
44-
metadata:
45-
labels:
46-
sidecar.istio.io/inject: 'false'
4745
spec:
4846
containers:
49-
- command:
50-
- python3
51-
- /opt/mxnet-mnist/mnist.py
52-
- '--batch-size=64'
53-
- '--lr=${trialParameters.learningRate}'
54-
- '--num-layers=${trialParameters.numberLayers}'
55-
- '--optimizer=${trialParameters.optimizer}'
56-
image: docker.io/kubeflowkatib/mxnet-mnist:latest
57-
name: training-container
47+
- name: training-container
48+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
49+
command:
50+
- "python3"
51+
- "/opt/pytorch-mnist/mnist.py"
52+
- "--epochs=1"
53+
- "--batch-size=16"
54+
- "--lr=${trialParameters.learningRate}"
55+
- "--momentum=${trialParameters.momentum}"
5856
restartPolicy: Never
59-
trialParameters:
60-
- name: learningRate
61-
description: Learning rate for the training model
62-
reference: lr
63-
- name: numberLayers
64-
description: Number of training model layers
65-
reference: num-layers
66-
- name: optimizer
67-
description: Training model optimizer (sdg, adam or ftrl)
68-
reference: optimizer
69-
primaryContainerName: training-container
70-
successCondition: status.conditions.#(type=="Complete")#|#(status=="True")#
71-
failureCondition: status.conditions.#(type=="Failed")#|#(status=="True")#
72-
parallelTrialCount: 1
73-
maxTrialCount: 1
74-
maxFailedTrialCount: 1
75-
metricsCollectorSpec:
76-
collector:
77-
kind: StdOut
78-
resumePolicy: LongRunning

tests/gh-actions/kind-cluster-1-24.yaml

-27
This file was deleted.

tests/gh-actions/kind-cluster-1-25.yaml renamed to tests/gh-actions/kind-cluster-1-26.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ kubeadmConfigPatches:
1919
"service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
2020
nodes:
2121
- role: control-plane
22-
image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
22+
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
2323
- role: worker
24-
image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
24+
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
2525
- role: worker
26-
image: kindest/node:v1.25.3@sha256:f52781bc0d7a19fb6c405c2af83abfeb311f130707a0e219175677e366cc45d1
26+
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5

tests/gh-actions/kind-cluster.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ kubeadmConfigPatches:
1919
"service-account-signing-key-file": "/etc/kubernetes/pki/sa.key"
2020
nodes:
2121
- role: control-plane
22-
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
22+
image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57
2323
- role: worker
24-
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
24+
image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57
2525
- role: worker
26-
image: kindest/node:v1.26.6@sha256:5e5d789e90c1512c8c480844e0985bc3b4da4ba66179cc5b540fe5b785ca97b5
26+
image: kindest/node:v1.27.11@sha256:ec04b9f650954c033c978db9c25a9071b449179b0e509df258350c2f3034fb57

0 commit comments

Comments
 (0)