@@ -156,7 +156,7 @@ objects:
156
156
# Prepend /usr/libexec/origin so binaries there are found before the rest of PATH.
export PATH=/usr/libexec/origin:$PATH
157
157
158
158
# Whenever this script exits, create the /tmp/shared/exit marker file
# (another script in this template tests for it with `-f /tmp/shared/exit`).
trap 'touch /tmp/shared/exit' EXIT
159
- trap 'kill $( jobs -p) ; exit 0' TERM
159
# On TERM, signal every background job and exit 0. `xargs -r` does not run
# `kill` at all when `jobs -p` prints nothing, and `|| true` discards a
# non-zero status from the pipeline.
+ trap 'jobs -p | xargs -r kill || true ; exit 0' TERM
160
160
161
161
function fips_check() {
162
162
oc --insecure-skip-tls-verify --request-timeout=60s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodelist
@@ -425,6 +425,12 @@ objects:
425
425
workers=0
426
426
fi
427
427
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
428
# Pick the AWS instance type from CLUSTER_VARIANT. The value is substituted
# into the `type:` field under `platform: aws:` in the generated config below.
# NOTE(review): the default is the literal string "null", presumably meant to
# be read as a YAML null so the installer default applies — confirm.
+ master_type=null
429
# "xlarge" must be tested before "large": a quoted right-hand side of `=~` is
# matched as a literal substring, and "xlarge" contains "large".
+ if [[ "${CLUSTER_VARIANT}" =~ "xlarge" ]]; then
430
+ master_type=m5.8xlarge
431
+ elif [[ "${CLUSTER_VARIANT}" =~ "large" ]]; then
432
+ master_type=m5.4xlarge
433
+ fi
428
434
subnets="[]"
429
435
if [[ "${CLUSTER_VARIANT}" =~ "shared-vpc" ]]; then
430
436
case $((RANDOM % 4)) in
@@ -446,6 +452,7 @@ objects:
446
452
replicas: 3
447
453
platform:
448
454
aws:
455
+ type: ${master_type}
449
456
zones:
450
457
- us-east-1a
451
458
- us-east-1b
@@ -517,7 +524,7 @@ objects:
517
524
elif [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
518
525
# HACK: try to "poke" the token endpoint before the test starts
519
526
for i in $(seq 1 30); do
520
- code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' )"
527
# Capture only the HTTP status code of the POST. If curl itself fails, its exit
# status is reported on stderr; that message is redirected away from stdout, so
# it is not captured into `code`.
+ code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' || echo "Failed to POST https://oauth2.googleapis.com/token with $?" 1>&2 )"
521
528
if [[ "${code}" == "400" ]]; then
522
529
break
523
530
fi
@@ -706,6 +713,10 @@ objects:
706
713
value: /etc/openshift-installer/gce.json
707
714
- name: KUBECONFIG
708
715
value: /tmp/artifacts/installer/auth/kubeconfig
716
# USER and HOME are set explicitly for this container.
# NOTE(review): HOME=/tmp is presumably what makes the script's
# `pip install --user` and `${HOME}/.local/bin` usage work — confirm.
+ - name: USER
716
+ value: test
717
+ - name: HOME
718
+ value: /tmp
709
720
command:
710
721
- /bin/bash
711
722
- -c
@@ -773,6 +784,7 @@ objects:
773
784
fi
774
785
775
786
# Write one node name per line to /tmp/nodes.
oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
787
# Write one line per node to /tmp/node-provider-IDs: sed deletes everything up
# to and including the last "/" of .spec.providerID, leaving its final path
# component. NOTE(review): a node without a providerID presumably yields an
# empty line here — confirm.
+ oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.spec.providerID}{"\n"}{end}' | sed 's|.*/||' > /tmp/node-provider-IDs
776
788
oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
777
789
oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
778
790
@@ -813,6 +825,22 @@ objects:
813
825
queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap
814
826
done < /tmp/nodes
815
827
828
# On AWS only: install the aws CLI with `pip install --user`, put
# ${HOME}/.local/bin on PATH, and export the cluster's region for it.
+ if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
829
+ # FIXME: get epel-release or otherwise add awscli to our teardown image
830
+ export PATH="${HOME}/.local/bin:${PATH}"
831
+ easy_install --user pip # our Python 2.7.5 is even too old for ensurepip
832
+ pip install --user awscli
833
# The region is read from the ["aws"]["region"] key of
# /tmp/artifacts/installer/metadata.json.
# NOTE(review): the command substitution is an argument to `export`, so a
# python failure does not fail this line; AWS_DEFAULT_REGION is then empty.
+ export AWS_DEFAULT_REGION="$(python -c 'import json; data = json.load(open("/tmp/artifacts/installer/metadata.json")); print(data["aws"]["region"])')"
834
+ echo "gathering node console output from ${AWS_DEFAULT_REGION}"
835
+ fi
836
+
837
# For every line of /tmp/node-provider-IDs, create a per-ID artifact directory
# and, on AWS, queue a capture of that instance's console output.
# NOTE(review): assumes `queue` takes the output path as its first argument,
# as in the other `queue` calls in this script — confirm.
+ while IFS= read -r i; do
838
+ mkdir -p "/tmp/artifacts/nodes/${i}"
839
# Skip blank IDs: an empty line would otherwise call the AWS CLI with an empty
# --instance-id and target /tmp/artifacts/nodes//console.
+ if [[ "${CLUSTER_TYPE}" = "aws" && -n "${i}" ]]; then
840
+ queue "/tmp/artifacts/nodes/${i}/console" aws ec2 get-console-output --instance-id "${i}"
841
+ fi
842
+ done < /tmp/node-provider-IDs
843
+
816
844
FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false
817
845
FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false
818
846
@@ -836,6 +864,7 @@ objects:
836
864
837
865
echo "Snapshotting prometheus (may take 15s) ..."
838
866
queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus .
867
# Also save Prometheus target metadata: run curl inside the prometheus-k8s-0
# pod against http://localhost:9090/api/v1/targets/metadata with the
# URL-encoded selector match_target={instance!=""}. FILTER=gzip follows the
# other `FILTER=gzip queue ... .gz` calls in this script.
+ FILTER=gzip queue /tmp/artifacts/metrics/prometheus-target-metadata.json.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- /bin/bash -c "curl -G http://localhost:9090/api/v1/targets/metadata --data-urlencode 'match_target={instance!=\"\"}'"
839
868
840
869
echo "Running must-gather..."
841
870
mkdir -p /tmp/artifacts/must-gather
@@ -849,7 +878,7 @@ objects:
849
878
}
850
879
851
880
# Run `teardown` whenever this script exits.
trap 'teardown' EXIT
852
- trap 'kill $( jobs -p) ; exit 0' TERM
881
# On TERM, signal every background job and exit 0 (which in turn fires the
# EXIT trap). `xargs -r` does not run `kill` when `jobs -p` prints nothing, and
# `|| true` discards a non-zero status from the pipeline.
+ trap 'jobs -p | xargs -r kill || true ; exit 0' TERM
853
882
854
883
for i in $(seq 1 220); do
855
884
if [[ -f /tmp/shared/exit ]]; then
0 commit comments