Skip to content

Commit 92631f1

Browse files
committed
pkg/steps/clusterinstall/template: Sync with openshift/release
Bringing over a number of changes that have landed in ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml as of openshift/release@016eb4ed27 (Merge pull request openshift/release#6505 from hongkailiu/clusterReaders, 2019-12-19).

One series improved the kill logic:

* openshift/release@9cd158adf3 (template: Use a more correct kill command, 2019-12-03, openshift/release#6223).
* openshift/release@d0744e520d (exit with 0 even if kill failed, 2019-12-09, openshift/release#6295).

Another series was around AWS instance console logs:

* openshift/release@e102a16d89 (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e: Gather node console logs on AWS, 2019-12-02, openshift/release#6189).
* openshift/release@26fde70045 (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e: Set AWS_DEFAULT_REGION, 2019-12-04, openshift/release#6249).

And there were also:

* openshift/release@cdf97164aa (templates: Add large and xlarge variants, 2019-11-25, openshift/release#6081).
* openshift/release@8cbef5e4a7 (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e: Error-catching for Google OAuth pokes, 2019-12-02, openshift/release#6190).
* openshift/release@ad29eda8dd (template: Gather the prometheus target metadata during teardown, 2019-12-12, openshift/release#6379).
1 parent fb6c435 commit 92631f1

File tree

1 file changed

+32
-3
lines changed

1 file changed

+32
-3
lines changed

pkg/steps/clusterinstall/template.go

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ objects:
156156
export PATH=/usr/libexec/origin:$PATH
157157
158158
trap 'touch /tmp/shared/exit' EXIT
159-
trap 'kill $(jobs -p); exit 0' TERM
159+
trap 'jobs -p | xargs -r kill || true; exit 0' TERM
160160
161161
function fips_check() {
162162
oc --insecure-skip-tls-verify --request-timeout=60s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodelist
@@ -425,6 +425,12 @@ objects:
425425
workers=0
426426
fi
427427
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
428+
master_type=null
429+
if [[ "${CLUSTER_VARIANT}" =~ "xlarge" ]]; then
430+
master_type=m5.8xlarge
431+
elif [[ "${CLUSTER_VARIANT}" =~ "large" ]]; then
432+
master_type=m5.4xlarge
433+
fi
428434
subnets="[]"
429435
if [[ "${CLUSTER_VARIANT}" =~ "shared-vpc" ]]; then
430436
case $((RANDOM % 4)) in
@@ -446,6 +452,7 @@ objects:
446452
replicas: 3
447453
platform:
448454
aws:
455+
type: ${master_type}
449456
zones:
450457
- us-east-1a
451458
- us-east-1b
@@ -517,7 +524,7 @@ objects:
517524
elif [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
518525
# HACK: try to "poke" the token endpoint before the test starts
519526
for i in $(seq 1 30); do
520-
code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' )"
527+
code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' || echo "Failed to POST https://oauth2.googleapis.com/token with $?" 1>&2)"
521528
if [[ "${code}" == "400" ]]; then
522529
break
523530
fi
@@ -706,6 +713,10 @@ objects:
706713
value: /etc/openshift-installer/gce.json
707714
- name: KUBECONFIG
708715
value: /tmp/artifacts/installer/auth/kubeconfig
716+
- name: USER
717+
value: test
718+
- name: HOME
719+
value: /tmp
709720
command:
710721
- /bin/bash
711722
- -c
@@ -773,6 +784,7 @@ objects:
773784
fi
774785
775786
oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
787+
oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.spec.providerID}{"\n"}{end}' | sed 's|.*/||' > /tmp/node-provider-IDs
776788
oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
777789
oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
778790
@@ -813,6 +825,22 @@ objects:
813825
queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap
814826
done < /tmp/nodes
815827
828+
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
829+
# FIXME: get epel-release or otherwise add awscli to our teardown image
830+
export PATH="${HOME}/.local/bin:${PATH}"
831+
easy_install --user pip # our Python 2.7.5 is even too old for ensurepip
832+
pip install --user awscli
833+
export AWS_DEFAULT_REGION="$(python -c 'import json; data = json.load(open("/tmp/artifacts/installer/metadata.json")); print(data["aws"]["region"])')"
834+
echo "gathering node console output from ${AWS_DEFAULT_REGION}"
835+
fi
836+
837+
while IFS= read -r i; do
838+
mkdir -p "/tmp/artifacts/nodes/${i}"
839+
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
840+
queue /tmp/artifacts/nodes/$i/console aws ec2 get-console-output --instance-id "${i}"
841+
fi
842+
done < /tmp/node-provider-IDs
843+
816844
FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false
817845
FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false
818846
@@ -836,6 +864,7 @@ objects:
836864
837865
echo "Snapshotting prometheus (may take 15s) ..."
838866
queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus .
867+
FILTER=gzip queue /tmp/artifacts/metrics/prometheus-target-metadata.json.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- /bin/bash -c "curl -G http://localhost:9090/api/v1/targets/metadata --data-urlencode 'match_target={instance!=\"\"}'"
839868
840869
echo "Running must-gather..."
841870
mkdir -p /tmp/artifacts/must-gather
@@ -849,7 +878,7 @@ objects:
849878
}
850879
851880
trap 'teardown' EXIT
852-
trap 'kill $(jobs -p); exit 0' TERM
881+
trap 'jobs -p | xargs -r kill || true; exit 0' TERM
853882
854883
for i in $(seq 1 220); do
855884
if [[ -f /tmp/shared/exit ]]; then

0 commit comments

Comments
 (0)