Skip to content

Commit 7e56379

Browse files
committed
templates/openshift: grab bootstrap log on failure
This uses the Terraform state to discover the IP address of the bootstrap node (ideally, in the future, the installer will provide this information in a form that is easier to consume). It then connects to the gatewayd instance on that machine and pulls the logs for various services. Hopefully, these logs will be useful when diagnosing installation failures.
1 parent bfb2475 commit 7e56379

File tree

2 files changed

+58
-6
lines changed

2 files changed

+58
-6
lines changed

ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -373,8 +373,7 @@ objects:
373373
exit 1
374374
fi
375375
376-
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster &
377-
wait "$!"
376+
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster
378377
379378
# Performs cleanup of all created resources
380379
- name: teardown
@@ -421,7 +420,34 @@ objects:
421420
export PATH=$PATH:/tmp/shared
422421
423422
echo "Gathering artifacts ..."
424-
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
423+
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap
424+
425+
if [ -f /tmp/artifacts/installer/terraform.tfstate ]
426+
then
427+
# we don't have jq, so the python equivalent of
428+
# jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)'
429+
bootstrap_ip=$(python -c \
430+
'import sys, json; d=reduce(lambda x,y: dict(x.items() + y.items()), map(lambda x: x["resources"], json.load(sys.stdin)["modules"])); k="aws_instance.bootstrap"; print d[k]["primary"]["attributes"]["public_ip"] if k in d else ""' \
431+
< /tmp/artifacts/installer/terraform.tfstate
432+
)
433+
434+
if [ -n "${bootstrap_ip}" ]
435+
then
436+
for service in bootkube openshift kubelet crio
437+
do
438+
queue "/tmp/artifacts/bootstrap/${service}.service" curl \
439+
--insecure \
440+
--silent \
441+
--connect-timeout 5 \
442+
--retry 3 \
443+
--cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \
444+
--key /tmp/artifacts/installer/tls/journal-gatewayd.key \
445+
--url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service"
446+
done
447+
fi
448+
else
449+
echo "No terraform statefile found. Skipping collection of bootstrap logs."
450+
fi
425451
426452
oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
427453
oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers

ci-operator/templates/openshift/installer/cluster-launch-installer-src.yaml

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,7 @@ objects:
338338
exit 1
339339
fi
340340
341-
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster &
342-
wait "$!"
341+
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster
343342
344343
# Performs cleanup of all created resources
345344
- name: teardown
@@ -386,7 +385,34 @@ objects:
386385
export PATH=$PATH:/tmp/shared
387386
388387
echo "Gathering artifacts ..."
389-
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
388+
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap
389+
390+
if [ -f /tmp/artifacts/installer/terraform.tfstate ]
391+
then
392+
# we don't have jq, so the python equivalent of
393+
# jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)'
394+
bootstrap_ip=$(python -c \
395+
'import sys, json; d=reduce(lambda x,y: dict(x.items() + y.items()), map(lambda x: x["resources"], json.load(sys.stdin)["modules"])); k="aws_instance.bootstrap"; print d[k]["primary"]["attributes"]["public_ip"] if k in d else ""' \
396+
< /tmp/artifacts/installer/terraform.tfstate
397+
)
398+
399+
if [ -n "${bootstrap_ip}" ]
400+
then
401+
for service in bootkube openshift kubelet crio
402+
do
403+
queue "/tmp/artifacts/bootstrap/${service}.service" curl \
404+
--insecure \
405+
--silent \
406+
--connect-timeout 5 \
407+
--retry 3 \
408+
--cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \
409+
--key /tmp/artifacts/installer/tls/journal-gatewayd.key \
410+
--url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service"
411+
done
412+
fi
413+
else
414+
echo "No terraform statefile found. Skipping collection of bootstrap logs."
415+
fi
390416
391417
oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
392418
oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers

0 commit comments

Comments
 (0)