Skip to content

Commit 46c15b9

Browse files
yansun1996 authored and sajmera-pensando committed
Add support for concurrent techsupport run
1 parent 2cc6d63 commit 46c15b9

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

tools/techsupport_dump.sh

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,11 @@
2121
#
2222
set -e
2323

24-
TECH_SUPPORT_FILE=techsupport-$(date "+%F_%T" | sed -e 's/:/-/g')
24+
# generate a UUID to tag the techsupport DaemonSet created by this run
25+
# so that concurrent techsupport runs do not interfere with each other
26+
UUID=$(uuidgen)
27+
28+
TECH_SUPPORT_FILE=techsupport-${UUID}-$(date "+%F_%T" | sed -e 's/:/-/g')
2529
DEFAULT_RESOURCES="nodes events"
2630
NFD_RESOURCES="pods daemonsets deployments configmap"
2731
KMM_RESOURCES="pods daemonsets deployments modules configmap"
@@ -165,21 +169,21 @@ else
165169
NODES=$(echo "${NODES} ${CONTROL_PLANE}" | tr ' ' '\n' | sort -u)
166170
fi
167171

168-
cat <<EOF >/tmp/techsupport.json
172+
cat <<EOF >/tmp/techsupport-${UUID}.json
169173
apiVersion: apps/v1
170174
kind: DaemonSet
171175
metadata:
172-
name: techsupport
176+
name: techsupport-${UUID}
173177
labels:
174-
app: techsupport
178+
app: techsupport-${UUID}
175179
spec:
176180
selector:
177181
matchLabels:
178-
app: techsupport
182+
app: techsupport-${UUID}
179183
template:
180184
metadata:
181185
labels:
182-
app: techsupport
186+
app: techsupport-${UUID}
183187
spec:
184188
containers:
185189
- name: busybox
@@ -190,10 +194,10 @@ spec:
190194
- sleep
191195
- 1h
192196
EOF
193-
${KUBECTL} apply -f /tmp/techsupport.json
197+
${KUBECTL} apply -f /tmp/techsupport-${UUID}.json
194198

195199
cleanup() {
196-
${KUBECTL} delete -f /tmp/techsupport.json
200+
${KUBECTL} delete -f /tmp/techsupport-${UUID}.json
197201
}
198202

199203
trap cleanup EXIT
@@ -255,15 +259,15 @@ for node in "${nodeList[@]}"; do
255259
pod_logs $GPUOPER_NS "gpu-operator" $node $GPUOPER_PODS
256260

257261
# node logs
258-
dbgpods=$(${KUBECTL} get pods -o name --field-selector spec.nodeName=${node} -l "app=techsupport" || continue)
262+
dbgpods=$(${KUBECTL} get pods -o name --field-selector spec.nodeName=${node} -l "app=techsupport-${UUID}" || continue)
259263

260264
# wait for the debug pod
261265
for dbgpod in ${dbgpods}; do
262266
${KUBECTL} wait --for=condition=Ready=true ${dbgpod} >/dev/null
263267
log " lsmod"
264-
${KUBECTL} exec -it ${dbgpod} -- sh -c "lsmod | grep amdgpu || true" >${TECH_SUPPORT_FILE}/${node}/lsmod.txt
268+
${KUBECTL} exec ${dbgpod} -- sh -c "lsmod | grep amdgpu || true" >${TECH_SUPPORT_FILE}/${node}/lsmod.txt
265269
log " dmesg"
266-
${KUBECTL} exec -it ${dbgpod} -- sh -c "dmesg || true" >${TECH_SUPPORT_FILE}/${node}/dmesg.txt
270+
${KUBECTL} exec ${dbgpod} -- sh -c "dmesg || true" >${TECH_SUPPORT_FILE}/${node}/dmesg.txt
267271
done
268272
done
269273

0 commit comments

Comments
 (0)