21
21
#
22
22
set -e
23
23
24
- TECH_SUPPORT_FILE=techsupport-$( date " +%F_%T" | sed -e ' s/:/-/g' )
24
+ # generate a uuid to mark the techsupport daemonset
25
+ # so that the concurrent techsupport run won't affect each other
26
+ UUID=$( uuidgen)
27
+
28
+ TECH_SUPPORT_FILE=techsupport-${UUID} -$( date " +%F_%T" | sed -e ' s/:/-/g' )
25
29
DEFAULT_RESOURCES=" nodes events"
26
30
NFD_RESOURCES=" pods daemonsets deployments configmap"
27
31
KMM_RESOURCES=" pods daemonsets deployments modules configmap"
@@ -165,21 +169,21 @@ else
165
169
NODES=$( echo " ${NODES} ${CONTROL_PLANE} " | tr ' ' ' \n' | sort -u)
166
170
fi
167
171
168
- cat << EOF >/tmp/techsupport.json
172
+ cat << EOF >/tmp/techsupport- ${UUID} .json
169
173
apiVersion: apps/v1
170
174
kind: DaemonSet
171
175
metadata:
172
- name: techsupport
176
+ name: techsupport- ${UUID}
173
177
labels:
174
- app: techsupport
178
+ app: techsupport- ${UUID}
175
179
spec:
176
180
selector:
177
181
matchLabels:
178
- app: techsupport
182
+ app: techsupport- ${UUID}
179
183
template:
180
184
metadata:
181
185
labels:
182
- app: techsupport
186
+ app: techsupport- ${UUID}
183
187
spec:
184
188
containers:
185
189
- name: busybox
@@ -190,10 +194,10 @@ spec:
190
194
- sleep
191
195
- 1h
192
196
EOF
193
- ${KUBECTL} apply -f /tmp/techsupport.json
197
+ ${KUBECTL} apply -f /tmp/techsupport- ${UUID} .json
194
198
195
199
cleanup () {
196
- ${KUBECTL} delete -f /tmp/techsupport.json
200
+ ${KUBECTL} delete -f /tmp/techsupport- ${UUID} .json
197
201
}
198
202
199
203
trap cleanup EXIT
@@ -255,15 +259,15 @@ for node in "${nodeList[@]}"; do
255
259
pod_logs $GPUOPER_NS " gpu-operator" $node $GPUOPER_PODS
256
260
257
261
# node logs
258
- dbgpods=$( ${KUBECTL} get pods -o name --field-selector spec.nodeName=${node} -l " app=techsupport" || continue)
262
+ dbgpods=$( ${KUBECTL} get pods -o name --field-selector spec.nodeName=${node} -l " app=techsupport- ${UUID} " || continue)
259
263
260
264
# wait for the debug pod
261
265
for dbgpod in ${dbgpods} ; do
262
266
${KUBECTL} wait --for=condition=Ready=true ${dbgpod} > /dev/null
263
267
log " lsmod"
264
- ${KUBECTL} exec -it ${dbgpod} -- sh -c " lsmod | grep amdgpu || true" > ${TECH_SUPPORT_FILE} /${node} /lsmod.txt
268
+ ${KUBECTL} exec ${dbgpod} -- sh -c " lsmod | grep amdgpu || true" > ${TECH_SUPPORT_FILE} /${node} /lsmod.txt
265
269
log " dmesg"
266
- ${KUBECTL} exec -it ${dbgpod} -- sh -c " dmesg || true" > ${TECH_SUPPORT_FILE} /${node} /dmesg.txt
270
+ ${KUBECTL} exec ${dbgpod} -- sh -c " dmesg || true" > ${TECH_SUPPORT_FILE} /${node} /dmesg.txt
267
271
done
268
272
done
269
273
0 commit comments