Skip to content

Commit fa1b727

Browse files
committed
Dpu: Add check that vsp ds is running
Signed-off-by: Salvatore Daniele <[email protected]>
1 parent 370b7ae commit fa1b727

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

extraConfigDpu.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,19 @@ def dpu_operator_start(client: K8sClient, repo: Optional[str]) -> None:
211211
client.oc_run_or_die("wait --for=condition=Ready pod --all -n openshift-dpu-operator --timeout=5m")
212212

213213

214+
def _vsp_ds_status_count(client: K8sClient, field: str) -> int:
    """Return the integer stored in ``.status.<field>`` of the ``vsp`` DaemonSet.

    The value is read with ``oc get ds vsp -o jsonpath=...``. An empty result
    is treated as 0: optional counters such as ``numberAvailable`` are left
    out of the DaemonSet status while they are zero, in which case jsonpath
    prints nothing and a bare ``int("")`` would raise ``ValueError``.
    """
    raw = client.oc_run_or_die(f"get ds vsp -o jsonpath='{{.status.{field}}}'").out.strip()
    return int(raw) if raw else 0


def ensure_vsp_ds_running(client: K8sClient) -> None:
    """Block until every scheduled pod of the ``vsp`` DaemonSet is available.

    Polls the DaemonSet status up to 10 times, sleeping 10 seconds after each
    unsuccessful check. Returns as soon as the number of available pods equals
    the desired number; if that never happens, reports the failure through
    ``logger.error_and_exit``.

    Args:
        client: Cluster client used to run the ``oc`` queries.
    """
    retries = 10
    for _ in range(retries):
        desired_pods = _vsp_ds_status_count(client, "desiredNumberScheduled")
        available_pods = _vsp_ds_status_count(client, "numberAvailable")
        # NOTE(review): a DaemonSet that has not scheduled anything yet reports
        # desired == available == 0 and is accepted here, as in the original
        # check -- confirm whether zero desired pods should count as running.
        if available_pods == desired_pods:
            return
        logger.info(f"Waiting for VSP ds to scale up. Desired pods: {desired_pods} Available pods: {available_pods}")
        time.sleep(10)
    # All retries used without the DaemonSet becoming fully available.
    logger.error_and_exit("Vsp pods failed to reach ready state")
226+
214227
def ExtraConfigDpu(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
215228
[f.result() for (_, f) in futures.items()]
216229
logger.info("Running post config step to start DPU operator on IPU")
@@ -246,6 +259,7 @@ def ExtraConfigDpu(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str,
246259
vendor_plugin.start(vendor_plugin.vsp_image_name(imgReg), client)
247260
else:
248261
vendor_plugin.build_push_start(lh, client, imgReg)
262+
ensure_vsp_ds_running(client)
249263

250264
git_repo_setup(repo, repo_wipe=False, url=DPU_OPERATOR_REPO)
251265
if cfg.rebuild_dpu_operators_images:
@@ -293,6 +307,7 @@ def ExtraConfigDpuHost(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[s
293307
h.ssh_connect("core")
294308
vendor_plugin = init_vendor_plugin(h, node.kind or "")
295309
vendor_plugin.build_push_start(lh, client, imgReg)
310+
ensure_vsp_ds_running(client)
296311

297312
git_repo_setup(repo, repo_wipe=False, url=DPU_OPERATOR_REPO)
298313
if cfg.rebuild_dpu_operators_images:

0 commit comments

Comments
 (0)