|
11 | 11 | from clustersConfig import ExtraConfigArgs
|
12 | 12 | import imageRegistry
|
13 | 13 | from common import git_repo_setup
|
14 |
| -from dpuVendor import init_vendor_plugin, IpuPlugin, MarvellDpuPlugin |
| 14 | +from dpuVendor import init_vendor_plugin, IpuPlugin |
15 | 15 | from imageRegistry import ImageRegistry
|
16 | 16 |
|
17 | 17 | DPU_OPERATOR_REPO = "https://github.com/openshift/dpu-operator.git"
|
@@ -211,6 +211,20 @@ def dpu_operator_start(client: K8sClient, repo: Optional[str]) -> None:
|
211 | 211 | client.oc_run_or_die("wait --for=condition=Ready pod --all -n openshift-dpu-operator --timeout=5m")
|
212 | 212 |
|
213 | 213 |
|
def ensure_vsp_ds_running(client: K8sClient) -> None:
    """Wait until the ``vsp`` DaemonSet reports every desired pod as available.

    Polls ``.status.desiredNumberScheduled`` and ``.status.numberAvailable``
    of the ``vsp`` DaemonSet up to ``retries`` times, pausing between polls.
    Returns as soon as the two counts match; if they never match, reports
    the failure through ``logger.error_and_exit``.

    Args:
        client: Cluster client used to run the ``oc get ds`` queries.
    """
    # NOTE(review): no "-n <namespace>" is passed, so the lookup relies on
    # whatever namespace the client defaults to -- confirm this is where the
    # vsp DaemonSet is created.
    retries = 10
    poll_interval_s = 10

    def read_status_count(field: str) -> int:
        # Kubernetes omits optional status counters (numberAvailable among
        # them) while they are zero, in which case jsonpath prints nothing.
        # Treat empty output as 0 instead of letting int("") raise ValueError
        # in exactly the state this function is supposed to wait through.
        raw = client.oc_run_or_die(f"get ds vsp -o jsonpath='{{.status.{field}}}'").out
        return int(raw.strip().strip("'") or 0)

    for attempt in range(retries):
        desired_pods = read_status_count("desiredNumberScheduled")
        available_pods = read_status_count("numberAvailable")
        if available_pods == desired_pods:
            return
        logger.info(f"Waiting for VSP ds to scale up. Desired pods: {desired_pods} Available pods: {available_pods}")
        # No point sleeping after the final check; we are about to give up.
        if attempt < retries - 1:
            time.sleep(poll_interval_s)

    logger.error_and_exit("Vsp pods failed to reach ready state")
| 226 | + |
| 227 | + |
214 | 228 | def ExtraConfigDpu(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
|
215 | 229 | [f.result() for (_, f) in futures.items()]
|
216 | 230 | logger.info("Running post config step to start DPU operator on IPU")
|
@@ -246,6 +260,7 @@ def ExtraConfigDpu(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str,
|
246 | 260 | vendor_plugin.start(vendor_plugin.vsp_image_name(imgReg), client)
|
247 | 261 | else:
|
248 | 262 | vendor_plugin.build_push_start(lh, client, imgReg)
|
| 263 | + ensure_vsp_ds_running(client) |
249 | 264 |
|
250 | 265 | git_repo_setup(repo, repo_wipe=False, url=DPU_OPERATOR_REPO)
|
251 | 266 | if cfg.rebuild_dpu_operators_images:
|
@@ -293,6 +308,7 @@ def ExtraConfigDpuHost(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[s
|
293 | 308 | h.ssh_connect("core")
|
294 | 309 | vendor_plugin = init_vendor_plugin(h, node.kind or "")
|
295 | 310 | vendor_plugin.build_push_start(lh, client, imgReg)
|
| 311 | + ensure_vsp_ds_running(client) |
296 | 312 |
|
297 | 313 | git_repo_setup(repo, repo_wipe=False, url=DPU_OPERATOR_REPO)
|
298 | 314 | if cfg.rebuild_dpu_operators_images:
|
|
0 commit comments