Skip to content

Commit ea2cf86

Browse files
committed
Move to custom idpf kernel
As a workaround for the current idpf issues, try testing the upstream kernel with the supposed fix. Signed-off-by: Salvatore Daniele <[email protected]>
1 parent 2bd5d02 commit ea2cf86

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

extraConfigDpu.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from clustersConfig import ClustersConfig
22
import host
33
from k8sClient import K8sClient
4-
from concurrent.futures import Future
4+
from concurrent.futures import Future, ThreadPoolExecutor
55
import os
66
import time
77
from typing import Optional, List
@@ -13,6 +13,7 @@
1313
import common
1414
import re
1515
from dpuVendor import detect_dpu
16+
from kernel import ensure_IIC_500_kernel_is_installed
1617

1718
MICROSHIFT_KUBECONFIG = "/root/kubeconfig.microshift"
1819

@@ -167,6 +168,22 @@ def ExtraConfigDpuHost(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[s
167168
lh = host.LocalHost()
168169
client = K8sClient(cc.kubeconfig)
169170

171+
# Workaround to ensure patched kernel is deployed on IPU host to avoid issue in IIC-500
172+
def helper(h: host.Host) -> Optional[host.Result]:
173+
ensure_IIC_500_kernel_is_installed(h)
174+
return
175+
176+
executor = ThreadPoolExecutor(max_workers=len(cc.workers))
177+
f = []
178+
# Assuming that all workers have a DPU
179+
for e in cc.workers:
180+
logger.info(f"Calling helper function for node {e.node}")
181+
h = host.RemoteHost(e.node)
182+
f.append(executor.submit(helper, h))
183+
184+
for thread in f:
185+
logger.info(thread.result())
186+
170187
imgReg = imageRegistry.ensure_local_registry_running(lh, delete_all=False)
171188
imgReg.ocp_trust(client)
172189
# Need to trust the registry in OCP / Microshift

ipu.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from clustersConfig import NodeConfig
88
from bmc import BmcConfig
99
from clusterNode import ClusterNode
10+
from kernel import ensure_IIC_500_kernel_is_installed
1011
import host
1112
from bmc import BMC
1213
import common
@@ -94,6 +95,7 @@ def start(self, iso_or_image_path: str) -> bool:
9495
if ipu_bmc.version() != "1.8.0":
9596
logger.error_and_exit(f"Unexpected version {ipu_bmc.version()}, should be 1.8.0")
9697
self._boot_iso(iso_or_image_path)
98+
self._patch_ipu_host_kernel()
9799
return True
98100

99101
def has_booted(self) -> bool:
@@ -123,6 +125,12 @@ def helper(node: NodeConfig, iso_address: str) -> str:
123125
iso_address = f"http://{lh_ip}:{str(http_server.port)}/{iso_name}"
124126
logger.info(helper(node, iso_address))
125127

128+
def _patch_ipu_host_kernel(self) -> None:
129+
# Workaround to ensure patched kernel is deployed on IPU host to avoid issue in IIC-500
130+
assert self.config.dpu_host is not None
131+
ipu_host = host.RemoteHost(self.config.dpu_host)
132+
ensure_IIC_500_kernel_is_installed(ipu_host)
133+
126134
def post_boot(self, *, desired_ip_range: Optional[tuple[str, str]] = None) -> bool:
127135
# As a WA for https://issues.redhat.com/browse/IIC-527 we need to reload the idpf driver since this seems to fail
128136
# after an IMC reboot (which occurs during the RHEL installation)

kernel.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,22 @@
33
import time
44

55
KERNEL_RPMS = [
6-
"https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/vol/rhel-9/packages/kernel/5.14.0/427.2.1.el9_4/x86_64/kernel-5.14.0-427.2.1.el9_4.x86_64.rpm",
7-
"https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/vol/rhel-9/packages/kernel/5.14.0/427.2.1.el9_4/x86_64/kernel-core-5.14.0-427.2.1.el9_4.x86_64.rpm",
8-
"https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/vol/rhel-9/packages/kernel/5.14.0/427.2.1.el9_4/x86_64/kernel-modules-5.14.0-427.2.1.el9_4.x86_64.rpm",
9-
"https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/vol/rhel-9/packages/kernel/5.14.0/427.2.1.el9_4/x86_64/kernel-modules-core-5.14.0-427.2.1.el9_4.x86_64.rpm",
10-
"https://download-01.beak-001.prod.iad2.dc.redhat.com/brewroot/vol/rhel-9/packages/kernel/5.14.0/427.2.1.el9_4/x86_64/kernel-modules-extra-5.14.0-427.2.1.el9_4.x86_64.rpm",
6+
"https://download.devel.redhat.com/brewroot/work/tasks/2286/66882286/kernel-5.14.0-570.idpf.IIC_500.el9_6.x86_64.rpm",
7+
"https://download.devel.redhat.com/brewroot/work/tasks/2286/66882286/kernel-core-5.14.0-570.idpf.IIC_500.el9_6.x86_64.rpm",
8+
"https://download.devel.redhat.com/brewroot/work/tasks/2286/66882286/kernel-modules-5.14.0-570.idpf.IIC_500.el9_6.x86_64.rpm",
9+
"https://download.devel.redhat.com/brewroot/work/tasks/2286/66882286/kernel-modules-core-5.14.0-570.idpf.IIC_500.el9_6.x86_64.rpm",
10+
"https://download.devel.redhat.com/brewroot/work/tasks/2286/66882286/kernel-modules-extra-5.14.0-570.idpf.IIC_500.el9_6.x86_64.rpm",
1111
]
1212

1313

14-
def ensure_rhel_9_4_kernel_is_installed(h: host.Host) -> None:
14+
def ensure_IIC_500_kernel_is_installed(h: host.Host) -> None:
1515
h.ssh_connect("core")
1616
ret = h.run("uname -r")
17-
if "el9_4" in ret.out:
17+
if "5.14.0-570.idpf.IIC_500.el9_6.x86_64" in ret.out:
18+
logger.info("5.14.0-570.idpf.IIC_500.el9_6.x86_64 kernel already installed, skipping")
1819
return
1920

20-
logger.info(f"Installing RHEL 9.4 kernel on {h.hostname()}")
21+
logger.info(f"Installing 5.14.0-570.idpf.IIC_500.el9_6.x86_64 kernel on {h.hostname()}")
2122

2223
wd = "working_dir"
2324
h.run(f"rm -rf {wd}")
@@ -45,4 +46,4 @@ def ensure_rhel_9_4_kernel_is_installed(h: host.Host) -> None:
4546
h.ssh_connect("core")
4647
ret = h.run("uname -r")
4748
if "el9_4" not in ret.out:
48-
logger.error_and_exit(f"Failed to install rhel 9.4 kernel on host {h.hostname()}")
49+
logger.error_and_exit(f"Failed to install 5.14.0-570.idpf.IIC_500.el9_6.x86_64 kernel on host {h.hostname()}")

0 commit comments

Comments
 (0)