Skip to content

Commit 91775cf

Browse files
committed
Add preconfig step ExtraConfigMevFwUp
Provide a pre-config step that puts the MeV firmware into a good state. Reflashing the firmware takes a very long time, so by default the step does not reflash if the firmware is already on the desired version. However, if force is specified, the firmware is reflashed regardless of its current state. Signed-off-by: Salvatore Daniele <[email protected]>
1 parent 9a8167e commit 91775cf

File tree

4 files changed

+75
-8
lines changed

4 files changed

+75
-8
lines changed

clustersConfig.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ class ExtraConfigArgs:
5858

5959
base_image: str = ""
6060

61+
mev_version: str = ""
62+
63+
force_mev_fw_up: bool = False
64+
6165
def pre_check(self) -> None:
6266
if self.sriov_network_operator_local:
6367
if self.name != "sriov_network_operator":

extraConfigMev.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from clustersConfig import ClustersConfig
2+
import host
3+
from logger import logger
4+
from clustersConfig import ExtraConfigArgs
5+
from bmc import BMC
6+
from concurrent.futures import Future
7+
from typing import Optional
8+
import time
9+
10+
# MeV firmware release that ExtraConfigMevFwUp installs when the config leaves
# `mev_version` empty.
LATEST_MEV_FW = "1.8.0.10052"
11+
12+
13+
def ExtraConfigMevFwUp(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
    """Pre-config step that brings the MeV firmware on an IPU's IMC to a given release.

    The target release is ``cfg.mev_version``; when that is empty it is set to
    ``LATEST_MEV_FW`` (the value is written back onto ``cfg``).  Unless
    ``cfg.force_mev_fw_up`` is set, the step first checks ``/etc/issue.net`` on
    the IMC and returns early when the target release string is already in it.
    Otherwise the firmware-up command is run through
    ``LocalHost.run_in_container``, the host-side BMC is cold booted, and
    ``/etc/issue.net`` is read again to validate the result.

    Args:
        cc: Cluster configuration; must be of kind "iso" and its first master
            must be of kind "ipu" with a host-side BMC.
        cfg: Arguments of this step (``mev_version``, ``force_mev_fw_up``).
        futures: Not used by this step.

    Raises:
        AssertionError: If the cluster / first master does not match the
            expectations listed above.

    Failures of the flash or of the final version check are reported through
    ``logger.error_and_exit`` (presumably terminating the run - confirm against
    the logger module).
    """
    logger.info("Running pre config step to flash MeV firmware on IPU IMC")

    # This preconfig step is expected to run on an IMC only
    assert cc.kind == "iso"
    ipu = cc.masters[0]
    assert ipu.kind == "ipu"
    assert ipu.host_side_bmc is not None
    imc = host.Host(ipu.bmc)

    def read_issue_banner() -> host.Result:
        # Log in to the IMC and dump the file that is searched for the release string.
        imc.ssh_connect(ipu.bmc_user, ipu.bmc_password)
        return imc.run("cat /etc/issue.net")

    # Fall back to the default release when none was requested explicitly.
    if cfg.mev_version == "":
        logger.info("Desired MeV fw release not specified, will install the latest by default")
        cfg.mev_version = LATEST_MEV_FW
    wanted = cfg.mev_version
    logger.info(f"Will ensure {ipu.bmc} is on firmware version: {wanted}")

    # Flashing is slow, so skip it when the IMC is reachable and already on the
    # wanted release - unless the user asked to force the update.
    skip_allowed = not cfg.force_mev_fw_up
    if skip_allowed:
        logger.info("Checking if firmware update is required")
        if imc.ping():
            banner = read_issue_banner()
            if wanted in banner.out:
                logger.info(f"Current MeV fw version is {banner.out.strip()}, no need to update")
                return

    # Perform the upgrade from the local host.
    flash = host.LocalHost().run_in_container(
        f"--dpu-type ipu --imc-address {ipu.bmc} firmware up --version {wanted}",
        interactive=True,
    )
    if not flash.success():
        logger.error_and_exit(f"Failed to flash new firmware. Error: {flash.err}")

    # Cold boot the host to apply the change.
    BMC.from_bmc(ipu.host_side_bmc).cold_boot()
    # Cold boot should also reboot IMC, give it time to settle before reconnecting.
    time.sleep(20)

    # Access the IMC again to validate that the flash was successful.
    banner = read_issue_banner()
    flashed_ok = wanted in banner.out and banner.returncode == 0
    if not flashed_ok:
        logger.error_and_exit(f"Mev firmware release is not the expected version: {banner.out}")

    logger.info("MeV firmware flash complete")

extraConfigRunner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from extraConfigMicroshift import ExtraConfigMicroshift
1515
from extraConfigRhSubscription import ExtraConfigRhSubscription
1616
from extraConfigDpu import ExtraConfigDpu, ExtraConfigDpuHost
17+
from extraConfigMev import ExtraConfigMevFwUp
1718
from clustersConfig import ClustersConfig
1819
from clustersConfig import ExtraConfigArgs
1920
from concurrent.futures import Future
@@ -51,6 +52,7 @@ def __init__(self, cc: ClustersConfig):
5152
"rh_subscription": ExtraConfigRhSubscription,
5253
"dpu_operator_host": ExtraConfigDpuHost,
5354
"dpu_operator_dpu": ExtraConfigDpu,
55+
"mev_firmware_up": ExtraConfigMevFwUp,
5456
}
5557

5658
def run(self, to_run: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:

host.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,14 @@ def run_or_die(self, cmd: str) -> Result:
322322
logger.debug(ret.out.strip())
323323
return ret
324324

325+
def run_in_container(self, cmd: str, interactive: bool = False, verbose: bool = True, dry_run: bool = False) -> Result:
    """Run ``cmd`` through ``sudo podman run`` using the quay.io/bnemeth/bf image.

    The container is started with the name "dpu-tools" and ``cmd`` is appended
    after the image name, preceded by the optional ``--verbose`` and
    ``--dry-run`` arguments.

    Args:
        cmd: Arguments appended to the podman command line after the image.
        interactive: Add ``-it`` to the ``podman run`` options.
        verbose: Put ``--verbose`` in front of ``cmd``.
        dry_run: Put ``--dry-run`` in front of ``cmd``.

    Returns:
        Whatever ``self.run`` returns for the assembled command line.
    """
    container_name = "dpu-tools"

    # Each optional flag collapses to an empty string when it is switched off.
    tty_flag = ""
    if interactive:
        tty_flag = "-it"
    verbose_flag = ""
    if verbose:
        verbose_flag = "--verbose"
    dry_run_flag = ""
    if dry_run:
        dry_run_flag = "--dry-run"

    podman_cmd = (
        f"sudo podman run {tty_flag} --rm --pull always --replace --pid host --network host --user 0 "
        f"--name {container_name} --privileged -v /dev:/dev quay.io/bnemeth/bf {verbose_flag} {dry_run_flag} {cmd}"
    )
    # logging.INFO is handed to self.run as its second argument (presumably the
    # log level used for the command - confirm against Host.run).
    return self.run(podman_cmd, logging.INFO)
332+
325333
def close(self) -> None:
326334
assert self._host is not None
327335
self._host.close()
@@ -443,14 +451,6 @@ def cx_firmware_upgrade(self) -> Result:
443451
logger.info("Upgrading CX firmware")
444452
return self.run_in_container("utils cx-fwup")
445453

446-
def run_in_container(self, cmd: str, interactive: bool = False, verbose: bool = True, dry_run: bool = False) -> Result:
    """Run ``cmd`` through ``sudo podman run`` using the quay.io/bnemeth/bf image.

    The container is named "dpu-tools"; ``cmd`` follows the image name on the
    command line, after the optional ``--verbose`` / ``--dry-run`` arguments.

    Args:
        cmd: Arguments appended to the podman command line after the image.
        interactive: Add ``-it`` to the ``podman run`` options.
        verbose: Put ``--verbose`` in front of ``cmd``.
        dry_run: Put ``--dry-run`` in front of ``cmd``.

    Returns:
        Whatever ``self.run`` returns for the assembled command line.
    """
    container_name = "dpu-tools"

    # Disabled switches contribute an empty string to the command line.
    tty_flag = ""
    if interactive:
        tty_flag = "-it"
    verbose_flag = ""
    if verbose:
        verbose_flag = "--verbose"
    dry_run_flag = ""
    if dry_run:
        dry_run_flag = "--dry-run"

    podman_cmd = (
        f"sudo podman run {tty_flag} --rm --pull always --replace --pid host --network host --user 0 "
        f"--name {container_name} --privileged -v /dev:/dev quay.io/bnemeth/bf {verbose_flag} {dry_run_flag} {cmd}"
    )
    # logging.DEBUG is handed to self.run as its second argument (presumably the
    # log level used for the command - confirm against Host.run).
    return self.run(podman_cmd, logging.DEBUG)
453-
454454

455455
class HostWithBF2(Host):
456456
def connect_to_bf(self, bf_addr: str) -> None:

0 commit comments

Comments
 (0)