From 569b47ced9a04fb63cd92b3b7e0fcf7ef713819c Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Mon, 15 Aug 2022 08:05:17 +0000 Subject: [PATCH 1/6] System healthd to check PSU power exceeding status Signed-off-by: Stephen Sun --- .../health_checker/hardware_checker.py | 12 ++++++++ src/system-health/tests/test_system_health.py | 30 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/system-health/health_checker/hardware_checker.py b/src/system-health/health_checker/hardware_checker.py index 575564de8733..7f559126acbd 100644 --- a/src/system-health/health_checker/hardware_checker.py +++ b/src/system-health/health_checker/hardware_checker.py @@ -239,6 +239,18 @@ def _check_psu_status(self, config): voltage_min_th, voltage_max_th)) continue + + if not self._ignore_check(config.ignore_devices, 'psu', name, 'power_threshold'): + power_overload = data_dict.get('power_overload', None) + if power_overload == 'True': + try: + power = data_dict['power'] + power_critical_threshold = data_dict['power_critical_threshold'] + self.set_object_not_ok('PSU', name, 'power of {} ({}w) exceeds threshold ({}w)'.format(name, power, power_critical_threshold)) + except KeyError: + self.set_object_not_ok('PSU', name, 'power of {} exceeds threshold but power or power_critical_threshold does not invalid'.format(name)) + continue + self.set_object_ok('PSU', name) def reset(self): diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index d58c69bececa..0eeadd25fac9 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -362,6 +362,30 @@ def test_hardware_checker(): 'voltage': '10', 'voltage_min_threshold': '12', 'voltage_max_threshold': '15', + }, + 'PSU_INFO|PSU 6': { + 'presence': 'True', + 'status': 'True', + 'temp': '55', + 'temp_threshold': '100', + 'voltage': '10', + 'voltage_min_threshold': '12', + 'voltage_max_threshold': '15', + 'power_overload': 'True', + 'power': '101.0', + 'power_critical_threshold': '100.0', + 'power_threshold': '90.0' + }, + 'PSU_INFO|PSU 7': { + 'presence': 'True', + 'status': 'True', + 'temp': '55', + 'temp_threshold': '100', + 'voltage': '10', + 'voltage_min_threshold': '12', + 'voltage_max_threshold': '15', + 'power_overload': 'True', + 'power': '101.0' } }) @@ -400,6 +424,12 @@ def test_hardware_checker(): assert 'PSU 5' in checker._info assert checker._info['PSU 5'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK + assert 'PSU 6' in checker._info + assert checker._info['PSU 6'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK + + assert 'PSU 7' in checker._info + assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK + def test_config(): config = Config() From d618c34cd013ccbfc6de0e7905d9c4d98b28657b Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Fri, 26 Aug 2022 12:52:26 +0000 Subject: [PATCH 2/6] Update field name in state db Signed-off-by: Stephen Sun --- src/system-health/tests/test_system_health.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index 0eeadd25fac9..f25ccd982cab 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -374,7 +374,7 @@ def test_hardware_checker(): 'power_overload': 'True', 'power': '101.0', 'power_critical_threshold': '100.0', - 'power_threshold': '90.0' + 'power_warning_threshold': '90.0' }, 'PSU_INFO|PSU 7': { 'presence': 'True', From 7694c413efa1ee936054d50bf168fca910fba7da Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Mon, 29 Aug 2022 01:37:03 +0000 Subject: [PATCH 3/6] Add more specific check in UT Signed-off-by: Stephen Sun --- src/system-health/tests/test_system_health.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index f25ccd982cab..5aabe6ac6a99 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -368,7 +368,7 @@ def test_hardware_checker(): 'status': 'True', 'temp': '55', 'temp_threshold': '100', - 'voltage': '10', + 'voltage': '12', 'voltage_min_threshold': '12', 'voltage_max_threshold': '15', 'power_overload': 'True', @@ -381,7 +381,7 @@ def test_hardware_checker(): 'status': 'True', 'temp': '55', 'temp_threshold': '100', - 'voltage': '10', + 'voltage': '12', 'voltage_min_threshold': '12', 'voltage_max_threshold': '15', 'power_overload': 'True', @@ -425,10 +425,12 @@ def test_hardware_checker(): assert checker._info['PSU 5'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK assert 'PSU 6' in checker._info + assert checker._info['PSU 6'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'power of PSU 6 (101.0w) exceeds threshold (100.0w)' assert checker._info['PSU 6'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK assert 'PSU 7' in checker._info assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK + assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'power of PSU 7 exceeds threshold but power or power_critical_threshold does not invalid' def test_config(): From c02f3e3387cf209986e9233e073e3b95f5ed9862 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 17 Nov 2022 01:07:18 +0000 Subject: [PATCH 4/6] power_warning_threshold => power_warning_suppress_threshold Signed-off-by: Stephen Sun --- src/system-health/tests/test_system_health.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index 5aabe6ac6a99..af1b78fa99ad 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -374,7 +374,7 @@ def test_hardware_checker(): 'power_overload': 'True', 'power': '101.0', 'power_critical_threshold': '100.0', - 'power_warning_threshold': '90.0' + 'power_warning_suppress_threshold': '90.0' }, 'PSU_INFO|PSU 7': { 'presence': 'True', From 3d3dd838aed4ba59acdd3787332b44cf478a2860 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Sat, 19 Nov 2022 00:08:10 +0000 Subject: [PATCH 5/6] Fix syntax error Signed-off-by: Stephen Sun --- src/system-health/health_checker/hardware_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/system-health/health_checker/hardware_checker.py b/src/system-health/health_checker/hardware_checker.py index 7f559126acbd..83e35b5c7dc3 100644 --- a/src/system-health/health_checker/hardware_checker.py +++ b/src/system-health/health_checker/hardware_checker.py @@ -248,7 +248,7 @@ def _check_psu_status(self, config): power_critical_threshold = data_dict['power_critical_threshold'] self.set_object_not_ok('PSU', name, 'power of {} ({}w) exceeds threshold ({}w)'.format(name, power, power_critical_threshold)) except KeyError: - self.set_object_not_ok('PSU', name, 'power of {} exceeds threshold but power or power_critical_threshold does not invalid'.format(name)) + self.set_object_not_ok('PSU', name, 'power of {} exceeds threshold but power or power_critical_threshold is not invalid'.format(name)) continue self.set_object_ok('PSU', name) From e2af255cae95d46dc0cf033fb80f852b04ef6f34 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Sat, 19 Nov 2022 01:25:19 +0000 Subject: [PATCH 6/6] Fix ut error and typo Signed-off-by: Stephen Sun --- src/system-health/health_checker/hardware_checker.py | 2 +- src/system-health/tests/test_system_health.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/system-health/health_checker/hardware_checker.py b/src/system-health/health_checker/hardware_checker.py index 83e35b5c7dc3..59e47fa26733 100644 --- a/src/system-health/health_checker/hardware_checker.py +++ b/src/system-health/health_checker/hardware_checker.py @@ -248,7 +248,7 @@ def _check_psu_status(self, config): power_critical_threshold = data_dict['power_critical_threshold'] self.set_object_not_ok('PSU', name, 'power of {} ({}w) exceeds threshold ({}w)'.format(name, power, power_critical_threshold)) except KeyError: - self.set_object_not_ok('PSU', name, 'power of {} exceeds threshold but power or power_critical_threshold is not invalid'.format(name)) + self.set_object_not_ok('PSU', name, 'power of {} exceeds threshold but power or power_critical_threshold is invalid'.format(name)) continue self.set_object_ok('PSU', name) diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index af1b78fa99ad..687781ea2a4f 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -430,7 +430,7 @@ def test_hardware_checker(): assert 'PSU 7' in checker._info assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK - assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'power of PSU 7 exceeds threshold but power or power_critical_threshold does not invalid' + assert checker._info['PSU 7'][HealthChecker.INFO_FIELD_OBJECT_MSG] == 'power of PSU 7 exceeds threshold but power or power_critical_threshold is invalid' def test_config():