Skip to content

Commit 34fc615

Browse files
dgsudharsan authored and StormLiangMS committed
[sai_failure_dump]Invoking dump during SAI failure (#2644)
* [sai_failure_dump]Invoking dump during SAI failure
1 parent b817695 commit 34fc615

File tree

7 files changed

+208
-14
lines changed

7 files changed

+208
-14
lines changed

orchagent/main.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ void syncd_apply_view()
126126
if (status != SAI_STATUS_SUCCESS)
127127
{
128128
SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status);
129-
exit(EXIT_FAILURE);
129+
handleSaiFailure(true);
130130
}
131131
}
132132

@@ -619,7 +619,7 @@ int main(int argc, char **argv)
619619
if (status != SAI_STATUS_SUCCESS)
620620
{
621621
SWSS_LOG_ERROR("Failed to create a switch, rv:%d", status);
622-
exit(EXIT_FAILURE);
622+
handleSaiFailure(true);
623623
}
624624
SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId);
625625

@@ -650,7 +650,7 @@ int main(int argc, char **argv)
650650
if (status != SAI_STATUS_SUCCESS)
651651
{
652652
SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status);
653-
exit(EXIT_FAILURE);
653+
handleSaiFailure(true);
654654
}
655655
else
656656
{
@@ -665,7 +665,7 @@ int main(int argc, char **argv)
665665
if (status != SAI_STATUS_SUCCESS)
666666
{
667667
SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status);
668-
exit(EXIT_FAILURE);
668+
handleSaiFailure(true);
669669
}
670670

671671
gVirtualRouterId = attr.value.oid;
@@ -707,7 +707,7 @@ int main(int argc, char **argv)
707707
if (status != SAI_STATUS_SUCCESS)
708708
{
709709
SWSS_LOG_ERROR("Failed to create underlay router interface %d", status);
710-
exit(EXIT_FAILURE);
710+
handleSaiFailure(true);
711711
}
712712

713713
SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId);

orchagent/orchdaemon.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ void OrchDaemon::flush()
675675
if (status != SAI_STATUS_SUCCESS)
676676
{
677677
SWSS_LOG_ERROR("Failed to flush redis pipeline %d", status);
678-
abort();
678+
handleSaiFailure(true);
679679
}
680680
}
681681

orchagent/saihelper.cpp

+37-7
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
496496
default:
497497
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
498498
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
499-
abort();
499+
handleSaiFailure(true);
500+
break;
500501
}
501502
break;
502503
case SAI_API_HOSTIF:
@@ -514,8 +515,10 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
514515
default:
515516
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
516517
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
517-
abort();
518+
handleSaiFailure(true);
519+
break;
518520
}
521+
break;
519522
default:
520523
switch (status)
521524
{
@@ -525,7 +528,8 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
525528
default:
526529
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
527530
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
528-
abort();
531+
handleSaiFailure(true);
532+
break;
529533
}
530534
}
531535
return task_need_retry;
@@ -566,8 +570,10 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
566570
default:
567571
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
568572
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
569-
abort();
573+
handleSaiFailure(true);
574+
break;
570575
}
576+
break;
571577
case SAI_API_TUNNEL:
572578
switch (status)
573579
{
@@ -578,12 +584,15 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
578584
default:
579585
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
580586
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
581-
abort();
587+
handleSaiFailure(true);
588+
break;
582589
}
590+
break;
583591
default:
584592
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
585593
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
586-
abort();
594+
handleSaiFailure(true);
595+
break;
587596
}
588597

589598
return task_need_retry;
@@ -611,7 +620,8 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo
611620
default:
612621
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
613622
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
614-
abort();
623+
handleSaiFailure(true);
624+
break;
615625
}
616626
return task_need_retry;
617627
}
@@ -663,3 +673,23 @@ bool parseHandleSaiStatusFailure(task_process_status status)
663673
}
664674
return true;
665675
}
676+
677+
/* Handle a SAI failure: set the redis switch attribute SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD to SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP to request a SAI failure dump, then call abort() when abort_on_failure is true; when it is false the function returns to the caller. */
678+
void handleSaiFailure(bool abort_on_failure)
679+
{
680+
SWSS_LOG_ENTER();
681+
682+
sai_attribute_t attr; // only id and value.s32 are filled in below
683+
684+
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
685+
attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP;
686+
sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); // NOTE(review): uses the global gSwitchId; this commit also calls the function when switch creation itself failed -- confirm the id is usable on that path
687+
if (status != SAI_STATUS_SUCCESS)
688+
{
689+
SWSS_LOG_ERROR("Failed to take sai failure dump %d", status); // a failed dump request is only logged; the abort decision below is unaffected
690+
}
691+
if (abort_on_failure)
692+
{
693+
abort();
694+
}
695+
}

orchagent/saihelper.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
1818
task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
1919
task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
2020
bool parseHandleSaiStatusFailure(task_process_status status);
21-
21+
void handleSaiFailure(bool abort_on_failure);

tests/mock_tests/Makefile.am

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ tests_SOURCES = aclorch_ut.cpp \
5050
flowcounterrouteorch_ut.cpp \
5151
orchdaemon_ut.cpp \
5252
warmrestartassist_ut.cpp \
53+
test_failure_handling.cpp \
5354
$(top_srcdir)/lib/gearboxutils.cpp \
5455
$(top_srcdir)/lib/subintf.cpp \
5556
$(top_srcdir)/orchagent/orchdaemon.cpp \

tests/mock_tests/portsorch_ut.cpp

+81
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "notifier.h"
1010
#define private public
1111
#include "pfcactionhandler.h"
12+
#include <sys/mman.h>
1213
#undef private
1314

1415
#include <sstream>
@@ -21,6 +22,8 @@ namespace portsorch_test
2122

2223
sai_port_api_t ut_sai_port_api;
2324
sai_port_api_t *pold_sai_port_api;
25+
sai_switch_api_t ut_sai_switch_api;
26+
sai_switch_api_t *pold_sai_switch_api;
2427

2528
bool not_support_fetching_fec;
2629
vector<sai_port_fec_mode_t> mock_port_fec_modes = {SAI_PORT_FEC_MODE_RS, SAI_PORT_FEC_MODE_FC};
@@ -66,9 +69,28 @@ namespace portsorch_test
6669
_sai_set_port_fec_count++;
6770
_sai_port_fec_mode = attr[0].value.s32;
6871
}
72+
else if (attr[0].id == SAI_PORT_ATTR_AUTO_NEG_MODE)
73+
{
74+
/* Simulating failure case */
75+
return SAI_STATUS_FAILURE;
76+
}
6977
return pold_sai_port_api->set_port_attribute(port_id, attr);
7078
}
7179

80+
uint32_t *_sai_syncd_notifications_count;
81+
int32_t *_sai_syncd_notification_event;
82+
sai_status_t _ut_stub_sai_set_switch_attribute( // Test stub: records NOTIFY_SYNCD requests, then forwards every call to the original switch API.
83+
_In_ sai_object_id_t switch_id,
84+
_In_ const sai_attribute_t *attr)
85+
{
86+
if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD)
87+
{
88+
*_sai_syncd_notifications_count += 1; // increment the counter; the previous "=+ 1" assigned +1, so repeated notifications were never counted
89+
*_sai_syncd_notification_event = attr[0].value.s32; // remember the most recent notification value for the test to assert on
90+
}
91+
return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
92+
}
93+
7294
void _hook_sai_port_api()
7395
{
7496
ut_sai_port_api = *sai_port_api;
@@ -83,6 +105,19 @@ namespace portsorch_test
83105
sai_port_api = pold_sai_port_api;
84106
}
85107

108+
void _hook_sai_switch_api() // Redirect sai_switch_api->set_switch_attribute to the test stub while keeping the original table reachable.
109+
{
110+
ut_sai_switch_api = *sai_switch_api; // start from a copy of the current API table so every other entry is unchanged
111+
pold_sai_switch_api = sai_switch_api; // save the original table: the stub forwards to it and _unhook_sai_switch_api() restores it
112+
ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;
113+
sai_switch_api = &ut_sai_switch_api; // from here on, callers going through sai_switch_api reach the stub
114+
}
115+
116+
void _unhook_sai_switch_api()
117+
{
118+
sai_switch_api = pold_sai_switch_api;
119+
}
120+
86121
sai_queue_api_t ut_sai_queue_api;
87122
sai_queue_api_t *pold_sai_queue_api;
88123
int _sai_set_queue_attr_count = 0;
@@ -473,6 +508,52 @@ namespace portsorch_test
473508
_unhook_sai_port_api();
474509
}
475510

511+
TEST_F(PortsOrchTest, PortTestSAIFailureHandling)
512+
{
513+
_hook_sai_port_api();
514+
_hook_sai_switch_api();
515+
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
516+
std::deque<KeyOpFieldsValuesTuple> entries;
517+
518+
not_support_fetching_fec = false;
519+
// Get SAI default ports to populate DB
520+
auto ports = ut_helper::getInitialSaiPorts();
521+
522+
for (const auto &it : ports)
523+
{
524+
portTable.set(it.first, it.second);
525+
}
526+
527+
// Set PortConfigDone
528+
portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });
529+
530+
// refill consumer
531+
gPortsOrch->addExistingData(&portTable);
532+
533+
// Apply configuration :
534+
// create ports
535+
static_cast<Orch *>(gPortsOrch)->doTask();
536+
537+
_sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
538+
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
539+
_sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
540+
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
541+
*_sai_syncd_notifications_count = 0;
542+
543+
entries.push_back({"Ethernet0", "SET",
544+
{
545+
{"autoneg", "on"}
546+
}});
547+
auto consumer = dynamic_cast<Consumer *>(gPortsOrch->getExecutor(APP_PORT_TABLE_NAME));
548+
consumer->addToSync(entries);
549+
ASSERT_DEATH({static_cast<Orch *>(gPortsOrch)->doTask();}, "");
550+
551+
ASSERT_EQ(*_sai_syncd_notifications_count, 1);
552+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
553+
_unhook_sai_port_api();
554+
_unhook_sai_switch_api();
555+
}
556+
476557
TEST_F(PortsOrchTest, PortReadinessColdBoot)
477558
{
478559
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);

tests/mock_tests/test_failure_handling.cpp

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#include "saihelper.h"
2+
#include "ut_helper.h"
3+
#include <sys/mman.h>
4+
5+
extern sai_switch_api_t *sai_switch_api;
6+
7+
namespace saifailure_test
8+
{
9+
struct SaiFailureTest : public ::testing::Test
10+
{
11+
};
12+
uint32_t *_sai_syncd_notifications_count;
13+
int32_t *_sai_syncd_notification_event;
14+
sai_switch_api_t *pold_sai_switch_api;
15+
sai_switch_api_t ut_sai_switch_api;
16+
17+
sai_status_t _ut_stub_sai_set_switch_attribute(
18+
_In_ sai_object_id_t switch_id,
19+
_In_ const sai_attribute_t *attr)
20+
{
21+
if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD)
22+
{
23+
*_sai_syncd_notifications_count = *_sai_syncd_notifications_count + 1;
24+
*_sai_syncd_notification_event = attr[0].value.s32;
25+
}
26+
return pold_sai_switch_api->set_switch_attribute(switch_id, attr);
27+
}
28+
29+
void _hook_sai_switch_api()
30+
{
31+
ut_sai_switch_api = *sai_switch_api;
32+
pold_sai_switch_api = sai_switch_api;
33+
ut_sai_switch_api.set_switch_attribute = _ut_stub_sai_set_switch_attribute;
34+
sai_switch_api = &ut_sai_switch_api;
35+
}
36+
37+
void _unhook_sai_switch_api()
38+
{
39+
sai_switch_api = pold_sai_switch_api;
40+
}
41+
42+
TEST_F(SaiFailureTest, handleSaiFailure)
43+
{
44+
_hook_sai_switch_api();
45+
_sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
46+
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
47+
_sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
48+
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
49+
*_sai_syncd_notifications_count = 0;
50+
uint32_t notif_count = *_sai_syncd_notifications_count;
51+
52+
ASSERT_DEATH({handleSaiCreateStatus(SAI_API_FDB, SAI_STATUS_FAILURE);}, "");
53+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
54+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
55+
56+
ASSERT_DEATH({handleSaiCreateStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER);}, "");
57+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
58+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
59+
60+
ASSERT_DEATH({handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
61+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
62+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
63+
64+
ASSERT_DEATH({handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_FAILURE);}, "");
65+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
66+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
67+
68+
ASSERT_DEATH({handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, "");
69+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
70+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
71+
72+
ASSERT_DEATH({handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_FAILURE);}, "");
73+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
74+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
75+
76+
ASSERT_DEATH({handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_FAILURE);}, "");
77+
ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count);
78+
ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP);
79+
80+
_unhook_sai_switch_api();
81+
}
82+
}

0 commit comments

Comments
 (0)