-
Notifications
You must be signed in to change notification settings - Fork 296
Add support for fdb_event MOVE and check fdb event oids #420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6015,6 +6015,45 @@ void meta_sai_on_fdb_flush_event_consolidated( | |
} | ||
} | ||
|
||
void meta_fdb_event_snoop_oid( | ||
_In_ sai_object_id_t oid) | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
if (oid == SAI_NULL_OBJECT_ID) | ||
return; | ||
|
||
if (object_reference_exists(oid)) | ||
return; | ||
|
||
sai_object_type_t ot = sai_object_type_query(oid); | ||
|
||
if (ot == SAI_OBJECT_TYPE_NULL) | ||
{ | ||
SWSS_LOG_ERROR("failed to get object type on fdb_event oid: 0x%lx", oid); | ||
return; | ||
} | ||
|
||
sai_object_meta_key_t key = { .objecttype = ot, .objectkey = { .key = { .object_id = oid } } }; | ||
|
||
object_reference_insert(oid); | ||
|
||
if (!object_exists(key)) | ||
create_object(key); | ||
|
||
/* | ||
* In normal operation orch agent should query or create all bridge, vlan | ||
* and bridge port, so we should not get this message. Let's put it as | ||
* warning for better visibility. Most likely if this happen there is a | ||
* vendor bug in SAI and we should also see warnings or errors reported | ||
* from syncd in logs. | ||
*/ | ||
|
||
SWSS_LOG_WARN("fdb_entry oid (snoop): %s: %s", | ||
sai_serialize_object_type(ot).c_str(), | ||
sai_serialize_object_id(oid).c_str()); | ||
} | ||
|
||
void meta_sai_on_fdb_event_single( | ||
_In_ const sai_fdb_event_notification_data_t& data) | ||
{ | ||
|
@@ -6024,6 +6063,35 @@ void meta_sai_on_fdb_event_single( | |
|
||
std::string key_fdb = sai_serialize_object_meta_key(meta_key_fdb); | ||
|
||
/* | ||
* Because we could receive fdb event's before orch agent will query or | ||
* create bridge/vlan/bridge port we should snoop here new OIDs and put | ||
* them in local DB. | ||
* | ||
* Unfortunately we don't have a way to check whether those OIDs are correct | ||
* or whether there maybe some bug in vendor SAI and for example is sending | ||
* invalid OIDs in those event's. Also sai_object_type_query can return | ||
* valid object type for OID, but this does not guarantee that this OID is | ||
* valid, for example one of existing bridge ports that orch agent didn't | ||
* query yet. | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this PR add visibility to this issue, in such case you can print out warning message. but it does not solve the issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, that was intention of this PR |
||
|
||
meta_fdb_event_snoop_oid(data.fdb_entry.bv_id); | ||
|
||
for (uint32_t i = 0; i < data.attr_count; i++) | ||
{ | ||
auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, data.attr[i].id); | ||
|
||
if (meta == NULL) | ||
{ | ||
SWSS_LOG_ERROR("failed to get metadata for fdb_entry attr.id = %d", data.attr[i].id); | ||
continue; | ||
} | ||
|
||
if (meta->attrvaluetype == SAI_ATTR_VALUE_TYPE_OBJECT_ID) | ||
meta_fdb_event_snoop_oid(data.attr[i].value.oid); | ||
} | ||
|
||
switch (data.event_type) | ||
{ | ||
case SAI_FDB_EVENT_LEARNED: | ||
|
@@ -6096,6 +6164,33 @@ void meta_sai_on_fdb_event_single( | |
|
||
break; | ||
|
||
case SAI_FDB_EVENT_MOVE: | ||
|
||
if (!object_exists(key_fdb)) | ||
{ | ||
SWSS_LOG_WARN("object key %s doesn't exist but received FDB MOVE event", key_fdb.c_str()); | ||
break; | ||
} | ||
|
||
// on MOVE event, just update attributes on existing entry | ||
|
||
for (uint32_t i = 0; i < data.attr_count; i++) | ||
{ | ||
const sai_attribute_t& attr = data.attr[i]; | ||
|
||
sai_status_t status = meta_generic_validation_set(meta_key_fdb, &attr); | ||
|
||
if (status != SAI_STATUS_SUCCESS) | ||
{ | ||
SWSS_LOG_ERROR("object key %s FDB MOVE event, SET validateion failed on attr.id = %d", key_fdb.c_str(), attr.id); | ||
continue; | ||
} | ||
|
||
meta_generic_validation_post_set(meta_key_fdb, &attr); | ||
} | ||
|
||
break; | ||
|
||
default: | ||
|
||
SWSS_LOG_ERROR("got FDB_ENTRY notification with unknown event_type %d, bug?", data.event_type); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -240,18 +240,73 @@ void redisPutFdbEntryToAsicView( | |
g_redisClient->hset(key, strAttrId, strAttrValue); | ||
} | ||
|
||
void check_fdb_event_notification_data( | ||
_In_ const sai_fdb_event_notification_data_t& data) | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
/* | ||
* Any new RID value spotted in fdb notification can happen for 2 reasons: | ||
* | ||
* - a bug is present on the vendor SAI, all RID's are already in local or | ||
* REDIS ASIC DB but vendor SAI returned new or invalid RID | ||
* | ||
* - orch agent didn't query yet bridge ID/vlan ID and already | ||
* started to receive fdb notifications in which case warn message | ||
* could be ignored. | ||
* | ||
* If vendor SAI will return invalid RID, then this will later on lead to | ||
* inconsistent DB state and possible failure on apply view after cold or | ||
* warm boot. | ||
* | ||
* On switch init we do discover phase, and we should discover all objects | ||
* so we should not get any of those messages if SAI is in consistent | ||
* state. | ||
*/ | ||
|
||
if (!check_rid_exists(data.fdb_entry.bv_id)) | ||
SWSS_LOG_ERROR("bv_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id, | ||
sai_serialize_fdb_entry(data.fdb_entry).c_str()); | ||
|
||
if (!check_rid_exists(data.fdb_entry.switch_id) || data.fdb_entry.switch_id == SAI_NULL_OBJECT_ID) | ||
SWSS_LOG_ERROR("switch_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id, | ||
sai_serialize_fdb_entry(data.fdb_entry).c_str()); | ||
|
||
for (uint32_t i = 0; i < data.attr_count; i++) | ||
{ | ||
const sai_attribute_t& attr = data.attr[i]; | ||
|
||
auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, attr.id); | ||
|
||
if (meta == NULL) | ||
{ | ||
SWSS_LOG_ERROR("unable to get metadata for fdb_entry attr.id = %d", attr.id); | ||
continue; | ||
} | ||
|
||
// skip non oid attributes | ||
if (meta->attrvaluetype != SAI_ATTR_VALUE_TYPE_OBJECT_ID) | ||
continue; | ||
|
||
if (!check_rid_exists(attr.value.oid)) | ||
SWSS_LOG_WARN("RID 0x%lx on %s is not present on local ASIC DB", attr.value.oid, meta->attridname); | ||
kcudnik marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why line 268 and 271 are error level, but this one is warning level? shouldn't them be same level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because if received switch id in fdb notification is not existing in db already then this is a bug same for bridge vlan/id |
||
} | ||
} | ||
|
||
void process_on_fdb_event( | ||
_In_ uint32_t count, | ||
_In_ sai_fdb_event_notification_data_t *data) | ||
{ | ||
SWSS_LOG_ENTER(); | ||
|
||
SWSS_LOG_DEBUG("fdb event count: %d", count); | ||
SWSS_LOG_INFO("fdb event count: %u", count); | ||
|
||
for (uint32_t i = 0; i < count; i++) | ||
{ | ||
sai_fdb_event_notification_data_t *fdb = &data[i]; | ||
|
||
check_fdb_event_notification_data(*fdb); | ||
|
||
SWSS_LOG_DEBUG("fdb %u: type: %d", i, fdb->event_type); | ||
|
||
fdb->fdb_entry.switch_id = translate_rid_to_vid(fdb->fdb_entry.switch_id, SAI_NULL_OBJECT_ID); | ||
|
@@ -536,7 +591,7 @@ bool ntf_queue_t::enqueue( | |
|
||
if (!(log_count % 1000)) | ||
{ | ||
SWSS_LOG_NOTICE("Too many messages in queue (%ld), dropped %ld FDB events!", | ||
SWSS_LOG_NOTICE("Too many messages in queue (%zu), dropped %u FDB events!", | ||
queueStats(), (log_count+1)); | ||
} | ||
|
||
|
@@ -782,6 +837,6 @@ void check_notifications_pointers( | |
* Here we translated pointer, just log it. | ||
*/ | ||
|
||
SWSS_LOG_INFO("%s: %lp (orch) => %lp (syncd)", meta->attridname, prev, attr.value.ptr); | ||
SWSS_LOG_INFO("%s: 0x%lX (orch) => 0x%lX (syncd)", meta->attridname, (uint64_t)prev, (uint64_t)attr.value.ptr); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment is a little confusing.
In what scenario fdb event may be received before orch agent "create" bridge/vlan/bridge port? If that does happen, the VID that comes with FDB will be different from VID assigned to the object during "create" operation. The integrity of meta reference will be messed up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
there are bridge ports creted by default on the switch and between create switch and removing them it may happen that fdb_event arrive
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I thought you were talking about FDB event for the non-default bridge/vlan/bridge port object.
If it does happen that fdb_event arrive before removing default bridge ports, will it cause validation error since the bridge ports has non-zero reference count? Orchagent initialization fails at this case?