Skip to content

[Chassis][voq] remote link down ECMP acceleration #3150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7ed86b4
Remote ECMP changes for voq chassis
arlakshm May 8, 2024
837509e
fix typo
arlakshm May 14, 2024
ce244a4
add UT
arlakshm May 22, 2024
c0b4c99
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 22, 2024
4dc59f6
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 23, 2024
df07e71
Merge remote-tracking branch 'origin/master' into arlakshm/master/rem…
arlakshm May 24, 2024
7b87ea8
add more UT
arlakshm May 24, 2024
ae361b3
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 24, 2024
83921a7
Merge remote-tracking branch 'myfork/arlakshm/master/remote_ecmp' int…
arlakshm May 24, 2024
8f322d1
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 24, 2024
ff55b09
Merge branch 'master' into arlakshm/master/remote_ecmp
prsunny May 28, 2024
c70f80d
Remote ECMP changes for voq chassis
arlakshm May 8, 2024
af3218e
fix typo
arlakshm May 14, 2024
4ca7dd1
add UT
arlakshm May 22, 2024
c971fde
add more UT
arlakshm May 24, 2024
1ba206e
update log message
arlakshm May 28, 2024
d101771
Merge remote-tracking branch 'myfork/arlakshm/master/remote_ecmp' int…
arlakshm May 28, 2024
fbac408
update log message
arlakshm May 28, 2024
0f315ee
Merge branch 'master' into arlakshm/master/remote_ecmp
prsunny May 28, 2024
457a970
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 29, 2024
d60873a
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 30, 2024
2f23893
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 30, 2024
37464bd
Merge branch 'master' into arlakshm/master/remote_ecmp
arlakshm May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions orchagent/intfsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ void IntfsOrch::doTask(Consumer &consumer)
bool mpls = false;
string vlan = "";
string loopbackAction = "";

string oper_status ="";
for (auto idx : data)
{
const auto &field = fvField(idx);
Expand Down Expand Up @@ -807,6 +807,10 @@ void IntfsOrch::doTask(Consumer &consumer)
{
loopbackAction = value;
}
else if (field == "oper_status")
{
oper_status = value;
}
}

if (alias == "eth0" || alias == "docker0")
Expand Down Expand Up @@ -860,7 +864,19 @@ void IntfsOrch::doTask(Consumer &consumer)
it = consumer.m_toSync.erase(it);
continue;
}

if(table_name == CHASSIS_APP_SYSTEM_INTERFACE_TABLE_NAME)
{
if(isRemoteSystemPortIntf(alias))
{
SWSS_LOG_INFO("Handle remote systemport intf %s, oper status %s", alias.c_str(), oper_status.c_str());
bool isUp = (oper_status == "up") ? true : false;
if (!gNeighOrch->ifChangeInformRemoteNextHop(alias, isUp))
{
SWSS_LOG_WARN("Unable to update the nexthop for port %s, oper status %s", alias.c_str(), oper_status.c_str());
}

}
}
//Voq Inband interface config processing
if(inband_type.size() && !ip_prefix_in_key)
{
Expand Down Expand Up @@ -1656,7 +1672,10 @@ void IntfsOrch::voqSyncAddIntf(string &alias)
return;
}

FieldValueTuple nullFv ("NULL", "NULL");

string oper_status = port.m_oper_status == SAI_PORT_OPER_STATUS_UP ? "up" : "down";

FieldValueTuple nullFv ("oper_status", oper_status);
vector<FieldValueTuple> attrs;
attrs.push_back(nullFv);

Expand Down Expand Up @@ -1696,3 +1715,30 @@ void IntfsOrch::voqSyncDelIntf(string &alias)
m_tableVoqSystemInterfaceTable->del(alias);
}

/*
 * Publish the oper state of a locally owned system interface to the VOQ
 * system interface table.
 *
 * alias - port alias looked up through gPortsOrch
 * isUp  - true writes "up", false writes "down" into the "oper_status" field
 *
 * Nothing is written when the port cannot be found, when a LAG belongs to a
 * different switch id than gVoqMySwitchId, or when a non-LAG port is a remote
 * system port.
 */
void IntfsOrch::voqSyncIntfState(string &alias, bool isUp)
{
    Port port;
    if (!gPortsOrch->getPort(alias, port))
    {
        return;
    }

    const bool isLag = (port.m_type == Port::LAG);

    // Only interfaces owned by this switch are published.
    if (isLag && port.m_system_lag_info.switch_id != gVoqMySwitchId)
    {
        return;
    }
    if (!isLag && port.m_system_port_info.type == SAI_SYSTEM_PORT_TYPE_REMOTE)
    {
        return;
    }

    // The table is keyed by the system-wide alias, not the local port alias.
    string systemAlias;
    if (isLag)
    {
        systemAlias = port.m_system_lag_info.alias;
    }
    else
    {
        systemAlias = port.m_system_port_info.alias;
    }

    const char *state = isUp ? "up" : "down";
    SWSS_LOG_NOTICE("Syncing system interface state %s for port %s", state, systemAlias.c_str());
    m_tableVoqSystemInterfaceTable->hset(systemAlias, "oper_status", state);
}
1 change: 1 addition & 0 deletions orchagent/intfsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class IntfsOrch : public Orch

bool isRemoteSystemPortIntf(string alias);
bool isLocalSystemPortIntf(string alias);
void voqSyncIntfState(string &alias, bool);

private:

Expand Down
31 changes: 31 additions & 0 deletions orchagent/neighorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ bool NeighOrch::setNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_flag
auto nhop = m_syncdNextHops.find(nexthop);
bool rc = false;

SWSS_LOG_INFO("setNextHopFlag on %s seen on port %s ",
nexthop.ip_address.to_string().c_str(), nexthop.alias.c_str());
assert(nhop != m_syncdNextHops.end());

if (nhop->second.nh_flags & nh_flag)
Expand Down Expand Up @@ -379,6 +381,8 @@ bool NeighOrch::clearNextHopFlag(const NextHopKey &nexthop, const uint32_t nh_fl

nhop->second.nh_flags &= ~nh_flag;
uint32_t count;
SWSS_LOG_INFO("clearnexthop on %s seen on port %s ",
nexthop.ip_address.to_string().c_str(), nexthop.alias.c_str());
switch (nh_flag)
{
case NHFLAGS_IFDOWN:
Expand Down Expand Up @@ -1901,3 +1905,30 @@ bool NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddre

return false;
}

/*
 * Propagate an oper state change of a remote system port interface to the
 * next hops of the neighbors learnt on it.
 *
 * For every synced neighbor whose alias equals `alias`, the next hop keyed by
 * the neighbor IP and the inband port alias gets NHFLAGS_IFDOWN cleared
 * (if_up == true) or set (if_up == false).
 *
 * alias - alias of the remote system port interface
 * if_up - new oper state of that interface
 *
 * Returns true only if every next hop flag update succeeded. All matching
 * neighbors are processed even after a failure, so one bad next hop neither
 * stops the remaining updates nor gets hidden by a later success.
 */
bool NeighOrch::ifChangeInformRemoteNextHop(const string &alias, bool if_up)
{
    SWSS_LOG_ENTER();

    // NOTE(review): the result of getInbandPort() is not checked here; if no
    // inband port is configured inbp.m_alias is presumably empty - confirm.
    Port inbp;
    gPortsOrch->getInbandPort(inbp);

    bool rc = true;
    for (const auto &nbr : m_syncdNeighbors)
    {
        if (nbr.first.alias != alias)
        {
            continue;
        }
        SWSS_LOG_INFO("Found remote Neighbor %s on %s", nbr.first.ip_address.to_string().c_str(), alias.c_str());

        // Remote neighbors are reached through the inband port, so that is
        // the alias used in the next hop key.
        NextHopKey nhop = { nbr.first.ip_address, inbp.m_alias };

        const bool updated = if_up ? clearNextHopFlag(nhop, NHFLAGS_IFDOWN)
                                   : setNextHopFlag(nhop, NHFLAGS_IFDOWN);
        if (!updated)
        {
            // Remember the failure instead of letting the next neighbor
            // overwrite it.
            rc = false;
        }
    }
    return rc;
}
2 changes: 2 additions & 0 deletions orchagent/neighorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class NeighOrch : public Orch, public Subject, public Observer
bool removeTunnelNextHop(const NextHopKey&);

bool ifChangeInformNextHop(const string &, bool);

bool isNextHopFlagSet(const NextHopKey &, const uint32_t);
bool removeOverlayNextHop(const NextHopKey &);
void update(SubjectType, void *);
Expand All @@ -81,6 +82,7 @@ class NeighOrch : public Orch, public Subject, public Observer

void resolveNeighbor(const NeighborEntry &);
void updateSrv6Nexthop(const NextHopKey &, const sai_object_id_t &);
bool ifChangeInformRemoteNextHop(const string &, bool);

private:
PortsOrch *m_portsOrch;
Expand Down
11 changes: 11 additions & 0 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7986,6 +7986,8 @@ void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status)
isUp ? "up" : "down");
}
}
SWSS_LOG_INFO("Updating the nexthop for port %s and operational status %s", port.m_alias.c_str(), isUp ? "up" : "down");

if (!gNeighOrch->ifChangeInformNextHop(port.m_alias, isUp))
{
SWSS_LOG_WARN("Inform nexthop operation failed for interface %s", port.m_alias.c_str());
Expand All @@ -7998,6 +8000,15 @@ void PortsOrch::updatePortOperStatus(Port &port, sai_port_oper_status_t status)
}
}

if(gMySwitchType == "voq")
{
if (gIntfsOrch->isLocalSystemPortIntf(port.m_alias))
{
gIntfsOrch->voqSyncIntfState(port.m_alias, isUp);
}
}


PortOperStateUpdate update = {port, status};
notify(SUBJECT_TYPE_PORT_OPER_STATE_CHANGE, static_cast<void *>(&update));
}
Expand Down
151 changes: 150 additions & 1 deletion tests/test_virtual_chassis.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,70 @@ def del_inbandif_port(self, vct, ibport):
# Applicable only for line cards
if cfg_switch_type == "voq":
config_db.delete_entry("VOQ_INBAND_INTERFACE", f"{ibport}")


def get_lc_dvs(self, vct, lc_switch_id):
    """Return the VOQ DVS whose configured switch_id equals lc_switch_id.

    Each DVS in vct.dvss is checked through its CONFIG_DB
    DEVICE_METADATA|localhost entry; only entries with switch_type "voq"
    are considered. Returns None when no DVS matches.
    """
    for dvs in vct.dvss.values():
        metadata = dvs.get_config_db().get_entry("DEVICE_METADATA", "localhost")
        if metadata.get("switch_type") != "voq":
            continue

        switch_id = metadata.get("switch_id")
        assert switch_id != "", "Got error in getting switch_id from CONFIG_DB DEVICE_METADATA"
        if switch_id == lc_switch_id:
            return dvs
    return None

def get_sup_dvs(self, vct):
    """Return the first DVS in vct.dvss whose name starts with "supervisor".

    Returns None when there is no such entry.
    """
    return next(
        (dvs for name, dvs in vct.dvss.items() if name.startswith("supervisor")),
        None,
    )

def configure_neighbor(self, dvs, action, test_neigh_ip, mac_address, test_neigh_dev):
    """Add or delete a static neighbor entry inside the given DVS.

    action "add" installs test_neigh_ip with mac_address on test_neigh_dev;
    any other action deletes the entry (mac_address is unused then).
    Asserts that the ip command produced no output.
    """
    # The neighbor table is listed first; its output is not checked.
    _, _listing = dvs.runcmd(['sh', "-c", "ip neigh show"])

    if action == "add":
        command = f"ip neigh {action} {test_neigh_ip} lladdr {mac_address} dev {test_neigh_dev}"
        failure = "Error configuring static neigh"
    else:
        command = f"ip neigh del {test_neigh_ip} dev {test_neigh_dev}"
        failure = "Error deleting static neigh"

    _, output = dvs.runcmd(['sh', "-c", command])
    assert output == "", failure

def get_num_of_ecmp_paths_from_asic_db(self, dvs, ip_prefix):
    """Count the next hop group members behind the ASIC_DB route for ip_prefix.

    The route is the first route entry key that contains ip_prefix as a
    substring. Its SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID is taken as the group
    id, and every next hop group member pointing at that id is counted.
    Asserts that the route exists and carries a next hop id.
    """
    route_table = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY"
    member_table = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER"
    asic_db = dvs.asic_db

    # Locate the route entry for the prefix of interest.
    route_key = next(
        (key for key in asic_db.get_keys(route_table) if ip_prefix in key),
        "",
    )
    assert route_key != "", "Route not found"

    # The route's next hop id is the oid of the next hop group.
    nhg_id = asic_db.get_entry(route_table, route_key).get("SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID", None)
    assert nhg_id is not None, "nexthop group is not found"

    # Count the members that belong to that group.
    return sum(
        1
        for member in asic_db.get_keys(member_table)
        if asic_db.get_entry(member_table, member).get(
            "SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID", None
        ) == nhg_id
    )

def test_connectivity(self, vct):
if vct is None:
return
Expand Down Expand Up @@ -972,7 +1035,93 @@ def test_chassis_wred_profile_on_system_ports(self, vct):

# Total number of logs = (No of system ports * No of lossless priorities) - No of lossless priorities for CPU ports
assert logSeen.strip() == str(len(system_ports)*2 - 2)

def test_chassis_system_intf_status(self, vct):
    """Check that every SYSTEM_INTERFACE entry in the chassis app db has an oper_status.

    The chassis app db is opened through the supervisor DVS. The test fails
    when the table is empty or when any entry's oper_status is missing or
    equal to "unknown".
    """
    table = "SYSTEM_INTERFACE"
    sup_dvs = self.get_sup_dvs(vct)
    chassis_db = DVSDatabase(swsscommon.CHASSIS_APP_DB, sup_dvs.redis_chassis_sock)

    system_intfs = chassis_db.get_keys(table)
    assert len(system_intfs) > 0, "No system interface entries in chassis app db"

    for name in system_intfs:
        # A missing field is mapped to "unknown" so both cases fail alike.
        status = chassis_db.get_entry(table, name).get("oper_status", "unknown")
        assert status != "unknown", "System interface oper status is unknown"

def test_remote_port_down(self, vct):
    """Verify ECMP members on a remote line card follow a port's oper state.

    Two static neighbors are added on the local line card and an ECMP route
    over both is installed on the remote line card. Bringing the first
    neighbor's port down must publish oper_status "down" for its system
    interface in the chassis app db and shrink the remote next hop group to
    one member; bringing it back up must restore "up" and two members.
    """
    # test params
    local_lc_switch_id = '0'
    remote_lc_switch_id = '2'
    test_system_port = "lc1|Asic0|Ethernet4"
    test_prefix = "13.13.0.0/16"
    inband_port = "Ethernet0"
    test_neigh_ip_1 = "10.8.104.10"
    test_neigh_dev_1 = "Ethernet4"
    test_neigh_mac_1 = "00:01:02:03:04:05"
    test_neigh_ip_2 = "10.8.108.10"
    test_neigh_dev_2 = "Ethernet8"
    test_neigh_mac_2 = "00:01:02:03:04:06"

    def assert_system_port_oper_status(expected):
        # Read the system interface state back from the chassis app db,
        # which is reached through the supervisor DVS.
        sup_dvs = self.get_sup_dvs(vct)
        chassis_app_db = DVSDatabase(swsscommon.CHASSIS_APP_DB, sup_dvs.redis_chassis_sock)
        keys = chassis_app_db.get_keys("SYSTEM_INTERFACE")
        assert len(keys) > 0, "No system interface entries in chassis app db"
        port_status = chassis_app_db.get_entry("SYSTEM_INTERFACE", test_system_port)
        oper_status = port_status.get("oper_status", "unknown")
        assert oper_status == expected, f"System interface oper status is not {expected}"

    local_lc_dvs = self.get_lc_dvs(vct, local_lc_switch_id)
    remote_lc_dvs = self.get_lc_dvs(vct, remote_lc_switch_id)
    # config inband port
    self.config_inbandif_port(vct, inband_port)

    # add 2 neighbors on the local LC
    self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_1, test_neigh_mac_1, test_neigh_dev_1)
    self.configure_neighbor(local_lc_dvs, "add", test_neigh_ip_2, test_neigh_mac_2, test_neigh_dev_2)

    time.sleep(30)

    # add an ECMP route on the remote LC (pretend learnt via bgp)
    _, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route add {test_prefix} nexthop via {test_neigh_ip_1} nexthop via {test_neigh_ip_2}"])
    assert res == "", "Error configuring route"
    time.sleep(10)
    # verify 2 nexthops are programmed in asic_db
    paths = self.get_num_of_ecmp_paths_from_asic_db(remote_lc_dvs, test_prefix)
    assert paths == 2, "ECMP paths not configured"

    # shut down the first neighbor's port on the local LC
    local_lc_dvs.port_admin_set(test_neigh_dev_1, "down")
    time.sleep(10)

    # verify the port oper status is down in chassis db
    assert_system_port_oper_status("down")

    # verify the number of paths is reduced by 1
    paths = self.get_num_of_ecmp_paths_from_asic_db(remote_lc_dvs, test_prefix)
    assert paths == 1, "Remote port down does not remove ecmp member"

    # bring the port back up on the local LC
    local_lc_dvs.port_admin_set(test_neigh_dev_1, "up")
    time.sleep(10)

    # verify the port oper status is up in chassis db
    assert_system_port_oper_status("up")

    # verify the number of paths is back to 2
    paths = self.get_num_of_ecmp_paths_from_asic_db(remote_lc_dvs, test_prefix)
    assert paths == 2, "Remote port up is not added in nexthop group"

    # cleanup
    _, res = remote_lc_dvs.runcmd(['sh', '-c', f"ip route del {test_prefix} nexthop via {test_neigh_ip_1} nexthop via {test_neigh_ip_2}"])
    assert res == "", "Error deleting route"

    # Cleanup inband if configuration
    self.del_inbandif_port(vct, inband_port)


# Add a dummy always-pass test at the end as a workaround
# for an issue where, when Flaky fails on the final test, it invokes module tear-down before retrying
def test_nonflaky_dummy():
Expand Down
7 changes: 6 additions & 1 deletion tests/virtual_chassis/1/default_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,20 @@
"INTERFACE": {
"Ethernet0": {},
"Ethernet4": {},
"Ethernet8": {},
"Ethernet0|10.8.101.1/24": {},
"Ethernet4|10.8.104.1/24": {}
"Ethernet4|10.8.104.1/24": {},
"Ethernet8|10.8.108.1/24": {}
},
"PORT": {
"Ethernet0": {
"admin_status": "up"
},
"Ethernet4": {
"admin_status": "up"
},
"Ethernet8": {
"admin_status": "up"
}
},
"SYSTEM_PORT": {
Expand Down
Loading