Skip to content

Commit 6828f9e

Browse files
authored
[action] [PR:22101] [ARP_UPDATE] Update arp_update tool suite to resolve static-route neighbors for BT0 (sonic-net#1008)
<!-- Please make sure you've read and understood our contributing guidelines: https://github.com/Azure/SONiC/blob/gh-pages/CONTRIBUTING.md ** Make sure all your commits include a signature generated with `git commit -s` ** If this is a bug fix, make sure your description includes "fixes #xxxx", or "closes #xxxx" or "resolves #xxxx" Please provide the following information: --> #### Why I did it We need to proactively resolve static-route neighbors for BT0 to prevent VLAN flooding caused by missing MAC entries. ##### Work item tracking - Microsoft ADO **(number only)**: 31751190 #### How I did it Reuse the static-route neighbor probing mechanism (originally added for packet chassis) for BT0 (device type `BackEndToRRouter`). #### How to verify it <!-- If the PR needs to be backported, then it must be tested against the base branch and the earliest backport release branch, and the tested image version must be provided for both branches. For example, if the PR is requested for master, 202211 and 202012, then the requester needs to provide test results on master and 202012. --> #### Which release branch to backport (provide reason below if selected) <!-- - Note we only backport fixes to a release branch, *not* features! - Please also provide a reason for the backporting below. - e.g. - [x] 202006 --> - [ ] 201811 - [ ] 201911 - [ ] 202006 - [ ] 202012 - [ ] 202106 - [ ] 202111 - [ ] 202205 - [ ] 202211 - [ ] 202305 - [x] 202412 #### Tested branch (Please provide the tested image version) <!-- - Please provide tested image version - e.g. - [x] 20201231.100 --> - [ ] <!-- image version 1 --> - [ ] <!-- image version 2 --> #### Description for the changelog <!-- Write a short (one line) summary that describes the changes in this pull request for inclusion in the changelog: --> <!-- Be sure to add a label/tag for the feature raised. Example: PR#2174 in the sonic-utilities repo, where the Generic Config and Update feature has been labelled as GCU. 
--> #### Link to config_db schema for YANG module changes <!-- Provide a link to config_db schema for the table for which YANG model is defined Link should point to correct section on https://github.com/Azure/sonic-buildimage/blob/master/src/sonic-yang-models/doc/Configuration.md --> #### A picture of a cute animal (not mandatory but encouraged)
1 parent 69b2c84 commit 6828f9e

File tree

2 files changed

+47
-36
lines changed

2 files changed

+47
-36
lines changed

files/build_templates/arp_update_vars.j2

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"switch_type": "{% if DEVICE_METADATA and 'localhost' in DEVICE_METADATA and 'switch_type' in DEVICE_METADATA['localhost'] %}{{ DEVICE_METADATA['localhost']['switch_type'] }}{%endif %}",
3+
"type": "{% if DEVICE_METADATA and 'localhost' in DEVICE_METADATA and 'type' in DEVICE_METADATA['localhost'] %}{{ DEVICE_METADATA['localhost']['type'] }}{%endif %}",
34
"interface": "{% for (name, prefix) in INTERFACE|pfx_filter %}{% if prefix|ipv6 %}{{ name }} {% endif %}{% endfor %}",
45
"pc_interface" : "{% for (name, prefix) in PORTCHANNEL_INTERFACE|pfx_filter %}{% if prefix|ipv6 %}{{ name }} {% endif %}{% endfor %}",
56
"vlan_sub_interface": "{% for (name, prefix) in VLAN_SUB_INTERFACE|pfx_filter %}{% if prefix|ipv6 %}{{ name }} {% endif %}{% endfor %}",

files/scripts/arp_update

+46-36
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,59 @@ while /bin/true; do
1818
# find L3 interfaces which are UP, send ipv6 multicast pings
1919
ARP_UPDATE_VARS=$(sonic-cfggen -d -t ${ARP_UPDATE_VARS_FILE})
2020
SWITCH_TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.switch_type')
21-
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
21+
TYPE=$(echo $ARP_UPDATE_VARS | jq -r '.type')
22+
if [[ "$SWITCH_TYPE" == "chassis-packet" ]] || [[ "$TYPE" == "BackEndToRRouter" ]]; then
2223
# Get array of Nexthops and ifnames. Nexthops and ifnames are mapped one to one
2324
STATIC_ROUTE_NEXTHOPS=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_nexthops'))
2425
STATIC_ROUTE_IFNAMES=($(echo $ARP_UPDATE_VARS | jq -r '.static_route_ifnames'))
25-
# on supervisor/rp exit the script gracefully
26+
2627
if [[ -z "$STATIC_ROUTE_NEXTHOPS" ]] || [[ -z "$STATIC_ROUTE_IFNAMES" ]]; then
27-
logger "exiting as no static route in packet based chassis"
28-
exit 0
29-
fi
30-
for i in ${!STATIC_ROUTE_NEXTHOPS[@]}; do
31-
nexthop="${STATIC_ROUTE_NEXTHOPS[i]}"
32-
if [[ $nexthop == *"."* ]]; then
33-
neigh_state=$(ip -4 neigh show | grep -w $nexthop | tr -s ' ')
34-
ping_prefix=ping
35-
elif [[ $nexthop == *":"* ]] ; then
36-
neigh_state=$(ip -6 neigh show | grep -w $nexthop | tr -s ' ')
37-
ping_prefix=ping6
28+
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
29+
# exit gracefully if running on supervisor/rp
30+
logger "exiting as no static route in packet based chassis"
31+
exit 0
3832
fi
39-
# Check if there is an INCOMPLETE, FAILED, or STALE entry and try to resolve it again.
40-
# STALE entries may be present if there is no traffic on a path. A far-end down event may not
41-
# clear the STALE entry. Refresh the STALE entry to clear the table.
42-
if [[ -z "${neigh_state}" ]] || [[ -n $(echo ${neigh_state} | grep 'INCOMPLETE\|FAILED\|STALE') ]]; then
43-
interface="${STATIC_ROUTE_IFNAMES[i]}"
44-
if [[ -z "$interface" ]]; then
45-
# should never be here, handling just in case
46-
logger -p error "missing interface entry for static route $nexthop"
47-
continue
33+
# continue if running on BT0
34+
else
35+
for i in ${!STATIC_ROUTE_NEXTHOPS[@]}; do
36+
nexthop="${STATIC_ROUTE_NEXTHOPS[i]}"
37+
if [[ $nexthop == *"."* ]]; then
38+
neigh_state=$(ip -4 neigh show | grep -w $nexthop | tr -s ' ')
39+
ping_prefix=ping
40+
elif [[ $nexthop == *":"* ]] ; then
41+
neigh_state=$(ip -6 neigh show | grep -w $nexthop | tr -s ' ')
42+
ping_prefix=ping6
4843
fi
49-
intf_up=$(ip link show $interface | grep "state UP")
50-
if [[ -n "$intf_up" ]]; then
51-
pingcmd="timeout 0.2 $ping_prefix -I ${interface} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null"
52-
eval $pingcmd
53-
# STALE entries may appear more often, not logging to prevent periodic syslogs
54-
if [[ -z $(echo ${neigh_state} | grep 'STALE') ]]; then
55-
logger "static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface"
44+
# Check if there is an INCOMPLETE, FAILED, or STALE entry and try to resolve it again.
45+
# STALE entries may be present if there is no traffic on a path. A far-end down event may not
46+
# clear the STALE entry. Refresh the STALE entry to clear the table.
47+
if [[ -z "${neigh_state}" ]] || [[ -n $(echo ${neigh_state} | grep 'INCOMPLETE\|FAILED\|STALE') ]]; then
48+
interface="${STATIC_ROUTE_IFNAMES[i]}"
49+
if [[ -z "$interface" ]]; then
50+
# should never be here, handling just in case
51+
logger -p error "missing interface entry for static route $nexthop"
52+
continue
53+
fi
54+
intf_up=$(ip link show $interface | grep "state UP")
55+
if [[ -n "$intf_up" ]]; then
56+
pingcmd="timeout 0.2 $ping_prefix -I ${interface} -n -q -i 0 -c 1 -W 1 $nexthop >/dev/null"
57+
eval $pingcmd
58+
# STALE entries may appear more often, not logging to prevent periodic syslogs
59+
if [[ -z $(echo ${neigh_state} | grep 'STALE') ]]; then
60+
logger "static route nexthop not resolved ($neigh_state), pinging $nexthop on $interface"
61+
fi
5662
fi
5763
fi
58-
fi
59-
done
64+
done
6065

61-
sleep 150
62-
continue
66+
if [[ "$SWITCH_TYPE" == "chassis-packet" ]]; then
67+
sleep 150
68+
# skip the rest of the script if running on a packet chassis
69+
continue
70+
fi
71+
fi
6372
fi
73+
6474
# find L3 interfaces which are UP, send ipv6 multicast pings
6575
INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.interface')
6676
PC_INTERFACE=$(echo $ARP_UPDATE_VARS | jq -r '.pc_interface')
@@ -122,7 +132,7 @@ while /bin/true; do
122132
ndisc6cmd="sed -e 's/^/ndisc6 -q -w 0 -1 /' -e 's/$/;/'"
123133
ip6cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | cut -d ' ' -f 1,3 | $ndisc6cmd"
124134
eval `eval $ip6cmd`
125-
135+
126136
if [[ $SUBTYPE == "dualtor" ]]; then
127137
# capture all current failed/incomplete IPv6 neighbors in the kernel to avoid situations where new neighbors are learned
128138
# in the middle of the below sequence of commands
@@ -161,13 +171,13 @@ while /bin/true; do
161171
# ip neigh replace <neighbor IPv6> dev <VLAN name> nud incomplete
162172
failed_kernel_neighbors=$(ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED')
163173
if [[ ! -z "$failed_kernel_neighbors" ]]; then
164-
neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'"
174+
neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'"
165175
ip_neigh_replace_cmd="echo \"$failed_kernel_neighbors\" | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template"
166176
eval `eval "$ip_neigh_replace_cmd"`
167177
fi
168178
fi
169179
done
170-
180+
171181

172182
# sleep here before handling the mismatch as it is not required during startup
173183
sleep 300

0 commit comments

Comments
 (0)