|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +""" |
| 4 | +Description: restore_neighbors.py -- restoring neighbor table into kernel during system warm reboot. |
| 5 | + The script is started by supervisord in swss docker when the docker is started. |
| 6 | + It does not do anything in case warm restart is not enabled. |
| 7 | + In case system warm reboot is enabled, it will try to restore the neighbor table into kernel |
| 8 | + through netlink API calls and update the neigh table by sending arp/ns requests to all neighbor |
| 9 | + entries, then it sets the stateDB flag for neighsyncd to continue the reconciliation process. |
| 10 | + In case docker restart enabled only, it sets the stateDB flag so neighsyncd can follow |
| 11 | + the same logic. |
| 12 | +""" |
| 13 | + |
| 14 | +import sys |
| 15 | +import swsssdk |
| 16 | +import netifaces |
| 17 | +import time |
| 18 | +import monotonic |
| 19 | +from pyroute2 import IPRoute, NetlinkError |
| 20 | +from pyroute2.netlink.rtnl import ndmsg |
| 21 | +from socket import AF_INET,AF_INET6 |
| 22 | +import logging |
| 23 | +logging.getLogger("scapy.runtime").setLevel(logging.ERROR) |
| 24 | +from scapy.all import conf, in6_getnsma, inet_pton, inet_ntop, in6_getnsmac, get_if_hwaddr, Ether, ARP, IPv6, ICMPv6ND_NS, ICMPv6NDOptSrcLLAddr |
| 25 | +from swsscommon import swsscommon |
| 26 | +import errno |
| 27 | + |
# Module-level logger for this script. Records below WARNING are filtered out
# by the level set here, and the only handler attached is a NullHandler,
# which discards whatever it receives.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
logger.addHandler(logging.NullHandler())

# timeout the restore process in 1 min if not finished
# This is mostly to wait for interfaces to be created and up after warm-reboot
# It would be good to keep that below routing reconciliation time-out.
TIME_OUT = 60

# every 5 seconds to check interfaces state
CHECK_INTERVAL = 5

# Maps the family strings used in the neighbor table ("IPv4"/"IPv6") to the
# corresponding socket address-family constants.
ip_family = {"IPv4": AF_INET, "IPv6": AF_INET6}
| 41 | + |
| 42 | +# return the first ipv4/ipv6 address assigned on intf |
def first_ip_on_intf(intf, family):
    """Return the first address of the given family configured on ``intf``.

    ``family`` is a key of ``ip_family`` ("IPv4" or "IPv6"). Returns ``None``
    when the interface is not known to netifaces or carries no address of
    that family.
    """
    if intf not in netifaces.interfaces():
        return None

    addrs_by_family = netifaces.ifaddresses(intf)
    af = ip_family[family]
    if af not in addrs_by_family:
        return None

    # cover link local address as well: drop anything after '%' in the address
    first_addr = addrs_by_family[af][0]['addr']
    return first_addr.split("%")[0]
| 50 | + |
| 51 | +# check if the intf is operational up |
def is_intf_oper_state_up(intf):
    """Return True when ``/sys/class/net/<intf>/carrier`` reads '1'.

    Any failure to open or read the file is logged at info level and
    reported as "not up" (False), so callers can simply poll this function.
    """
    carrier_path = '/sys/class/net/{0}/carrier'.format(intf)
    try:
        # 'with' guarantees the file handle is released even if reading
        # fails; the previous code never closed it.
        with open(carrier_path, 'r') as state_file:
            state = state_file.readline().rstrip()
    except Exception as e:
        # Best effort: an unreadable carrier file is treated as link down.
        logger.info('Error: {}'.format(str(e)))
        return False
    return state == '1'
| 63 | + |
| 64 | +# read the neigh table from AppDB to memory, format as below |
| 65 | +# build map as below, this can efficiently access intf and family groups later |
| 66 | +# { intf1 -> { { family1 -> [[ip1, mac1], [ip2, mac2] ...] } |
| 67 | +# { family2 -> [[ipM, macM], [ipN, macN] ...] } }, |
| 68 | +# ... |
| 69 | +# intfA -> { { family1 -> [[ipW, macW], [ipX, macX] ...] } |
| 70 | +# { family2 -> [[ipY, macY], [ipZ, macZ] ...] } } |
| 71 | +# } |
| 72 | +# |
| 73 | +# Alternatively: |
| 74 | +# 1, we can build: |
| 75 | +# { intf1 -> [[family1, ip1, mac1], [family2, ip2, mac2] ...]}, |
| 76 | +# ... |
| 77 | +# { intfA -> [[family1, ipX, macX], [family2, ipY, macY] ...]} |
| 78 | +# |
| 79 | +# 2, Or simply build two maps based on families |
| 80 | +# These alternative solutions would have worse performance because: |
| 81 | +# 1, need iterate the whole list if only one family is up. |
| 82 | +# 2, need check interface state twice due to the split map |
| 83 | + |
def read_neigh_table_to_maps():
    """Load the APPL_DB ``NEIGH_TABLE`` into a nested in-memory map.

    Returns:
        dict: ``{intf_name: {family: [[ip, mac], ...]}}`` where ``family`` is
        a key of ``ip_family``. Entries on interface ``lo`` are skipped.

    Raises:
        RuntimeError: if a key is not of the form ``NEIGH_TABLE:<intf>:<ip>``,
        if an entry lacks the ``neigh`` or ``family`` field, or if ``family``
        is not a key of ``ip_family``.
    """
    db = swsssdk.SonicV2Connector(host='127.0.0.1')
    db.connect(db.APPL_DB, False)

    intf_neigh_map = {}
    try:
        keys = db.keys(db.APPL_DB, 'NEIGH_TABLE:*') or []
        for key in keys:
            # maxsplit=2 keeps the colons of an IPv6 address in the last part
            parts = key.split(':', 2)
            if len(parts) > 1 and parts[1] == 'lo':
                continue
            if len(parts) != 3:
                # Report a malformed key the same way as a malformed value so
                # the caller's RuntimeError handling covers it.
                raise RuntimeError('Neigh table format is incorrect')
            intf_name, dst_ip = parts[1], parts[2]

            value = db.get_all(db.APPL_DB, key)
            if not value or 'neigh' not in value or 'family' not in value:
                raise RuntimeError('Neigh table format is incorrect')
            dmac = value['neigh']
            family = value['family']

            if family not in ip_family:
                raise RuntimeError('Neigh table format is incorrect')

            intf_neigh_map.setdefault(intf_name, {}).setdefault(family, []).append([dst_ip, dmac])
    finally:
        # Close the connection on the error paths too.
        db.close(db.APPL_DB)
    return intf_neigh_map
| 115 | + |
| 116 | + |
| 117 | +# Use netlink to set neigh table into kernel, not overwrite the existing ones |
def set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac):
    """Add one neighbor entry to the kernel through netlink.

    Args:
        ipclass: object exposing the pyroute2 ``neigh()`` call (an IPRoute).
        family: "IPv4" or "IPv6"; any other value makes this a no-op.
        intf_idx: kernel interface index the neighbor lives on.
        dst_ip: neighbor IP address.
        dmac: neighbor link-layer (MAC) address.

    Raises:
        NetlinkError: for any netlink failure other than EEXIST. An entry
        that already exists is only logged, never overwritten.
    """
    # Use the module logger (not the root logger) like the rest of the file.
    logger.info('Add neighbor entries: family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
        family, intf_idx, dst_ip, dmac))

    if family not in ip_family:
        return

    family_af_inet = ip_family[family]
    try:
        ipclass.neigh('add',
                      family=family_af_inet,
                      dst=dst_ip,
                      lladdr=dmac,
                      ifindex=intf_idx,
                      state=ndmsg.states['reachable'])
    # If neigh exists, log it but no exception raise, other exceptions, raise
    except NetlinkError as e:
        # e.code carries the errno; indexing the exception (e[0]) only works
        # on Python 2.
        if e.code == errno.EEXIST:
            logger.warning('Neigh exists in kernel with family: {}, intf_idx: {}, ip: {}, mac: {}'.format(
                family, intf_idx, dst_ip, dmac))
        else:
            raise
| 140 | + |
| 141 | +# build ARP or NS packets depending on family |
def build_arp_ns_pkt(family, smac, src_ip, dst_ip):
    """Build the probe packet used to refresh a neighbor entry.

    Args:
        family: "IPv4" builds a broadcast ARP who-has request; "IPv6" builds
            an ICMPv6 Neighbor Solicitation sent to the solicited-node
            multicast address derived from ``dst_ip``.
        smac: source MAC address placed in the Ethernet header (and in the
            source link-layer option for IPv6).
        src_ip: source IP address; only used for the IPv6 header here.
        dst_ip: address of the neighbor being probed.

    Returns:
        The assembled scapy packet.

    Raises:
        ValueError: if ``family`` is neither "IPv4" nor "IPv6". Previously
        this case fell through to ``return pkt`` with ``pkt`` unbound.
    """
    if family == 'IPv4':
        eth = Ether(src=smac, dst='ff:ff:ff:ff:ff:ff')
        return eth/ARP(op=ARP.who_has, pdst=dst_ip)

    if family == 'IPv6':
        # Solicited-node multicast address and its matching multicast MAC
        nsma = in6_getnsma(inet_pton(AF_INET6, dst_ip))
        mcast_dst_ip = inet_ntop(AF_INET6, nsma)
        dmac = in6_getnsmac(nsma)
        eth = Ether(src=smac, dst=dmac)
        ipv6 = IPv6(src=src_ip, dst=mcast_dst_ip)
        ns = ICMPv6ND_NS(tgt=dst_ip)
        ns_opt = ICMPv6NDOptSrcLLAddr(lladdr=smac)
        return eth/ipv6/ns/ns_opt

    raise ValueError('Unsupported neighbor family: {}'.format(family))
| 156 | + |
| 157 | +# Set the statedb "NEIGH_RESTORE_TABLE|Flags", so neighsyncd can start reconciliation |
def set_statedb_neigh_restore_done():
    """Write ``restored = true`` under ``NEIGH_RESTORE_TABLE|Flags`` in STATE_DB.

    Opens its own connection to 127.0.0.1, sets the field and closes the
    connection again.
    """
    state_conn = swsssdk.SonicV2Connector(host='127.0.0.1')
    state_db = state_conn.STATE_DB
    state_conn.connect(state_db, False)
    state_conn.set(state_db, 'NEIGH_RESTORE_TABLE|Flags', 'restored', 'true')
    state_conn.close(state_db)
| 164 | + |
def restore_update_kernel_neighbors(intf_neigh_map):
    """Push saved neighbors into the kernel and probe them, interface by interface.

    For every interface in ``intf_neigh_map`` whose carrier is up, and for
    every family that has a local address on it, each saved ``[ip, mac]``
    pair is added to the kernel via netlink and an ARP/NS probe is sent.
    Completed families (and then interfaces) are removed from
    ``intf_neigh_map`` in place. Interfaces that are not ready are retried
    every ``CHECK_INTERVAL`` seconds until the map is empty or ``TIME_OUT``
    seconds have elapsed.

    Args:
        intf_neigh_map: ``{intf: {family: [[ip, mac], ...]}}``; mutated in
            place, whatever is left afterwards was not restored.
    """
    # create object for netlink calls to kernel
    ipclass = IPRoute()
    # Use the real monotonic clock: 'monotonic.time.time' is just the
    # wall-clock time.time reached through the package's own import, so the
    # timeout would be skewed by any clock adjustment.
    mtime = monotonic.monotonic
    start_time = mtime()
    try:
        while (mtime() - start_time) < TIME_OUT:
            # Iterate over a snapshot because entries are deleted below.
            for intf, family_neigh_map in list(intf_neigh_map.items()):
                # only try to restore to kernel when link is up
                if not is_intf_oper_state_up(intf):
                    continue

                src_mac = get_if_hwaddr(intf)
                intf_idx = ipclass.link_lookup(ifname=intf)[0]
                # create socket per intf to send packets
                s = conf.L2socket(iface=intf)
                try:
                    # Only two families: 'IPv4' and 'IPv6'
                    for family in list(ip_family):
                        # if ip address assigned and if we have neighs in
                        # this family, restore them
                        src_ip = first_ip_on_intf(intf, family)
                        if not src_ip or family not in family_neigh_map:
                            continue
                        for dst_ip, dmac in family_neigh_map[family]:
                            # use netlink to set neighbor entries
                            set_neigh_in_kernel(ipclass, family, intf_idx, dst_ip, dmac)

                            # best effort to update kernel neigh info
                            # this will be updated by arp_update later too
                            s.send(build_arp_ns_pkt(family, src_mac, src_ip, dst_ip))
                        # delete this family on the intf
                        del family_neigh_map[family]
                finally:
                    # close the pkt socket even when restoring fails midway
                    s.close()

                # if all families are deleted, remove the key
                if not family_neigh_map:
                    del intf_neigh_map[intf]

            # map is empty, all neigh entries are restored
            if not intf_neigh_map:
                break
            time.sleep(CHECK_INTERVAL)
        else:
            # Loop ended because the deadline passed, not because of 'break'.
            logger.warning('Neighbor restore timed out, interfaces not restored: {}'.format(
                sorted(intf_neigh_map)))
    finally:
        # release the netlink socket
        ipclass.close()
| 204 | + |
| 205 | + |
def main():
    """Entry point: restore kernel neighbors when a warm restart is in progress.

    Does nothing if warm restart is not enabled. For a swss-only restart it
    just sets the STATE_DB restore flag. For a system warm reboot it reads
    the neighbor table, restores it into the kernel and then sets the flag.
    Exits with status 1 if reading or restoring fails.
    """
    print("restore_neighbors service is started")

    # Use warmstart python binding
    app_name, docker_name = "neighsyncd", "swss"
    ws = swsscommon.WarmStart()
    ws.initialize(app_name, docker_name)
    ws.checkWarmStart(app_name, docker_name, False)

    # if swss or system warm reboot not enabled, don't run
    if not ws.isWarmStart():
        print("restore_neighbors service is skipped as warm restart not enabled")
        return

    # swss restart not system warm reboot
    if not ws.isSystemWarmRebootEnabled():
        set_statedb_neigh_restore_done()
        print("restore_neighbors service is done as system warm reboot not enabled")
        return

    # read the neigh table from appDB to internal map
    try:
        neigh_map = read_neigh_table_to_maps()
    except RuntimeError as err:
        logger.exception(str(err))
        sys.exit(1)

    try:
        restore_update_kernel_neighbors(neigh_map)
    except Exception as err:
        logger.exception(str(err))
        sys.exit(1)

    # set statedb to signal other processes like neighsyncd
    set_statedb_neigh_restore_done()
    print("restore_neighbor service is done for system warmreboot")
| 243 | + |
| 244 | +if __name__ == '__main__': |
| 245 | + main() |
0 commit comments