Skip to content

Commit 096061e

Browse files
committed
[warm-reboot] Add new preboot health check: verify database integrity
1 parent f5ce87a commit 096061e

File tree

2 files changed

+371
-2
lines changed

2 files changed

+371
-2
lines changed

scripts/check_db_integrity.py

+361
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
#!/usr/bin/env python3
2+
3+
import os, sys
4+
import json, jsonschema
5+
import argparse
6+
import syslog
7+
import traceback
8+
9+
10+
# Top-level config_db.json tables the schema marks as required. Every one of
# them must be a JSON object; the order here is the order used in the schema.
_REQUIRED_TABLES = [
    "ACL_TABLE",
    "BGP_NEIGHBOR",
    "BGP_PEER_RANGE",
    "BUFFER_PG",
    "BUFFER_POOL",
    "BUFFER_PROFILE",
    "BUFFER_QUEUE",
    "CABLE_LENGTH",
    "CRM",
    "DEVICE_METADATA",
    "DEVICE_NEIGHBOR",
    "DEVICE_NEIGHBOR_METADATA",
    "DHCP_SERVER",
    "DSCP_TO_TC_MAP",
    "FEATURE",
    "FLEX_COUNTER_TABLE",
    "KDUMP",
    "LOOPBACK_INTERFACE",
    "MAP_PFC_PRIORITY_TO_QUEUE",
    "MGMT_INTERFACE",
    "MGMT_PORT",
    "NTP_SERVER",
    "PFC_WD",
    "PORT",
    "PORTCHANNEL",
    "PORTCHANNEL_INTERFACE",
    "PORTCHANNEL_MEMBER",
    "PORT_QOS_MAP",
    "QUEUE",
    "SCHEDULER",
    "SNMP",
    "SNMP_COMMUNITY",
    "SYSLOG_SERVER",
    "TACPLUS_SERVER",
    "TC_TO_PRIORITY_GROUP_MAP",
    "TC_TO_QUEUE_MAP",
    "VERSIONS",
    "VLAN",
    "VLAN_INTERFACE",
    "VLAN_MEMBER",
    "WRED_PROFILE",
]

# Entries the schema requires inside the FEATURE table (each a JSON object).
_REQUIRED_FEATURES = [
    "acms",
    "bgp",
    "database",
    "dhcp_relay",
    "lldp",
    "mux",
    "pmon",
    "radv",
    "snmp",
    "swss",
    "syncd",
    "teamd",
    "telemetry",
]


def _schema_node(pointer, json_type="object"):
    """Return a schema entry holding only its ``$id`` and expected ``type``."""
    return {"$id": pointer, "type": json_type}


def _build_config_db_schema():
    """Assemble the JSON schema that config_db.json is validated against."""
    tables = {name: _schema_node("#/properties/" + name) for name in _REQUIRED_TABLES}

    # FEATURE additionally requires one object per listed feature.
    feature_prefix = "#/properties/FEATURE/properties/"
    tables["FEATURE"]["required"] = list(_REQUIRED_FEATURES)
    tables["FEATURE"]["properties"] = {
        feature: _schema_node(feature_prefix + feature)
        for feature in _REQUIRED_FEATURES
    }

    # VERSIONS must contain DATABASE, which must contain a string VERSION.
    database_id = "#/properties/VERSIONS/properties/DATABASE"
    database = _schema_node(database_id)
    database["required"] = ["VERSION"]
    database["properties"] = {
        "VERSION": _schema_node(database_id + "/properties/VERSION", "string"),
    }
    tables["VERSIONS"]["required"] = ["DATABASE"]
    tables["VERSIONS"]["properties"] = {"DATABASE": database}

    return {
        "$schema": "http://json-schema.org/draft-06/schema",
        "type": "object",
        "title": "Schema for config_db.json critical structure needed before reboot",
        "required": list(_REQUIRED_TABLES),
        "properties": tables,
    }


CONFIG_DB_SCHEMA = _build_config_db_schema()
311+
312+
def main():
    """Verify that config_db.json is valid JSON and has the tables needed for reboot.

    The file path comes from ``-c/--config_db_file`` and defaults to
    ``/etc/sonic/config_db.json``. The parsed content is validated against
    ``CONFIG_DB_SCHEMA``.

    Returns:
        0 if the file parses and satisfies the schema; 1 if it is not valid
        JSON or fails schema validation. Both failures are logged to syslog
        at LOG_ERR.

    Raises:
        OSError: if the file cannot be opened. This is not handled here and
            propagates to the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config_db_file', type=str,
                        default='/etc/sonic/config_db.json',
                        help='Absolute location of config_db.json file')

    args = parser.parse_args()
    config_db_file = args.config_db_file

    # Read config_db.json and check if it is a valid JSON file
    try:
        with open(config_db_file) as fp:
            config_db_data = json.load(fp)
    except ValueError as err:
        # This is a failed health check, so log it at the same priority as the
        # schema failure below; at LOG_DEBUG it would normally go unnoticed.
        syslog.syslog(syslog.LOG_ERR,
                      "Config DB json file is not a valid json file. "
                      "Error: {}".format(str(err)))
        return 1

    # What: Validate if critical tables and entries are present in config_db.json
    # Why: This is needed to avoid rebooting with a bad config_db.json; which can
    #      potentially trigger failures in the reboot recovery path.
    # How: Check config_db.json against a schema (CONFIG_DB_SCHEMA) which defines
    #      REQUIRED tables and their types.
    try:
        jsonschema.validate(instance=config_db_data, schema=CONFIG_DB_SCHEMA)
    except jsonschema.exceptions.ValidationError as err:
        syslog.syslog(syslog.LOG_ERR,
                      "Database is missing tables/entries needed for reboot procedure. "
                      "Config db integrity check failed with:\n{}".format(str(err)))
        return 1

    syslog.syslog(syslog.LOG_DEBUG, "Database integrity checks passed.")
    return 0
344+
345+
346+
if __name__ == '__main__':
    # Exit status: 0 = checks passed, 1 = check failed or interrupted,
    # 2 = unexpected exception. An exit requested inside main() (argparse
    # usage errors, --help) keeps the status it asked for.
    res = 0
    try:
        res = main()
    except KeyboardInterrupt:
        syslog.syslog(syslog.LOG_NOTICE, "SIGINT received. Quitting")
        res = 1
    except SystemExit as e:
        # argparse reports bad arguments by raising SystemExit, which is not
        # an Exception subclass. Without this handler the finally block would
        # run with res still 0 and report a bad invocation as a passed check.
        if e.code is None:
            res = 0
        elif isinstance(e.code, int):
            res = e.code
        else:
            res = 1
    except Exception as e:
        syslog.syslog(syslog.LOG_ERR, "Got an exception %s: Traceback: %s" % (str(e), traceback.format_exc()))
        res = 2
    finally:
        syslog.closelog()
        try:
            sys.exit(res)
        except SystemExit:
            # Terminate immediately with the computed status.
            os._exit(res)

0 commit comments

Comments
 (0)