Skip to content

Commit 57e3fd1

Browse files
pra-mohzhenggen-xu
authored and committed
Add daemon which periodically pushes process and docker stats to State DB (sonic-net#3525)
1 parent 183223b commit 57e3fd1

File tree

3 files changed

+232
-0
lines changed

3 files changed

+232
-0
lines changed

files/build_templates/sonic_debian_extension.j2

+5
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd.service $FILESYSTEM_ROOT/etc/systemd/s
253253
echo "caclmgrd.service" | sudo tee -a $GENERATED_SERVICE_FILE
254254
sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd $FILESYSTEM_ROOT/usr/bin/
255255

256+
# Copy process/docker cpu/memory utilization data export daemon
257+
sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd.service $FILESYSTEM_ROOT/etc/systemd/system/
258+
echo "procdockerstatsd.service" | sudo tee -a $GENERATED_SERVICE_FILE
259+
sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd $FILESYSTEM_ROOT/usr/bin/
260+
256261
# Copy process-reboot-cause service files
257262
sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause.service $FILESYSTEM_ROOT/etc/systemd/system/
258263
echo "process-reboot-cause.service" | sudo tee -a $GENERATED_SERVICE_FILE
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
#!/usr/bin/env python
2+
'''
3+
procdockerstatsd
4+
Daemon which periodically gathers process and docker statistics and pushes the data to STATE_DB
5+
'''
6+
7+
import os
8+
import re
9+
import subprocess
10+
import sys
11+
import syslog
12+
import time
13+
from datetime import datetime
14+
15+
import swsssdk
16+
17+
VERSION = '1.0'
18+
19+
SYSLOG_IDENTIFIER = "procdockerstatsd"
20+
21+
REDIS_HOSTIP = "127.0.0.1"
22+
23+
# ========================== Syslog wrappers ==========================
24+
def log_info(msg, also_print_to_console=False):
    """Log *msg* to syslog at INFO level; optionally echo it to stdout."""
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_INFO, msg)
    syslog.closelog()
    # Honor the flag: previously it was accepted but silently ignored.
    if also_print_to_console:
        print(msg)
28+
29+
def log_warning(msg, also_print_to_console=False):
    """Log *msg* to syslog at WARNING level; optionally echo it to stdout."""
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_WARNING, msg)
    syslog.closelog()
    # Honor the flag: previously it was accepted but silently ignored.
    if also_print_to_console:
        print(msg)
33+
34+
def log_error(msg, also_print_to_console=False):
    """Log *msg* to syslog at ERROR level; optionally echo it to stdout."""
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_ERR, msg)
    syslog.closelog()
    # Honor the flag: previously it was accepted but silently ignored.
    if also_print_to_console:
        print(msg)
38+
39+
# ========================== ProcessDocker class ==========================
40+
class ProcDockerStats:
    """Gathers process ('ps') and container ('docker stats') statistics and
    publishes them to STATE_DB through swsssdk.

    STATE_DB keys produced:
        DOCKER_STATS|<container id>   per-container stats
        PROCESS_STATS|<pid>           per-process stats
    """

    # Byte multipliers for the unit suffixes 'docker stats' prints:
    # decimal units (kB/MB/GB) for I/O figures, binary units (KiB/MiB/GiB)
    # for memory figures.
    UNIT_FACTORS = {
        'b': 1,
        'kb': 10 ** 3,
        'mb': 10 ** 6,
        'gb': 10 ** 9,
        'kib': 1024,
        'mib': 1024 ** 2,
        'gib': 1024 ** 3,
    }

    def __init__(self):
        self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP)
        self.state_db.connect("STATE_DB")

    def run_command(self, cmd):
        """Run *cmd* through the shell and return its stdout as text, or
        None (after logging) when the command exits non-zero."""
        # universal_newlines=True so stdout is str under both Python 2 and 3.
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        (stdout, stderr) = proc.communicate()
        if proc.returncode != 0:
            log_error("Error running command '{}'".format(cmd))
            return None
        return stdout

    def format_docker_cmd_output(self, cmdout):
        """Parse 'docker stats' table output into the dict produced by
        create_docker_dict(), keyed by 'DOCKER_STATS|<container id>'."""
        lines = cmdout.splitlines()
        if not lines:
            return {}
        # Header names themselves contain single spaces ('CONTAINER ID',
        # 'MEM USAGE / LIMIT'), so columns are delimited by runs of two or
        # more spaces; splitting on a single space misaligns the keys and
        # makes the 'CONTAINER ID' lookup fail for every row.
        keys = re.split(r'\s\s+', lines[0].strip())
        docker_data_list = []
        for line in lines[1:]:
            if not line.strip():
                continue
            values = re.split(r'\s\s+', line.strip())
            docker_data_list.append(dict(zip(keys, values)))
        return self.create_docker_dict(docker_data_list)

    def format_process_cmd_output(self, cmdout):
        """Parse 'ps' output into a list of per-process dicts keyed by the
        ps column headers (UID, PID, PPID, ...)."""
        lines = cmdout.splitlines()
        if not lines:
            return []
        keylist = [key for key in re.split(" +", lines[0]) if key]
        process_data_list = []
        for line in lines[1:]:
            values = [val for val in re.split(" +", line) if val]
            if not values:
                # Skip blank lines instead of appending empty rows.
                continue
            # 'ps' prints the command line as the last column; it may contain
            # spaces, so re-join everything past the 8 fixed columns
            # (previously joined with '' which lost the spaces).
            values[8:] = [' '.join(values[8:])]
            process_data_list.append(dict(zip(keylist, values)))
        return process_data_list

    def convert_to_bytes(self, value):
        """Convert a docker size string such as '89.63MiB' or '2.5kB' to an
        integer byte count; return None when the string or unit is not
        recognized."""
        match = re.match(r'([0-9.]+)\s*([A-Za-z]+)', value)
        if not match:
            return None
        # Keep the fractional part: filtering out non-digit characters
        # would turn '1.5' into 15.
        number = float(match.group(1))
        factor = self.UNIT_FACTORS.get(match.group(2).lower())
        if factor is None:
            return None
        return int(round(number * factor))

    def create_docker_dict(self, dict_list):
        """Reshape parsed docker rows into
        {'DOCKER_STATS|<cid>': {field: value}}; rows without a container id
        are skipped."""
        dockerdict = {}
        for row in dict_list:
            cid = row.get('CONTAINER ID')
            if not cid:
                continue
            key = 'DOCKER_STATS|' + str(cid)
            stats = {}
            stats['NAME'] = row.get('NAME')
            stats['CPU%'] = str(row.get('CPU %')).split('%')[0]

            memuse = str(row.get('MEM USAGE / LIMIT')).split(' / ')
            # Memory figures come in binary units (MiB/GiB).
            stats['MEM_BYTES'] = str(self.convert_to_bytes(memuse[0]))
            stats['MEM_LIMIT_BYTES'] = str(self.convert_to_bytes(memuse[1]))

            stats['MEM%'] = str(row.get('MEM %')).split('%')[0]

            netio = str(row.get('NET I/O')).split(' / ')
            stats['NET_IN_BYTES'] = str(self.convert_to_bytes(netio[0]))
            stats['NET_OUT_BYTES'] = str(self.convert_to_bytes(netio[1]))

            blockio = str(row.get('BLOCK I/O')).split(' / ')
            stats['BLOCK_IN_BYTES'] = str(self.convert_to_bytes(blockio[0]))
            stats['BLOCK_OUT_BYTES'] = str(self.convert_to_bytes(blockio[1]))

            stats['PIDS'] = row.get('PIDS')
            dockerdict[key] = stats
        return dockerdict

    def update_dockerstats_command(self):
        """Collect 'docker stats' once and replace DOCKER_STATS|* in
        STATE_DB. Returns True on success, False otherwise."""
        cmd = "docker stats --no-stream -a"
        data = self.run_command(cmd)
        if not data:
            log_error("'{}' returned null output".format(cmd))
            return False
        dockerdata = self.format_docker_cmd_output(data)
        if not dockerdata:
            log_error("formatting for docker output failed")
            return False
        # Wipe stale entries so exited containers do not linger in STATE_DB.
        self.state_db.delete_all_by_pattern('STATE_DB', 'DOCKER_STATS|*')
        # .items() works on both Python 2 and 3 (iteritems() is 2-only).
        for key, fields in dockerdata.items():
            for field, value in fields.items():
                self.update_state_db(key, field, value)
        return True

    def update_processstats_command(self):
        """Collect 'ps' output once and replace PROCESS_STATS|* in STATE_DB."""
        data = self.run_command("ps -eo uid,pid,ppid,%mem,%cpu,stime,tty,time,cmd --sort -%cpu | head -1024")
        if not data:
            # Guard: format_process_cmd_output would crash on None.
            log_error("'ps' returned null output")
            return
        processdata = self.format_process_cmd_output(data)
        # Wipe stale entries before repopulating with fresh values.
        self.state_db.delete_all_by_pattern('STATE_DB', 'PROCESS_STATS|*')
        for row in processdata:
            pid = row.get('PID')
            if not pid:
                continue
            key = 'PROCESS_STATS|' + str(pid)
            self.update_state_db(key, 'UID', row.get('UID'))
            self.update_state_db(key, 'PPID', row.get('PPID'))
            self.update_state_db(key, '%CPU', str(row.get('%CPU')))
            self.update_state_db(key, '%MEM', str(row.get('%MEM')))
            self.update_state_db(key, 'STIME', row.get('STIME'))
            self.update_state_db(key, 'TT', row.get('TT'))
            self.update_state_db(key, 'TIME', row.get('TIME'))
            self.update_state_db(key, 'CMD', row.get('CMD'))

    def update_state_db(self, key1, key2, value2):
        """Write a single field/value pair under *key1* in STATE_DB."""
        self.state_db.set('STATE_DB', key1, key2, value2)

    def run(self):
        """One collection cycle: push docker and process stats, plus a
        last-update timestamp for each table."""
        self.update_dockerstats_command()
        # str() the timestamp: the redis client cannot serialize a raw
        # datetime object.
        timestamp = str(datetime.now())
        self.update_state_db('DOCKER_STATS|LastUpdateTime', 'lastupdate', timestamp)
        self.update_processstats_command()
        self.update_state_db('PROCESS_STATS|LastUpdateTime', 'lastupdate', timestamp)
197+
198+
# main start
199+
def main():
    """Entry point: verify root privileges, then collect and publish
    process/docker stats every two minutes, forever."""
    log_info("process-docker stats daemon starting up..")
    if os.getuid() != 0:
        log_error("Must be root to run process-docker daemon")
        # print() form works under both Python 2 and Python 3
        # (the original py2-only 'print' statement breaks on py3).
        print("Error: Must be root to run process-docker daemon")
        sys.exit(1)
    pd = ProcDockerStats()
    # Data needs to be updated every 2 mins, hence the 120-second delay.
    # Note: the loop never exits; the daemon is stopped by systemd.
    while True:
        pd.run()
        time.sleep(120)

if __name__ == '__main__':
    main()
214+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[Unit]
2+
Description=Process and docker CPU/memory utilization data export daemon
3+
Requires=database.service updategraph.service
4+
After=database.service updategraph.service
5+
6+
[Service]
7+
Type=simple
8+
ExecStart=/usr/bin/procdockerstatsd
9+
Restart=always
10+
11+
[Install]
12+
WantedBy=multi-user.target
13+

0 commit comments

Comments
 (0)