Skip to content

Commit e648290

Browse files
disk_Check: Scan & mount as RW when disk turns into Read-only (#1872)
* disk_check: Check & mount RO as RW using tmpfs (#1569) What I did: There is a bug that occasionally turns the root overlay read-only (RO), which makes /etc & /home RO. This blocks any new remote user login, as login needs to write into /etc & /home. This tool checks whether /etc & /home (or the given dirs) are in RW or RO state. If RO, it can create a writable overlay using tmpfs. The overlay is transient and lasts only until the next reboot; any write made after the overlay is mounted will be lost upon reboot. But this allows new remote users to log in. How I did it: Create upper & work dirs in /run/mount (tmpfs). Mount /etc & /home as lowerdirs and use the same name for the final merged mount point. This allows anyone opening a file in /etc or /home to operate on the merged overlay, transparently. How to verify it: Mount any dir on tmpfs (mount -t tmpfs tmpfs test_dir), remount it as RO (mount -o remount,ro test_dir), then pass that dir to this script (disk_check.py -d ./test_dir). Now it should be RW.
1 parent c80321c commit e648290

File tree

3 files changed

+361
-0
lines changed

3 files changed

+361
-0
lines changed

scripts/disk_check.py

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
What:
6+
There have been cases where the disk turns Read-only due to a kernel bug.
7+
In Read-only state, system blocks new remote user login via TACACS.
8+
This utility is to check & make transient recovery as needed.
9+
10+
How:
11+
check for Read-Write permission. If Read-only, create writable overlay using tmpfs.
12+
13+
By default "/etc" & "/home" are checked and if in Read-only state, make them Read-Write
14+
using overlay on top of tmpfs.
15+
16+
Making /etc & /home writable allows new remote users to log in successfully.
17+
18+
If in Read-only state or in Read-Write state with the help of tmpfs overlay,
19+
syslog ERR messages are written, to help raise alerts.
20+
21+
Monit may be used to invoke it periodically, to help scan & fix and
22+
report via syslog.
23+
24+
Tidbit:
25+
If you would like to test this script, you could simulate a RO disk
26+
with the following command. Reboot will revert the effect.
27+
sudo bash -c "echo u > /proc/sysrq-trigger"
28+
29+
"""
30+
31+
import argparse
32+
import os
33+
import sys
34+
import syslog
35+
import subprocess
36+
37+
# Scratch directories backing the writable overlay.  They live under /run
# (tmpfs), so everything written through the overlay is lost on reboot.
UPPER_DIR = "/run/mount/upper"
WORK_DIR = "/run/mount/work"

# Mount table that is_mounted() scans for overlay_<name> entries.
MOUNTS_FILE = "/proc/mounts"

# Messages whose syslog priority is numerically greater than this value
# are dropped by _log_msg(); main() overrides it from --loglvl.
chk_log_level = syslog.LOG_ERR
42+
43+
def _log_msg(lvl, pfx, msg):
    """Write *msg* to stdout and syslog if *lvl* passes the level filter.

    lvl: syslog priority of the message (lower number = more severe).
    pfx: short tag printed in front of the message on stdout only.
    msg: message text.

    Nothing is emitted when *lvl* is numerically above chk_log_level.
    """
    if lvl > chk_log_level:
        return

    print("{}: {}".format(pfx, msg))
    syslog.syslog(lvl, msg)
47+
48+
def log_err(m):
    """Log *m* at syslog ERR priority with the "Err" stdout prefix."""
    _log_msg(lvl=syslog.LOG_ERR, pfx="Err", msg=m)
50+
51+
52+
def log_info(m):
    """Log *m* at syslog INFO priority with the "Info" stdout prefix."""
    _log_msg(lvl=syslog.LOG_INFO, pfx="Info", msg=m)
54+
55+
56+
def log_debug(m):
    """Log *m* at syslog DEBUG priority with the "Debug" stdout prefix."""
    _log_msg(lvl=syslog.LOG_DEBUG, pfx="Debug", msg=m)
58+
59+
60+
def test_writable(dirs):
    """Return True only if every path in *dirs* is writable.

    Stops at the first path for which os.access(..., os.W_OK) is false,
    logs it at ERR level and returns False.  Writable paths are logged at
    DEBUG level.  An empty *dirs* yields True.
    """
    for path in dirs:
        if os.access(path, os.W_OK):
            log_debug("{} is Read-Write".format(path))
            continue

        log_err("{} is not read-write".format(path))
        return False

    return True
69+
70+
71+
def run_cmd(cmd):
    """Run *cmd* through the shell and return its exit status.

    stdout and stderr are captured; whatever the command printed is logged
    at INFO level.  A non-zero status is logged at ERR level, a zero status
    at INFO level.

    NOTE(review): *cmd* is handed to the shell verbatim (shell=True), so
    callers must not build it from untrusted text.
    """
    child = subprocess.Popen(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    captured_out, captured_err = child.communicate()
    # Collect the exit status of the finished command.
    status = child.wait()

    if not status:
        log_info("ret={} cmd: {}".format(status, cmd))
    else:
        log_err("failed: ret={} cmd={}".format(status, cmd))

    if captured_out:
        log_info("stdout: {}".format(captured_out.decode("utf-8")))
    if captured_err:
        log_info("stderr: {}".format(captured_err.decode("utf-8")))

    return status
87+
88+
89+
def get_dname(path_name):
    """Return the last component of *path_name* after normalisation.

    Trailing slashes and "."/".." segments are collapsed first, so
    "/home/" gives "home".  The root path "/" gives an empty string.
    """
    _, leaf = os.path.split(os.path.normpath(path_name))
    return leaf
91+
92+
93+
def _remove_overlay_dirs(paths):
    """Best-effort removal of scratch directories created by do_mnt()."""
    for path in paths:
        if os.path.exists(path) and run_cmd("rm -rf {}".format(path)):
            log_err("Failed to remove {}".format(path))


def do_mnt(dirs):
    """Mount a tmpfs-backed writable overlay on top of each path in *dirs*.

    For every directory a private upper/work pair is created under
    UPPER_DIR / WORK_DIR (named after the directory's last path component)
    and an overlay called overlay_<name> is mounted over the directory
    itself.

    Returns 0 on success.  Returns non-zero when UPPER_DIR already exists
    ("Already mounted"), when a scratch directory cannot be created, or
    when a mount command fails; in the failure cases the scratch
    directories created by this call are removed again so that a later run
    can retry.
    """
    if os.path.exists(UPPER_DIR):
        log_err("Already mounted")
        return 1

    # Track what this call created so a partial failure never leaves
    # UPPER_DIR behind (its presence alone makes later runs bail out).
    created = []
    for i in (UPPER_DIR, WORK_DIR):
        try:
            os.mkdir(i)
        except OSError:
            log_err("Failed to create {}".format(i))
            _remove_overlay_dirs(created)
            return 1
        created.append(i)

    ret = 0
    for d in dirs:
        d_name = get_dname(d)
        d_upper = os.path.join(UPPER_DIR, d_name)
        d_work = os.path.join(WORK_DIR, d_name)
        try:
            os.mkdir(d_upper)
            os.mkdir(d_work)
        except OSError:
            # E.g. two entries in dirs sharing the same last component.
            log_err("Failed to create overlay dirs for {}".format(d))
            ret = 1
            break

        ret = run_cmd("mount -t overlay overlay_{} -o lowerdir={},"
                      "upperdir={},workdir={} {}".format(
                          d_name, d, d_upper, d_work, d))
        if ret:
            break

    if ret:
        # Keep the original failure code; the cleanup status must not
        # overwrite it, or a failed mount would be reported as success.
        # NOTE(review): overlays mounted for earlier entries of dirs are
        # not unmounted here -- confirm whether that needs handling.
        _remove_overlay_dirs(created)
        log_err("Failed to mount {} as Read-Write".format(dirs))
    else:
        log_info("{} are mounted as Read-Write".format(dirs))
    return ret
129+
130+
131+
def is_mounted(dirs):
    """Return True if an overlay created for any path in *dirs* is mounted.

    The check is skipped (False) when UPPER_DIR does not exist.  Otherwise
    the first field of each line in MOUNTS_FILE is compared against the
    overlay_<name> device names that do_mnt() would have used.
    """
    if not os.path.exists(UPPER_DIR):
        return False

    onames = {"overlay_{}".format(get_dname(d)) for d in dirs}

    with open(MOUNTS_FILE, "r") as s:
        for ln in s:
            fields = ln.split()
            if not fields:
                # Blank line: nothing to compare, and indexing would raise.
                continue
            if fields[0] in onames:
                log_debug("Mount exists for {}".format(fields[0]))
                return True
    return False
146+
147+
148+
def do_check(skip_mount, dirs):
    """Check *dirs* for writability and repair with an overlay if allowed.

    skip_mount: when true, only report; never attempt the overlay mount.
    dirs: list of directory paths to check.

    Returns the status of the mount attempt (0 when none was needed or
    attempted).  Whenever the status is 0 and an overlay is found mounted,
    an ERR message is logged so the condition stays visible.
    """
    status = 0
    needs_fix = not test_writable(dirs)
    if needs_fix and not skip_mount:
        status = do_mnt(dirs)

    # Report a live overlay, whether it was just created or already there.
    if not status and is_mounted(dirs):
        log_err("READ-ONLY: Mounted {} to make Read-Write".format(dirs))

    return status
159+
160+
161+
def main():
    """Parse the command line and run the read-only check.

    Options:
      -s/--skip-mount  only report, do not mount an overlay
      -d/--dirs        comma separated directories (default "/etc,/home")
      -l/--loglvl      numeric syslog priority threshold (default LOG_ERR)

    Returns the status from do_check(), suitable for sys.exit().
    """
    global chk_log_level

    parser = argparse.ArgumentParser(
        description="check disk for Read-Write and mount etc & home as Read-Write")
    parser.add_argument('-s', "--skip-mount", action='store_true', default=False,
                        help="Skip mounting /etc & /home as Read-Write")
    parser.add_argument('-d', "--dirs", default="/etc,/home",
                        help="dirs to mount")
    parser.add_argument('-l', "--loglvl", default=syslog.LOG_ERR, type=int,
                        help="log level")
    args = parser.parse_args()

    chk_log_level = args.loglvl

    # A stray comma ("/etc,") would otherwise produce an empty entry, which
    # os.access() reports as not writable and so triggers a bogus mount.
    dirs = [d.strip() for d in args.dirs.split(",") if d.strip()]
    if not dirs:
        parser.error("no directories given to --dirs")

    return do_check(args.skip_mount, dirs)
177+
178+
179+
# Script entry point: the status returned by main() becomes the exit code.
if __name__ == "__main__":
    sys.exit(main())

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
'scripts/db_migrator.py',
8686
'scripts/decode-syseeprom',
8787
'scripts/dropcheck',
88+
'scripts/disk_check.py',
8889
'scripts/dropconfig',
8990
'scripts/dropstat',
9091
'scripts/dump_nat_entries.py',

tests/disk_check_test.py

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import sys
2+
import syslog
3+
from unittest.mock import patch
4+
import pytest
5+
import subprocess
6+
7+
sys.path.append("scripts")
8+
import disk_check
9+
10+
disk_check.MOUNTS_FILE = "/tmp/proc_mounts"
11+
12+
# Table of test cases, run in insertion order by test_readonly().
# Keys understood per case:
#   desc       - human readable summary
#   args       - value patched into sys.argv
#   upperdir   - temporarily swapped into disk_check.UPPER_DIR
#   workdir    - temporarily swapped into disk_check.WORK_DIR
#   mounts     - text written to the fake mounts file
#   err        - expected LOG_ERR messages joined with "|"
#   cmds       - expected shell commands (default: none)
#   expect_ret - expected return of disk_check.main() (default: 0)
#   proc       - optional per-command results for the subprocess mock
test_data = {
    "0": {
        "desc": "All good as /tmp is read-write",
        "args": ["", "-d", "/tmp"],
        "err": ""
    },
    "1": {
        "desc": "Not good as /tmpx is not read-write; But fix skipped",
        "args": ["", "-d", "/tmpx", "-s"],
        "err": "/tmpx is not read-write"
    },
    "2": {
        "desc": "Not good as /tmpx is not read-write; expect mount",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmp/tmpx",
        "workdir": "/tmp/tmpy",
        "mounts": "overlay_tmpx blahblah",
        "err": "/tmpx is not read-write|READ-ONLY: Mounted ['/tmpx'] to make Read-Write",
        "cmds": ['mount -t overlay overlay_tmpx -o lowerdir=/tmpx,upperdir=/tmp/tmpx/tmpx,workdir=/tmp/tmpy/tmpx /tmpx']
    },
    "3": {
        "desc": "Not good as /tmpx is not read-write; mount fail as create of upper fails",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmpx",
        "expect_ret": 1
    },
    "4": {
        "desc": "Not good as /tmpx is not read-write; mount fail as upper exist",
        "args": ["", "-d", "/tmpx"],
        "upperdir": "/tmp",
        "err": "/tmpx is not read-write|Already mounted",
        "expect_ret": 1
    },
    "5": {
        "desc": "/tmp is read-write, but as well mount exists; hence report",
        "args": ["", "-d", "/tmp"],
        "upperdir": "/tmp",
        "mounts": "overlay_tmp blahblah",
        "err": "READ-ONLY: Mounted ['/tmp'] to make Read-Write"
    },
    "6": {
        "desc": "Test another code path for good case",
        "args": ["", "-d", "/tmp"],
        "upperdir": "/tmp"
    }
}

# Mutable state shared between the mocks and the test body.
err_data = ""        # LOG_ERR messages seen so far, "|"-separated
max_log_lvl = -1     # numerically highest syslog priority seen
cmds = []            # commands passed to the subprocess mock
current_tc = None    # test case currently being executed
63+
64+
def mount_file(d):
    """Replace the contents of disk_check.MOUNTS_FILE with the text *d*."""
    with open(disk_check.MOUNTS_FILE, "w") as handle:
        handle.write(d)
67+
68+
69+
def report_err_msg(lvl, m):
    """Stand-in for syslog.syslog that records what was logged.

    Tracks the numerically highest priority seen in max_log_lvl and
    appends every LOG_ERR message to err_data, separated by "|".
    """
    global err_data
    global max_log_lvl

    max_log_lvl = max(max_log_lvl, lvl)

    if lvl != syslog.LOG_ERR:
        return

    err_data = err_data + "|" + m if err_data else err_data + m
80+
81+
82+
class proc:
    """Minimal fake of a finished subprocess result.

    Class-level defaults describe a successful command with no output; an
    optional dict may override them using the keys "ret", "stdout" and
    "stderr".
    """
    returncode = 0
    stdout = None
    stderr = None

    def __init__(self, proc_upd = None):
        # None and an empty dict both leave the class defaults in place.
        if not proc_upd:
            return

        overrides = (
            ("returncode", "ret", 0),
            ("stdout", "stdout", None),
            ("stderr", "stderr", None),
        )
        for attr, key, fallback in overrides:
            setattr(self, attr, proc_upd.get(key, fallback))
92+
93+
94+
def mock_subproc_run(cmd, shell, stdout):
    """Side effect for the patched subprocess.run.

    Checks the call used shell=True and stdout=PIPE, records *cmd* in the
    global cmds list, and returns a proc built from the current test
    case's "proc" entry at the same position (defaults if there is none).
    """
    global cmds

    assert shell == True
    assert stdout == subprocess.PIPE

    scripted = current_tc.get("proc", [])
    position = len(cmds)
    upd = current_tc["proc"][position] if len(scripted) > position else None
    cmds.append(cmd)

    return proc(upd)
105+
106+
107+
def init_tc(tc):
    """Reset recorded state and prepare the fake mounts file for *tc*."""
    global err_data, cmds, current_tc

    err_data, cmds = "", []
    mount_file(tc.get("mounts", ""))
    current_tc = tc
114+
115+
116+
def swap_upper(tc):
    """Exchange tc["upperdir"] with disk_check.UPPER_DIR.

    Calling it twice puts both values back where they started.
    """
    tc["upperdir"], disk_check.UPPER_DIR = disk_check.UPPER_DIR, tc["upperdir"]
120+
121+
122+
def swap_work(tc):
    """Exchange tc["workdir"] with disk_check.WORK_DIR.

    Calling it twice puts both values back where they started.  The saved
    value must go into tc["workdir"]; storing it under "upperdir" would
    clobber the value swap_upper() needs to restore disk_check.UPPER_DIR
    and would leave disk_check.WORK_DIR permanently overridden.
    """
    tmp_w = tc["workdir"]
    tc["workdir"] = disk_check.WORK_DIR
    disk_check.WORK_DIR = tmp_w
126+
127+
128+
class TestDiskCheck(object):
    """Table-driven test of disk_check.main() with syslog and subprocess mocked."""

    def setup(self):
        pass

    @staticmethod
    def _swap_dirs(tc):
        """Toggle the upper/work dir overrides that *tc* asks for."""
        if "upperdir" in tc:
            swap_upper(tc)

        if "workdir" in tc:
            swap_work(tc)

    # NOTE(review): this patches subprocess.run, while disk_check.run_cmd in
    # this commit calls subprocess.Popen -- confirm the mock really
    # intercepts the commands issued by the script.
    @patch("disk_check.syslog.syslog")
    @patch("disk_check.subprocess.run")
    def test_readonly(self, mock_proc, mock_log):
        global err_data, cmds, max_log_lvl

        mock_proc.side_effect = mock_subproc_run
        mock_log.side_effect = report_err_msg

        # With "-l 7" debug messages must reach syslog.
        with patch('sys.argv', ["", "-l", "7", "-d", "/tmp"]):
            disk_check.main()
            assert max_log_lvl == syslog.LOG_DEBUG
            max_log_lvl = -1

        for i, tc in test_data.items():
            print("-----------Start tc {}---------".format(i))
            init_tc(tc)

            with patch('sys.argv', tc["args"]):
                # Install the per-case directories ...
                self._swap_dirs(tc)

                ret = disk_check.main()

                # ... and swap them back afterwards.
                self._swap_dirs(tc)

            print("ret = {}".format(ret))
            print("err_data={}".format(err_data))
            print("cmds: {}".format(cmds))

            assert ret == tc.get("expect_ret", 0)
            if "err" in tc:
                assert err_data == tc["err"]
            assert cmds == tc.get("cmds", [])
            print("-----------End tc {}-----------".format(i))

        # At the default level nothing above LOG_ERR may have been logged.
        assert max_log_lvl == syslog.LOG_ERR
180+

0 commit comments

Comments
 (0)