Skip to content

Commit 3f651dc

Browse files
author
Praveen Chaudhary
authored
[config] Implement a process level lock (sonic-net#857)
Changes: 1.) Implement a class that uses Redis HSETNX to take the lock. 2.) The lock expires after the timeout period unless its owner releases it first. 3.) After the -y prompt, the lock is reacquired, because the timer could have expired before the user entered yes. Signed-off-by: Praveen Chaudhary [email protected]
1 parent cff92dd commit 3f651dc

File tree

1 file changed

+107
-0
lines changed

1 file changed

+107
-0
lines changed

config/main.py

+107
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,98 @@ def is_ipaddress(val):
529529
return False
530530
return True
531531

532+
# class for locking entire config process
class ConfigDbLock(object):
    """Process-level lock for the config command, kept in CONFIG_DB.

    The lock is the Redis hash ``LOCK|configDbLock`` whose ``PID`` field holds
    the pid of the owning process. It is taken with HSETNX and given an expiry
    of ``timeout`` seconds, so it disappears on its own if the owner dies.

    Constructing an instance acquires the lock; if that is not possible the
    process exits with status 1. The lock is released by its owner when the
    instance is destroyed.
    """

    def __init__(self):
        self.lockName = "LOCK|configDbLock"
        self.timeout = 10
        self.pid = os.getpid()
        self.client = None
        # True once this process has set the lock itself; lets __del__ skip
        # the release attempt when acquisition never succeeded.
        self._held = False

        self._acquireLock()

    @staticmethod
    def _toText(value):
        """Return a Redis reply as text so it can be compared with str(pid).

        The client may hand back bytes or text depending on how it was
        created; None (missing key/field) is passed through unchanged.
        """
        if isinstance(value, bytes):
            return value.decode()
        return value

    def _acquireLock(self):
        """Connect to CONFIG_DB and take the lock, or exit with status 1."""
        try:
            # connect to db
            configdb = ConfigDBConnector()
            configdb.connect()
            self.client = configdb.get_redis_client('CONFIG_DB')

            # Set lock and expire time. Process may get killed b/w set lock
            # and expire call.
            if self.client.hsetnx(self.lockName, "PID", self.pid):
                self._held = True
                self.client.expire(self.lockName, self.timeout)
                return

            # Lock exists. If its expire timer is not running (an owner died
            # between HSETNX and EXPIRE), start the timer so the stale lock
            # eventually goes away, then abort. "No expiry" is reported as
            # None or as -1 depending on the client version.
            ttl = self.client.ttl(self.lockName)
            if ttl is None or ttl == -1:
                click.echo(":::Unable to acquire lock. Resetting timer and aborting:::")
                self.client.expire(self.lockName, self.timeout)
            else:
                click.echo(":::Unable to acquire lock. Aborting:::")
            sys.exit(1)
        except Exception as e:
            click.echo(":::Exception: {}:::".format(e))
            sys.exit(1)

    def reacquireLock(self):
        """Take the lock again or extend its timer; exit with status 1 on failure.

        Intended to be called after an interactive prompt, since the timer
        may have expired while waiting for the user.
        """
        try:
            # Try to set lock first (it may have expired in the meantime).
            if self.client.hsetnx(self.lockName, "PID", self.pid):
                self._held = True
                self.client.expire(self.lockName, self.timeout)
                return

            # Lock exists, check who owns it.
            pipe = self.client.pipeline(True)
            try:
                # watch, we do not want to work on modified lock
                pipe.watch(self.lockName)
                owner = self._toText(pipe.hget(self.lockName, "PID"))
                if owner == str(self.pid):
                    # Current process holds the lock: extend the timer inside
                    # MULTI/EXEC so the WATCH above actually guards it.
                    pipe.multi()
                    pipe.expire(self.lockName, self.timeout)
                    pipe.execute()
                    return
                # some other process is holding the lock.
                click.echo(":::Unable to reacquire lock (lock PID: {}, self.pid: {}):::".
                           format(owner, self.pid))
                sys.exit(1)
            finally:
                # Drops any WATCH and returns the connection on every path.
                pipe.reset()
        except Exception as e:
            click.echo(":::Exception: {}:::".format(e))
            sys.exit(1)

    def _releaseLock(self):
        """Delete the lock if this process owns it.

        Nothing is done if the lock has already expired. If another process
        owns it, a message is printed and the lock is left untouched. Errors
        are reported but never raised, because this runs from __del__.
        """
        try:
            pipe = self.client.pipeline(True)
            try:
                # watch, we do not want to work on modified lock
                pipe.watch(self.lockName)
                owner = self._toText(pipe.hget(self.lockName, "PID"))
                if owner is None:
                    # Timer expired before we got to release the lock.
                    self._held = False
                    return
                if owner == str(self.pid):
                    # Current process holds the lock: release it.
                    pipe.multi()
                    pipe.delete(self.lockName)
                    pipe.execute()
                    self._held = False
                    return
                # some other process is holding the lock.
                click.echo(":::Unable to release lock (lock PID: {}, self.pid: {}):::".
                           format(owner, self.pid))
            finally:
                pipe.reset()
        except Exception as e:
            click.echo(":::Exception: {}:::".format(e))

    def __del__(self):
        # Only release a lock this process actually took; a failed
        # acquisition (or a missing connection) has nothing to release.
        if getattr(self, "_held", False) and getattr(self, "client", None) is not None:
            self._releaseLock()
# end of class configdblock
532622

623+
# Process-wide lock instance, created when this module is loaded: the
# constructor tries to take the CONFIG_DB lock and exits the process with
# status 1 if it cannot. Commands call configdb_lock.reacquireLock() later.
configdb_lock = ConfigDbLock()
533624
# This is our main entrypoint - the main 'config' command
534625
@click.group(context_settings=CONTEXT_SETTINGS)
535626
def config():
@@ -547,6 +638,8 @@ def config():
547638
@click.argument('filename', default='/etc/sonic/config_db.json', type=click.Path())
548639
def save(filename):
549640
"""Export current config DB to a file on disk."""
641+
# reacquire lock after prompt
642+
configdb_lock.reacquireLock()
550643
command = "{} -d --print-data > {}".format(SONIC_CFGGEN_PATH, filename)
551644
run_command(command, display_cmd=True)
552645

@@ -557,6 +650,9 @@ def load(filename, yes):
557650
"""Import a previous saved config DB dump file."""
558651
if not yes:
559652
click.confirm('Load config from the file %s?' % filename, abort=True)
653+
# reacquire lock after prompt
654+
configdb_lock.reacquireLock()
655+
560656
command = "{} -j {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
561657
run_command(command, display_cmd=True)
562658

@@ -568,6 +664,8 @@ def reload(filename, yes, load_sysinfo):
568664
"""Clear current configuration and import a previous saved config DB dump file."""
569665
if not yes:
570666
click.confirm('Clear current config and reload config from the file %s?' % filename, abort=True)
667+
# reacquire lock after prompt
668+
configdb_lock.reacquireLock()
571669

572670
log_info("'reload' executing...")
573671

@@ -617,6 +715,8 @@ def reload(filename, yes, load_sysinfo):
617715
@click.argument('filename', default='/etc/sonic/device_desc.xml', type=click.Path(exists=True))
618716
def load_mgmt_config(filename):
619717
"""Reconfigure hostname and mgmt interface based on device description file."""
718+
# reacquire lock after prompt
719+
configdb_lock.reacquireLock()
620720
command = "{} -M {} --write-to-db".format(SONIC_CFGGEN_PATH, filename)
621721
run_command(command, display_cmd=True)
622722
#FIXME: After config DB daemon for hostname and mgmt interface is implemented, we'll no longer need to do manual configuration here
@@ -639,7 +739,10 @@ def load_mgmt_config(filename):
639739
@click.option('-y', '--yes', is_flag=True, callback=_abort_if_false,
640740
expose_value=False, prompt='Reload config from minigraph?')
641741
def load_minigraph():
742+
642743
"""Reconfigure based on minigraph."""
744+
# reacquire lock after prompt
745+
configdb_lock.reacquireLock()
643746
log_info("'load_minigraph' executing...")
644747

645748
# get the device type
@@ -2304,6 +2407,8 @@ def ztp():
23042407
@click.argument('run', required=False, type=click.Choice(["run"]))
23052408
def run(run):
23062409
"""Restart ZTP of the device."""
2410+
# reacquire lock after prompt
2411+
configdb_lock.reacquireLock()
23072412
command = "ztp run -y"
23082413
run_command(command, display_cmd=True)
23092414

@@ -2313,6 +2418,8 @@ def run(run):
23132418
@click.argument('disable', required=False, type=click.Choice(["disable"]))
23142419
def disable(disable):
23152420
"""Administratively Disable ZTP."""
2421+
# reacquire lock after prompt
2422+
configdb_lock.reacquireLock()
23162423
command = "ztp disable -y"
23172424
run_command(command, display_cmd=True)
23182425

0 commit comments

Comments
 (0)