From 3f651dc0fa15d686d01ba0aa5364f7a435b6fd1c Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Tue, 14 Apr 2020 09:53:56 -0700 Subject: [PATCH] [config] Implement a process level lock (#857) Changes: 1.) Implement a class, which uses hsetnx for lock. 2.) lock is expired within timeout period or will be released by owner. 3.) After -y prompt, lock is reacquired, because timer could have expired, before user enters yes. Signed-off-by: Praveen Chaudhary pchaudhary@linkedin.com --- config/main.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/config/main.py b/config/main.py index 1d196826d79f..086bda1c38de 100755 --- a/config/main.py +++ b/config/main.py @@ -529,7 +529,98 @@ def is_ipaddress(val): return False return True +# class for locking entire config process +class ConfigDbLock(): + def __init__(self): + self.lockName = "LOCK|configDbLock" + self.timeout = 10 + self.pid = os.getpid() + self.client = None + + self._acquireLock() + return + + def _acquireLock(self): + try: + # connect to db + db_kwargs = dict() + configdb = ConfigDBConnector(**db_kwargs) + configdb.connect() + + self.client = configdb.get_redis_client('CONFIG_DB') + # Set lock and expire time. Process may get killed b/w set lock and + # expire call. + if self.client.hsetnx(self.lockName, "PID", self.pid): + self.client.expire(self.lockName, self.timeout) + # if lock exists but expire timer not running, run expire time and + # abort. + elif not self.client.ttl(self.lockName): + click.echo(":::Unable to acquire lock. Resetting timer and aborting:::"); + self.client.expire(self.lockName, self.timeout) + sys.exit(1) + else: + click.echo(":::Unable to acquire lock. Aborting:::"); + sys.exit(1) + except Exception as e: + click.echo(":::Exception: {}:::".format(e)) + sys.exit(1) + return + + def reacquireLock(self): + try: + # Try to set lock first + if self.client.hsetnx(self.lockName, "PID", self.pid): + self.client.expire(self.lockName, self.timeout) + # if lock exists, check who owns it + else: + p = self.client.pipeline(True) + # watch, we do not want to work on modified lock + p.watch(self.lockName) + # if current process holding then extend the timer + if p.hget(self.lockName, "PID") == str(self.pid): + self.client.expire(self.lockName, self.timeout) + p.unwatch() + return + else: + # some other process is holding the lock. + click.echo(":::Unable to reacquire lock (lock PID: {}, self.pid: {}):::".\ + format(p.hget(self.lockName, "PID"), self.pid)) + p.unwatch() + sys.exit(1) + except Exception as e: + click.echo(":::Exception: {}:::".format(e)) + sys.exit(1) + return + + def _releaseLock(self): + try: + p = self.client.pipeline(True) + # watch, we do not want to work on modified lock + p.watch(self.lockName) + # if current process holding the lock then release it. + if p.hget(self.lockName, "PID") == str(self.pid): + p.multi() + p.delete(self.lockName) + p.execute() + return + # lock may be None, if timer has expired before releasing lock. + elif not self.lockName: + return + else: + # some other process is holding the lock. + click.echo(":::Unable to release lock (lock PID: {}, self.pid: {}):::".\ + format(p.hget(self.lockName, "PID"), self.pid)) + p.unwatch() + except Exception as e: + click.echo(":::Exception: {}:::".format(e)) + return + + def __del__(self): + self._releaseLock() + return +# end of class configdblock +configdb_lock = ConfigDbLock() # This is our main entrypoint - the main 'config' command @click.group(context_settings=CONTEXT_SETTINGS) def config(): @@ -547,6 +638,8 @@ def config(): @click.argument('filename', default='/etc/sonic/config_db.json', type=click.Path()) def save(filename): """Export current config DB to a file on disk.""" + # reacquire lock after prompt + configdb_lock.reacquireLock() command = "{} -d --print-data > {}".format(SONIC_CFGGEN_PATH, filename) run_command(command, display_cmd=True) @@ -557,6 +650,9 @@ def load(filename, yes): """Import a previous saved config DB dump file.""" if not yes: click.confirm('Load config from the file %s?' % filename, abort=True) + # reacquire lock after prompt + configdb_lock.reacquireLock() + command = "{} -j {} --write-to-db".format(SONIC_CFGGEN_PATH, filename) run_command(command, display_cmd=True) @@ -568,6 +664,8 @@ def reload(filename, yes, load_sysinfo): """Clear current configuration and import a previous saved config DB dump file.""" if not yes: click.confirm('Clear current config and reload config from the file %s?' % filename, abort=True) + # reacquire lock after prompt + configdb_lock.reacquireLock() log_info("'reload' executing...") @@ -617,6 +715,8 @@ def reload(filename, yes, load_sysinfo): @click.argument('filename', default='/etc/sonic/device_desc.xml', type=click.Path(exists=True)) def load_mgmt_config(filename): """Reconfigure hostname and mgmt interface based on device description file.""" + # reacquire lock after prompt + configdb_lock.reacquireLock() command = "{} -M {} --write-to-db".format(SONIC_CFGGEN_PATH, filename) run_command(command, display_cmd=True) #FIXME: After config DB daemon for hostname and mgmt interface is implemented, we'll no longer need to do manual configuration here @@ -639,7 +739,10 @@ def load_mgmt_config(filename): @click.option('-y', '--yes', is_flag=True, callback=_abort_if_false, expose_value=False, prompt='Reload config from minigraph?') def load_minigraph(): + """Reconfigure based on minigraph.""" + # reacquire lock after prompt + configdb_lock.reacquireLock() log_info("'load_minigraph' executing...") # get the device type @@ -2304,6 +2407,8 @@ def ztp(): @click.argument('run', required=False, type=click.Choice(["run"])) def run(run): """Restart ZTP of the device.""" + # reacquire lock after prompt + configdb_lock.reacquireLock() command = "ztp run -y" run_command(command, display_cmd=True) @@ -2313,6 +2418,8 @@ def run(run): @click.argument('disable', required=False, type=click.Choice(["disable"])) def disable(disable): """Administratively Disable ZTP.""" + # reacquire lock after prompt + configdb_lock.reacquireLock() command = "ztp disable -y" run_command(command, display_cmd=True)