Skip to content

hostcfgd: Handle missed tacacs updates between load & listen #8223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 6, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 63 additions & 36 deletions files/image_config/hostcfgd/hostcfgd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import ast
import os
import time
import threading
import sys
import subprocess
import syslog
Expand All @@ -25,6 +26,20 @@ TACPLUS_SERVER_PASSKEY_DEFAULT = ""
TACPLUS_SERVER_TIMEOUT_DEFAULT = "5"
TACPLUS_SERVER_AUTH_TYPE_DEFAULT = "pap"

global_lock = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

iso of global_lock, can we move this lock to the AaaCfg() ? To prevent another classes in this file from using this lock

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If anyone else (another subscriber) gets a need, they could use this.


class lock_mgr:
def __init__(self):
self.lock = global_lock

def __enter__( self ):
if self.lock:
self.lock.acquire()

def __exit__( self, exc_type, exc_value, traceback ):
if self.lock:
self.lock.release()


def is_true(val):
if val == 'True' or val == 'true':
Expand Down Expand Up @@ -123,7 +138,7 @@ class Iptables(object):
.format(err.cmd, err.returncode, err.output))

class AaaCfg(object):
def __init__(self):
def __init__(self, config_db):
self.auth_default = {
'login': 'local',
}
Expand All @@ -136,49 +151,42 @@ class AaaCfg(object):
self.tacplus_global = {}
self.tacplus_servers = {}
self.debug = False
self.config_db = config_db

# Load conf from ConfigDb
def load(self, aaa_conf, tac_global_conf, tacplus_conf):
for row in aaa_conf:
self.aaa_update(row, aaa_conf[row], modify_conf=False)
for row in tac_global_conf:
self.tacacs_global_update(row, tac_global_conf[row], modify_conf=False)
for row in tacplus_conf:
self.tacacs_server_update(row, tacplus_conf[row], modify_conf=False)
def load(self):
self.modify_conf_file()

def aaa_update(self, key, data, modify_conf=True):
def aaa_update(self, key):
if key == 'authentication':
self.auth = data
if 'failthrough' in data:
self.auth['failthrough'] = is_true(data['failthrough'])
if 'debug' in data:
self.debug = is_true(data['debug'])
if modify_conf:
self.modify_conf_file()

def tacacs_global_update(self, key, data, modify_conf=True):
def tacacs_global_update(self, key):
if key == 'global':
self.tacplus_global = data
if modify_conf:
self.modify_conf_file()

def tacacs_server_update(self, key, data, modify_conf=True):
if data == {}:
if key in self.tacplus_servers:
del self.tacplus_servers[key]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we don't need this code anymore ? all these changes looks unrelated to this fix.
Can we have separate PR for reason of these changes or at least can we add to PR description

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need this code anymore. The whole purpose of this code is to keep a local cache of tacacs & aaa. Maintain the cache on updates and use the cache. We don't do caching anymore.

We get it when we need it. This handles the missed-updates in simple manner and btw, we don't gain any by keeping duplicate cache in mem.

else:
self.tacplus_servers[key] = data

if modify_conf:
self.modify_conf_file()

def tacacs_server_update(self, key):
self.modify_conf_file()

def modify_single_file(self, filename, operations=None):
if operations:
cmd = "sed -e {0} {1} > {1}.new; mv -f {1} {1}.old; mv -f {1}.new {1}".format(' -e '.join(operations), filename)
os.system(cmd)

def modify_conf_file(self):
with lock_mgr():
self.auth = self.config_db.get_table('AAA').get("authentication", {})
if 'failthrough' in self.auth:
self.auth['failthrough'] = is_true(self.auth['failthrough'])
if 'debug' in self.auth:
self.debug = is_true(self.auth['debug'])

self.tacplus_global = self.config_db.get_table('TACPLUS').get(
"global", {})
self.tacplus_servers = self.config_db.get_table('TACPLUS_SERVER')
self._modify_conf_file()

def _modify_conf_file(self):
auth = self.auth_default.copy()
auth.update(self.auth)
tacplus_global = self.tacplus_global_default.copy()
Expand Down Expand Up @@ -224,6 +232,11 @@ class AaaCfg(object):
with open(NSS_TACPLUS_CONF, 'w') as f:
f.write(nss_tacplus_conf)

if 'passkey' in tacplus_global:
tacplus_global['passkey'] = obfuscate(tacplus_global['passkey'])
syslog.syslog(syslog.LOG_INFO, 'pam.d files updated auth={} global={}'.
format(auth, tacplus_global))

class MultiAsicBgpMonCfg(object):
def __init__(self):
self.ns_for_bgp_mon = 'asic4'
Expand Down Expand Up @@ -343,7 +356,7 @@ class HostConfigDaemon:
self.config_db.connect(wait_for_init=True, retry_on=True)
syslog.syslog(syslog.LOG_INFO, 'ConfigDB connect success')

self.aaacfg = AaaCfg()
self.aaacfg = AaaCfg(self.config_db)
self.iptables = Iptables()
# Cache the values of 'state' field in 'FEATURE' table of each container
self.cached_feature_states = {}
Expand All @@ -355,11 +368,19 @@ class HostConfigDaemon:
if self.is_multi_npu:
self.masicBgpMonCfg = MultiAsicBgpMonCfg()


def timer_load(self):
global global_lock

syslog.syslog(syslog.LOG_INFO, 'reloading tacacs from timer thread')
self.aaacfg.load()

# Remove lock as timer is one shot
global_lock = None


def load(self):
aaa = self.config_db.get_table('AAA')
tacacs_global = self.config_db.get_table('TACPLUS')
tacacs_server = self.config_db.get_table('TACPLUS_SERVER')
self.aaacfg.load(aaa, tacacs_global, tacacs_server)
self.aaacfg.load()

lpbk_table = self.config_db.get_table('LOOPBACK_INTERFACE')
self.iptables.load(lpbk_table)
Expand Down Expand Up @@ -468,17 +489,18 @@ class HostConfigDaemon:
self.update_feature_state(feature_name, state, feature_table)

def aaa_handler(self, key, data):
self.aaacfg.aaa_update(key, data)
self.aaacfg.aaa_update(key)
syslog.syslog(syslog.LOG_INFO, 'value of {} changed to {}'.format(key, data))

def tacacs_server_handler(self, key, data):
self.aaacfg.tacacs_server_update(key, data)
self.aaacfg.tacacs_server_update(key)
log_data = copy.deepcopy(data)
if 'passkey' in log_data:
log_data['passkey'] = obfuscate(log_data['passkey'])
syslog.syslog(syslog.LOG_INFO, 'value of {} changed to {}'.format(key, log_data))

def tacacs_global_handler(self, key, data):
self.aaacfg.tacacs_global_update(key, data)
self.aaacfg.tacacs_global_update(key)
log_data = copy.deepcopy(data)
if 'passkey' in log_data:
log_data['passkey'] = obfuscate(log_data['passkey'])
Expand Down Expand Up @@ -515,6 +537,7 @@ class HostConfigDaemon:
self.update_feature_state(feature_name, state, feature_table)

def start(self):
global global_lock

self.config_db.subscribe('AAA', lambda table, key, data: self.aaa_handler(key, data))
self.config_db.subscribe('TACPLUS_SERVER', lambda table, key, data: self.tacacs_server_handler(key, data))
Expand All @@ -537,6 +560,10 @@ class HostConfigDaemon:
# Defer load until subscribe
self.load()

global_lock = threading.Lock()
self.tmr_thread = threading.Timer(30, self.timer_load)
self.tmr_thread.start()

self.config_db.listen()


Expand Down