Skip to content

Ensure redis client socket is closed and prevent race condition #11001

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 43 additions & 25 deletions src/sonic-py-common/sonic_py_common/multi_asic.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@
DEFAULT_NAMESPACE = ''
PORT_ROLE = 'role'


# Dictionary to cache config_db connection handle per namespace
# to prevent duplicate connections from being opened
config_db_handle = {}

def connect_config_db_for_ns(namespace=DEFAULT_NAMESPACE):
"""
The function connects to the config DB for a given namespace and
Expand All @@ -48,9 +43,9 @@ def connect_config_db_for_ns(namespace=DEFAULT_NAMESPACE):
def connect_to_all_dbs_for_ns(namespace=DEFAULT_NAMESPACE):
"""
The function connects to the DBs for a given namespace and
returns the handle
For voq chassis systems, the db list includes databases from
returns the handle

For voq chassis systems, the db list includes databases from
supervisor card. Avoid connecting to these databases from linecards

If no namespace is provided, it will connect to the db in the
Expand Down Expand Up @@ -205,23 +200,8 @@ def get_all_namespaces():
So we loop through the databases in different namespaces and depending on the sub_role
decide whether this is a front end ASIC/namespace or a back end one.
"""
front_ns = []
back_ns = []
num_asics = get_num_asics()

if is_multi_asic():
for asic in range(num_asics):
namespace = "{}{}".format(ASIC_NAME_PREFIX, asic)
if namespace not in config_db_handle:
config_db_handle[namespace] = connect_config_db_for_ns(namespace)
config_db = config_db_handle[namespace]

metadata = config_db.get_table('DEVICE_METADATA')
if metadata['localhost']['sub_role'] == FRONTEND_ASIC_SUB_ROLE:
front_ns.append(namespace)
elif metadata['localhost']['sub_role'] == BACKEND_ASIC_SUB_ROLE:
back_ns.append(namespace)

global front_ns
global back_ns
return {'front_ns': front_ns, 'back_ns': back_ns}


Expand Down Expand Up @@ -451,3 +431,41 @@ def validate_namespace(namespace):
return True
else:
return False


"""
One time module initialization. Get all the namespaces from CONFIG_DB.
get_all_namespaces() gets called lot many times(eg. 'portstat -s all')
especially when a platform has lot many interfaces across multiple namespaces
so doing DB connect/close at is time consuming, hence we store this static
information for the lifetime of the process. This prevents two issues we have seen:
1. FD leak due to too many files opened by the process
2. Race condition within multiple threads of the same process
"""
front_ns = []
back_ns = []
num_asics = get_num_asics()
if is_multi_asic():
for asic in range(num_asics):
namespace = "{}{}".format(ASIC_NAME_PREFIX, asic)
command = 'sonic-db-cli -n {} CONFIG_DB HGET "DEVICE_METADATA|localhost" "sub_role"'.format(namespace)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with this change, database container is failing to boot with the error at
"sonic-db-cli -n {} CONFIG_DB HGET "DEVICE_METADATA|localhost" "sub_role""

/usr/local/lib/python3.9/dist-packages/supervisor/options.py:473: UserWarning: Supervisord is running as root and it is searching for its configuration file in default locations (inc.
self.warnings.warn(
Traceback (most recent call last):
File "/usr/local/bin/sonic-cfggen", line 32, in
from minigraph import minigraph_encoder, parse_xml, parse_device_desc_xml, parse_asic_sub_role, parse_asic_switch_type
File "/usr/local/lib/python3.9/dist-packages/minigraph.py", line 14, in
from portconfig import get_port_config
File "/usr/local/lib/python3.9/dist-packages/portconfig.py", line 10, in
from sonic_py_common.multi_asic import get_asic_id_from_name
File "/usr/local/lib/python3.9/dist-packages/sonic_py_common/multi_asic.py", line 447, in
raise RuntimeError(
RuntimeError: Command sonic-db-cli -n asic0 CONFIG_DB HGET "DEVICE_METADATA|localhost" "sub_role" failed with stderr None
/usr/local/lib/python3.9/dist-packages/supervisor/options.py:473: UserWarning: Supervisord is running as root and it is searching for its configuration file in default locations (inc.
self.warnings.warn(
Unlinking stale socket /var/run/supervisor.sock

proc = subprocess.Popen(command,
stdout=subprocess.PIPE,
shell=True,
universal_newlines=True,
stderr=subprocess.STDOUT)
try:
stdout, stderr = proc.communicate()
if proc.returncode != 0:
raise RuntimeError(
"Command {} failed with stderr {}".format(command, stderr)
)

if stdout.rstrip('\n') != "":
role = stdout.rstrip('\n')
if role == FRONTEND_ASIC_SUB_ROLE:
front_ns.append(namespace)
elif role == BACKEND_ASIC_SUB_ROLE:
back_ns.append(namespace)
except OSError as e:
raise OSError("Error running command {}".format(command))