Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

container_checker on supervisor should check containers based on asic presence #11442

Merged
merged 5 commits into from
Aug 22, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions files/image_config/monit/container_checker
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,34 @@ import docker
import sys

import swsssdk
from sonic_py_common import multi_asic, device_info
from sonic_py_common import multi_asic, device_info, daemon_base
from swsscommon import swsscommon

def get_asic_presence_list():
    """
    @summary: Build the list of numeric asic ids that are considered present.
              - Not a multi-asic system: the list is empty.
              - Multi-asic, non-supervisor card (e.g. a line card): every asic
                supported by the card is expected to be present, so the list is
                0 .. num_asics - 1.
              - Multi-asic supervisor: fabric cards are FRUs, so only the asics
                listed in CHASSIS_ASIC_TABLE of CHASSIS_STATE_DB are returned.
                The table keys are expected to look like asic0, asic1, ... asicN,
                and the first N ids (asic0 to asic(N-1)) are assumed to belong
                to the supervisor, where N is the max number of asics supported
                by the chassis.
    @return: List of asic ids (integers) present
    """
    # Single-asic platforms have no per-asic container instances to track.
    if not multi_asic.is_multi_asic():
        return []

    # Only the supervisor carries removable fabric cards; everywhere else all
    # asics of the card are present.
    if not device_info.is_supervisor():
        return list(range(multi_asic.get_num_asics()))

    # Supervisor: read the detected asics from CHASSIS_ASIC_TABLE.
    state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
    asic_table = swsscommon.Table(state_db, 'CHASSIS_ASIC_TABLE')
    if not asic_table:
        return []

    # Each key is an asic id such as "asic3"; strip the 4-character "asic"
    # prefix and keep the numeric part.
    return [int(asic_key[4:]) for asic_key in asic_table.getKeys()]

def get_expected_running_containers():
"""
Expand All @@ -41,7 +66,15 @@ def get_expected_running_containers():

expected_running_containers = set()
always_running_containers = set()


# Get current asic presence list. For multi_asic system, multi instance containers
# should be checked only for asics present.
asics_id_presence = get_asic_presence_list()

# Some services, like database and bgp run all the instances irrespective of asic presence.
# Add those to exception list.
anamehra marked this conversation as resolved.
Show resolved Hide resolved
run_all_instance_list = ['database', 'bgp']

for container_name in feature_table.keys():
if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]:
if multi_asic.is_multi_asic():
Expand All @@ -50,7 +83,8 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
expected_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
expected_running_containers.add(container_name + str(asic_id))
else:
expected_running_containers.add(container_name)
if feature_table[container_name]["state"] == 'always_enabled':
Expand All @@ -60,9 +94,11 @@ def get_expected_running_containers():
if feature_table[container_name]["has_per_asic_scope"] == "True":
num_asics = multi_asic.get_num_asics()
for asic_id in range(num_asics):
always_running_containers.add(container_name + str(asic_id))
if asic_id in asics_id_presence or container_name in run_all_instance_list:
always_running_containers.add(container_name + str(asic_id))
else:
always_running_containers.add(container_name)

if device_info.is_supervisor():
always_running_containers.add("database-chassis")
return expected_running_containers, always_running_containers
Expand Down