Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Multi NPU] Time Improvements to the config reload/load_minigraph commands #917

Merged
merged 6 commits into from
Jul 9, 2020
42 changes: 35 additions & 7 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import syslog
import time
import netifaces
import threading

import sonic_device_util
import ipaddress
Expand Down Expand Up @@ -115,6 +116,17 @@ def get_command(self, ctx, cmd_name):
# Helper functions
#

# Execute action per NPU instance for multi instance services.
def execute_asic_instance(inst, event, multi_inst_list, action):
    """Run a systemctl action on each multi-instance service of one instance.

    For every service name in ``multi_inst_list`` this issues
    ``systemctl <action> <service>@<inst>.service`` through ``run_command``.

    A failing service does not abort the loop: the failure is logged,
    ``event`` is set, and the remaining services are still processed.
    Nothing is raised to the caller, so the caller must inspect ``event``
    to learn whether anything went wrong.

    Args:
        inst: Instance identifier placed after the ``@`` in the unit name.
        event: Object exposing ``set()``; it is set when at least one
            service fails. It is never cleared here.
        multi_inst_list: Iterable of service base names (without the
            ``@<inst>.service`` suffix).
        action: systemctl sub-command, inserted verbatim into the command.

    Returns:
        None.
    """
    for service in multi_inst_list:
        click.echo("Executing {} of service {}@{}...".format(action, service, inst))
        try:
            # Only the command execution is guarded; the except clause below
            # shows that a failure is expected to surface as SystemExit
            # (presumably raised by run_command -- confirm against its body).
            run_command("systemctl {} {}@{}.service".format(action, service, inst))
        except SystemExit as err:
            log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, err))
            # Record the failure instead of re-raising, so the remaining
            # services for this instance are still attempted.
            event.set()

# Execute action on list of systemd services
def execute_systemctl(list_of_services, action):
num_asic = sonic_device_util.get_num_npus()
Expand All @@ -124,6 +136,8 @@ def execute_systemctl(list_of_services, action):
log_error("Failed to get generated services")
return

# For multi-NPU platforms, perform the "action" on the global (single-instance) services first.
multi_inst_service_list = []
for service in list_of_services:
if (service + '.service' in generated_services_list):
try:
Expand All @@ -132,14 +146,28 @@ def execute_systemctl(list_of_services, action):
except SystemExit as e:
log_error("Failed to execute {} of service {} with error {}".format(action, service, e))
raise

if (service + '.service' in generated_multi_instance_services):
for inst in range(num_asic):
try:
click.echo("Executing {} of service {}@{}...".format(action, service, inst))
run_command("systemctl {} {}@{}.service".format(action, service, inst))
except SystemExit as e:
log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e))
raise
multi_inst_service_list.append(service)

# On multi-NPU platforms, start one thread per instance to perform the "action" on the multi-instance services.
if sonic_device_util.is_multi_npu():
threads = []
# Use this event object to signal whether any thread raised an exception.
e = threading.Event()
kwargs = {'multi_inst_list' : multi_inst_service_list, 'action' : action}
jleveque marked this conversation as resolved.
Show resolved Hide resolved
for inst in range(num_asic):
t = threading.Thread(target=execute_asic_instance, args=(inst,e), kwargs=kwargs)
jleveque marked this conversation as resolved.
Show resolved Hide resolved
threads.append(t)
t.start()

# Wait for all the threads to finish.
for inst in range(num_asic):
threads[inst].join()

# Check whether any of the threads raised an exception; if so, exit the process.
if e.is_set():
sys.exit(1)

def run_command(command, display_cmd=False, ignore_error=False):
"""Run bash command and print output to stdout
Expand Down