From ec4adfec2fa85b8e57bcde91dbc5242fd8d4f473 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Sun, 17 May 2020 23:12:25 -0700 Subject: [PATCH 1/6] Improvments to the config reload/load_minigraph commands in Multi NPU platforms. --- config/main.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/config/main.py b/config/main.py index 8e110cd21d..65436dce51 100755 --- a/config/main.py +++ b/config/main.py @@ -9,6 +9,7 @@ import syslog import time import netifaces +import threading import sonic_device_util import ipaddress @@ -115,6 +116,16 @@ def get_command(self, ctx, cmd_name): # Helper functions # +# Execute action per NPU instance for multi instance services. +def execute_asic_instance(inst, multi_inst_list, action): + for service in multi_inst_list: + try: + click.echo("Executing {} of service {}@{}...".format(action, service, inst)) + run_command("systemctl {} {}@{}.service".format(action, service, inst)) + except SystemExit as e: + log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e)) + raise + # Execute action on list of systemd services def execute_systemctl(list_of_services, action): num_asic = sonic_device_util.get_num_npus() @@ -124,6 +135,8 @@ def execute_systemctl(list_of_services, action): log_error("Failed to get generated services") return + # For Multi NPU, do the "action" on the global services which is single instance first. + multi_inst_service_list = [] for service in list_of_services: if (service + '.service' in generated_services_list): try: @@ -132,14 +145,22 @@ def execute_systemctl(list_of_services, action): except SystemExit as e: log_error("Failed to execute {} of service {} with error {}".format(action, service, e)) raise + if (service + '.service' in generated_multi_instance_services): - for inst in range(num_asic): - try: - click.echo("Executing {} of service {}@{}...".format(action, service, inst)) - run_command("systemctl {} {}@{}.service".format(action, service, inst)) - except SystemExit as e: - log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e)) - raise + multi_inst_service_list.append(service) + + # With Multi NPU, Start a thread per instance to do the "action" on multi instance services. + if sonic_device_util.is_multi_npu(): + threads = [] + kwargs = {'multi_inst_list' : multi_inst_service_list, 'action' : action} + for inst in range(num_asic): + t = threading.Thread(target=execute_asic_instance, args=(inst,), kwargs=kwargs) + threads.append(t) + t.start() + + # Wait for all the threads to finish. + for inst in range(num_asic): + threads[inst].join() def run_command(command, display_cmd=False, ignore_error=False): """Run bash command and print output to stdout From 7ab9a5dde92145bc33e54422cd6dc9fef8b2d0f3 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Tue, 26 May 2020 18:20:32 -0700 Subject: [PATCH 2/6] Code change to check if any exception was raised in the threads and propogate it back to parent process. --- config/main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/config/main.py b/config/main.py index 65436dce51..996f9ab0be 100755 --- a/config/main.py +++ b/config/main.py @@ -117,14 +117,15 @@ def get_command(self, ctx, cmd_name): # # Execute action per NPU instance for multi instance services. -def execute_asic_instance(inst, multi_inst_list, action): +def execute_asic_instance(inst, event, multi_inst_list, action): for service in multi_inst_list: try: click.echo("Executing {} of service {}@{}...".format(action, service, inst)) run_command("systemctl {} {}@{}.service".format(action, service, inst)) except SystemExit as e: log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e)) - raise + # Set the event object if there is a failure and exception was raised. + event.set() # Execute action on list of systemd services def execute_systemctl(list_of_services, action): @@ -152,9 +153,11 @@ def execute_systemctl(list_of_services, action): # With Multi NPU, Start a thread per instance to do the "action" on multi instance services. if sonic_device_util.is_multi_npu(): threads = [] + # Use this event object to co-ordinate if any threads raised exception + e = threading.Event() kwargs = {'multi_inst_list' : multi_inst_service_list, 'action' : action} for inst in range(num_asic): - t = threading.Thread(target=execute_asic_instance, args=(inst,), kwargs=kwargs) + t = threading.Thread(target=execute_asic_instance, args=(inst,e), kwargs=kwargs) threads.append(t) t.start() @@ -162,6 +165,10 @@ def execute_systemctl(list_of_services, action): for inst in range(num_asic): threads[inst].join() + # Check if any of the threads have raised exception, if so exit the process. + if e.is_set(): + sys.exit(1) + def run_command(command, display_cmd=False, ignore_error=False): """Run bash command and print output to stdout """ From fb8499aa71c6ed55cbce86b40e3f03837b44462a Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Tue, 26 May 2020 22:41:07 -0700 Subject: [PATCH 3/6] Review comments updated. --- config/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/main.py b/config/main.py index 996f9ab0be..e41c405a4a 100755 --- a/config/main.py +++ b/config/main.py @@ -155,9 +155,9 @@ def execute_systemctl(list_of_services, action): threads = [] # Use this event object to co-ordinate if any threads raised exception e = threading.Event() - kwargs = {'multi_inst_list' : multi_inst_service_list, 'action' : action} + kwargs = {'multi_inst_list': multi_inst_service_list, 'action': action} for inst in range(num_asic): - t = threading.Thread(target=execute_asic_instance, args=(inst,e), kwargs=kwargs) + t = threading.Thread(target=execute_asic_instance, args=(inst, e), kwargs=kwargs) threads.append(t) t.start() From bb6e9cbace7fd6992b6b236638b920f7d6583fa7 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 15 Jun 2020 13:21:39 -0700 Subject: [PATCH 4/6] With earlier logic of seggregating first into the multi_inst_service_list, we were changing the order in which the services were stopped/started. Now changed the logic to directly pass the service to the thread handler. --- config/main.py | 56 ++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/config/main.py b/config/main.py index e41c405a4a..0836ed912b 100755 --- a/config/main.py +++ b/config/main.py @@ -117,15 +117,14 @@ def get_command(self, ctx, cmd_name): # # Execute action per NPU instance for multi instance services. -def execute_asic_instance(inst, event, multi_inst_list, action): - for service in multi_inst_list: - try: - click.echo("Executing {} of service {}@{}...".format(action, service, inst)) - run_command("systemctl {} {}@{}.service".format(action, service, inst)) - except SystemExit as e: - log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e)) - # Set the event object if there is a failure and exception was raised. - event.set() +def execute_asic_instance(inst, event, service, action): + try: + click.echo("Executing {} of service {}@{}...".format(action, service, inst)) + run_command("systemctl {} {}@{}.service".format(action, service, inst)) + except SystemExit as e: + log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e)) + # Set the event object if there is a failure and exception was raised. + event.set() # Execute action on list of systemd services def execute_systemctl(list_of_services, action): @@ -148,26 +147,25 @@ def execute_systemctl(list_of_services, action): raise if (service + '.service' in generated_multi_instance_services): - multi_inst_service_list.append(service) - - # With Multi NPU, Start a thread per instance to do the "action" on multi instance services. - if sonic_device_util.is_multi_npu(): - threads = [] - # Use this event object to co-ordinate if any threads raised exception - e = threading.Event() - kwargs = {'multi_inst_list': multi_inst_service_list, 'action': action} - for inst in range(num_asic): - t = threading.Thread(target=execute_asic_instance, args=(inst, e), kwargs=kwargs) - threads.append(t) - t.start() - - # Wait for all the threads to finish. - for inst in range(num_asic): - threads[inst].join() - - # Check if any of the threads have raised exception, if so exit the process. - if e.is_set(): - sys.exit(1) + # With Multi NPU, Start a thread per instance to do the "action" on multi instance services. + if sonic_device_util.is_multi_npu(): + threads = [] + # Use this event object to co-ordinate if any threads raised exception + e = threading.Event() + + kwargs = {'service': service, 'action': action} + for inst in range(num_asic): + t = threading.Thread(target=execute_asic_instance, args=(inst, e), kwargs=kwargs) + threads.append(t) + t.start() + + # Wait for all the threads to finish. + for inst in range(num_asic): + threads[inst].join() + + # Check if any of the threads have raised exception, if so exit the process. + if e.is_set(): + sys.exit(1) def run_command(command, display_cmd=False, ignore_error=False): """Run bash command and print output to stdout From 316bbe8bb4437c388e2b034523455f989bb96f17 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Mon, 15 Jun 2020 13:26:10 -0700 Subject: [PATCH 5/6] Removing unused variable multi_inst_service_list --- config/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/config/main.py b/config/main.py index 0836ed912b..094228701b 100755 --- a/config/main.py +++ b/config/main.py @@ -135,8 +135,6 @@ def execute_systemctl(list_of_services, action): log_error("Failed to get generated services") return - # For Multi NPU, do the "action" on the global services which is single instance first. - multi_inst_service_list = [] for service in list_of_services: if (service + '.service' in generated_services_list): try: From b648802a9fb152141038386930008b6e6830dc17 Mon Sep 17 00:00:00 2001 From: Judy Joseph Date: Wed, 17 Jun 2020 10:51:04 -0700 Subject: [PATCH 6/6] Updated the function name to reflect systemctl invocation. --- config/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/main.py b/config/main.py index 094228701b..5c9aee8c3a 100755 --- a/config/main.py +++ b/config/main.py @@ -117,7 +117,7 @@ def get_command(self, ctx, cmd_name): # # Execute action per NPU instance for multi instance services. -def execute_asic_instance(inst, event, service, action): +def execute_systemctl_per_asic_instance(inst, event, service, action): try: click.echo("Executing {} of service {}@{}...".format(action, service, inst)) run_command("systemctl {} {}@{}.service".format(action, service, inst)) @@ -153,7 +153,7 @@ def execute_systemctl(list_of_services, action): kwargs = {'service': service, 'action': action} for inst in range(num_asic): - t = threading.Thread(target=execute_asic_instance, args=(inst, e), kwargs=kwargs) + t = threading.Thread(target=execute_systemctl_per_asic_instance, args=(inst, e), kwargs=kwargs) threads.append(t) t.start()