From a4610e882a321e3d9e90b0df282959686bb2a1ac Mon Sep 17 00:00:00 2001
From: Dalton Bohning <dalton.bohning@intel.com>
Date: Thu, 13 Jun 2024 11:04:49 -0700
Subject: [PATCH] DAOS-15955 test: increase clush fanout for run_remote
 (#14509)

Set the clush fanout in run_remote to the larger of the clush default
and the number of logical cores available to the process, unless the
caller passes an explicit fanout. This helps in large-scale
environments.

Signed-off-by: Dalton Bohning <dalton.bohning@intel.com>
---
 src/tests/ftest/util/run_utils.py      | 12 ++++++++++--
 src/tests/ftest/util/soak_test_base.py |  3 ++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/tests/ftest/util/run_utils.py b/src/tests/ftest/util/run_utils.py
index 827ef52d372..23ac53c76d7 100644
--- a/src/tests/ftest/util/run_utils.py
+++ b/src/tests/ftest/util/run_utils.py
@@ -1,8 +1,9 @@
 """
-  (C) Copyright 2022-2023 Intel Corporation.
+  (C) Copyright 2022-2024 Intel Corporation.
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
+import os
 import shlex
 import subprocess  # nosec
 import time
@@ -380,7 +381,8 @@ def run_local(log, command, capture_output=True, timeout=None, check=False, verb
     return result
 
 
-def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False, stderr=False):
+def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False, stderr=False,
+               fanout=None):
     """Run the command on the remote hosts.
 
     Args:
@@ -392,6 +394,8 @@ def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False,
             Defaults to 120 seconds.
         task_debug (bool, optional): whether to enable debug for the task object. Defaults to False.
         stderr (bool, optional): whether to enable stdout/stderr separation. Defaults to False.
+        fanout (int, optional): fanout to use. Default uses the max of the
+            clush default (64) or available cores
 
     Returns:
         RemoteCommandResult: a grouping of the command results from the same hosts with the same
@@ -401,6 +405,10 @@ def run_remote(log, hosts, command, verbose=True, timeout=120, task_debug=False,
     task = task_self()
     task.set_info('debug', task_debug)
     task.set_default("stderr", stderr)
+    # Set fan out to the max of the default or number of logical cores
+    if fanout is None:
+        fanout = max(task.info('fanout'), len(os.sched_getaffinity(0)))
+    task.set_info('fanout', fanout)
     # Enable forwarding of the ssh authentication agent connection
     task.set_info("ssh_options", "-oForwardAgent=yes")
     if verbose:
diff --git a/src/tests/ftest/util/soak_test_base.py b/src/tests/ftest/util/soak_test_base.py
index d75456cdfd6..a97bb2094eb 100644
--- a/src/tests/ftest/util/soak_test_base.py
+++ b/src/tests/ftest/util/soak_test_base.py
@@ -466,7 +466,8 @@ def job_completion(self, job_id_list):
             cmd = f"/usr/bin/rsync -avtr --min-size=1B {self.soak_log_dir} {self.outputsoak_dir}/"
             cmd2 = f"/usr/bin/rm -rf {self.soak_log_dir}"
             if self.enable_remote_logging:
-                result = run_remote(self.log, self.hostlist_clients, cmd, timeout=600)
+                # Limit fan out to reduce burden on filesystem
+                result = run_remote(self.log, self.hostlist_clients, cmd, timeout=600, fanout=64)
                 if result.passed:
                     result = run_remote(self.log, self.hostlist_clients, cmd2, timeout=600)
                 if not result.passed: