Skip to content

Commit

Permalink
DAOS-16265 test: Fix erasurecode/rebuild_fio.py out of space (#15020)
Browse files: browse the repository at this point in the history
Prevent accumulating large server log files caused by temporarily
enabling the DEBUG log mask while creating or destroying pools.

Signed-off-by: Phil Henderson <phillip.henderson@intel.com>
  • Loading branch information
phender authored Oct 17, 2024
1 parent 59b5b54 commit 8f70ea0
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/multiple_failure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ server_config:
storage: auto
pool:
size: 93%
set_logmasks: False
container:
type: POSIX
control_method: daos
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/erasurecode/rebuild_fio.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pool:
aggregation:
threshold: 50000000
aggr_timeout: 180
set_logmasks: False
container:
type: POSIX
control_method: daos
Expand Down
22 changes: 18 additions & 4 deletions src/tests/ftest/util/apricot/apricot/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ def __init__(self, *args, **kwargs):
self.setup_start_agents = True
self.slurm_exclude_servers = False
self.slurm_exclude_nodes = NodeSet()
self.max_test_dir_usage_check = 90
self.host_info = HostInfo()
self.hostlist_servers = NodeSet()
self.hostlist_clients = NodeSet()
Expand Down Expand Up @@ -693,6 +694,11 @@ def setUp(self):
self.slurm_exclude_servers = self.params.get(
"slurm_exclude_servers", "/run/setup/*", self.slurm_exclude_servers)

# Maximum test directory usage percentage. When usage exceeds this value, the sizes of
# the files in the test directory are displayed.
self.max_test_dir_usage_check = self.params.get(
"max_test_dir_usage_check", "/run/setup/*", self.max_test_dir_usage_check)

# The server config name should be obtained from each ServerManager
# object, but some tests still use this TestWithServers attribute.
self.server_group = self.params.get("name", "/run/server_config/*", "daos_server")
Expand Down Expand Up @@ -765,12 +771,20 @@ def setUp(self):

# List common test directory contents before running the test
self.log.info("-" * 100)
self.log.debug("Common test directory (%s) contents:", os.path.dirname(self.test_dir))
self.log.debug(
"Common test directory (%s) contents (check > %s%%):",
os.path.dirname(self.test_dir), self.max_test_dir_usage_check)
all_hosts = include_local_host(self.host_info.all_hosts)
test_dir_parent = os.path.dirname(self.test_dir)
result = run_remote(self.log, all_hosts, f"df -h {test_dir_parent}")
if int(max(re.findall(r" ([\d+])% ", result.joined_stdout) + ["0"])) > 90:
run_remote(self.log, all_hosts, f"du -sh {test_dir_parent}/*")
_result = run_remote(self.log, all_hosts, f"df -h {test_dir_parent}")
_details = NodeSet()
for _host, _stdout in _result.all_stdout.items():
_test_dir_usage = re.findall(r"\s+([\d]+)%\s+", _stdout)
_test_dir_usage_int = int(max(_test_dir_usage + ["0"]))
if _test_dir_usage_int > self.max_test_dir_usage_check:
_details.add(_host)
if _details:
run_remote(self.log, _details, f"du -sh {test_dir_parent}/*")
self.log.info("-" * 100)

if not self.start_servers_once or self.name.uid == 1:
Expand Down

0 comments on commit 8f70ea0

Please sign in to comment.