2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
@@ -26,7 +26,7 @@ env:
pytest-args: >-
--forked
--durations=20
--timeout=300
--timeout=600
--timeout-method=thread
-s

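The --timeout and --timeout-method flags come from the pytest-timeout plugin (--forked is pytest-forked); this change only doubles the per-test limit from 300 to 600 seconds. A rough sketch of the invocation the workflow ends up making, written as Python via pytest.main (the test path is a placeholder, not taken from the workflow):

import pytest

pytest.main([
    "--forked",                 # run each test in its own subprocess (pytest-forked)
    "--durations=20",           # report the 20 slowest tests
    "--timeout=600",            # per-test limit in seconds, raised from 300 by this change
    "--timeout-method=thread",  # enforce the limit with a timer thread instead of SIGALRM
    "-s",                       # do not capture stdout/stderr
    "test/",                    # placeholder test path
])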
15 changes: 5 additions & 10 deletions scripts/03_calculate_metafeatures.py
@@ -5,6 +5,7 @@
import os
import sys
import unittest.mock
import tempfile

import arff
import joblib
@@ -82,10 +83,7 @@ def calculate_metafeatures(task_id):

for task_type in ("classification", "regression"):
output_directory = os.path.join(working_directory, "metafeatures", task_type)
try:
os.makedirs(output_directory)
except:
pass
os.makedirs(output_directory, exist_ok=True)

all_metafeatures = {}

@@ -100,13 +98,10 @@
tasks = copy.deepcopy(tasks)
np.random.shuffle(tasks)

def producer():
for task_id in tasks:
yield task_id

memory = joblib.Memory(location="/tmp/joblib", verbose=10)
tmpdir = os.path.join(tempfile.gettempdir(), "joblib")
memory = joblib.Memory(location=tmpdir, verbose=10)
cached_calculate_metafeatures = memory.cache(calculate_metafeatures)
mfs = [cached_calculate_metafeatures(task_id) for task_id in producer()]
mfs = [cached_calculate_metafeatures(task_id) for task_id in tasks]

for mf in mfs:
if mf is not None:
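Taken together, the script changes amount to two idioms: os.makedirs(..., exist_ok=True) instead of a bare try/except, and a joblib cache rooted under tempfile.gettempdir() instead of a hard-coded /tmp/joblib (the producer() generator is dropped because iterating over the task list directly is equivalent). A minimal, self-contained sketch of the resulting pattern, with an illustrative stand-in for the real metafeature computation:

import os
import tempfile

import joblib


def calculate_metafeatures(task_id):
    # stand-in for the expensive computation the script actually caches
    return {"task_id": task_id}


output_directory = os.path.join(tempfile.gettempdir(), "metafeatures", "classification")
os.makedirs(output_directory, exist_ok=True)  # no error if the directory already exists

cache_dir = os.path.join(tempfile.gettempdir(), "joblib")
memory = joblib.Memory(location=cache_dir, verbose=10)
cached_calculate_metafeatures = memory.cache(calculate_metafeatures)

# iterate over the task ids directly; no generator needed
mfs = [cached_calculate_metafeatures(task_id) for task_id in [1, 2, 3]]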
58 changes: 43 additions & 15 deletions test/test_scripts/test_metadata_generation.py
@@ -4,6 +4,7 @@
import shutil
import socket
import subprocess
import tempfile

import arff
import numpy as np
@@ -15,10 +16,12 @@

class TestMetadataGeneration(unittest.TestCase):
def setUp(self):
self.working_directory = "/tmp/autosklearn-unittest-tmp-dir-%s-%d-%d" % (
socket.gethostname(),
os.getpid(),
random.randint(0, 1000000),
host = socket.gethostname()
pid = os.getpid()
rint = random.randint(0, 1000000)

self.working_directory = os.path.join(
tempfile.gettempdir(), f"autosklearn-unittest-tmp-dir-{host}-{pid}-{rint}"
)

def print_files(self):
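The new setUp builds the working directory under the platform temp directory and keeps it unique per host, process, and run. A small self-contained sketch of the same scheme (the class name is illustrative, and the tearDown cleanup is a hypothetical addition, not part of this diff):

import os
import random
import shutil
import socket
import tempfile
import unittest


class ExampleTest(unittest.TestCase):
    def setUp(self):
        host = socket.gethostname()
        pid = os.getpid()
        rint = random.randint(0, 1000000)
        self.working_directory = os.path.join(
            tempfile.gettempdir(), f"example-unittest-tmp-dir-{host}-{pid}-{rint}"
        )

    def tearDown(self):
        # hypothetical cleanup: remove the directory if a test created it
        shutil.rmtree(self.working_directory, ignore_errors=True)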
@@ -27,7 +30,6 @@ def print_files(self):
print(dirpath, dirnames, filenames)

def test_metadata_generation(self):

regression_task_id = 360029
regression_dataset_name = "SWD".lower()
classification_task_id = 245
@@ -52,10 +54,15 @@
script_filename,
self.working_directory,
)

return_value = subprocess.run(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=30,
)
self.assertEqual(return_value.returncode, 0, msg=str(return_value))
self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")

# 4. run one of the commands to get some data
commands_output_file = os.path.join(
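Each of the subprocess calls in this test follows the same pattern: run the command through the shell, capture stdout and stderr, bound it with a timeout, and include the command itself in the assertion message so failures are easy to reproduce. A rough sketch of that pattern as a helper (the helper name and default timeout are hypothetical, not part of the diff):

import subprocess


def run_and_check(test_case, cmd, timeout=60):
    return_value = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,  # raises subprocess.TimeoutExpired when exceeded
    )
    test_case.assertEqual(
        return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}"
    )
    return return_value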
@@ -99,8 +106,13 @@
# for training. In production, it would use twice as much!
cmd = cmd.replace("-s 1", "-s 1 --unittest")
print("COMMAND: %s" % cmd)

return_value = subprocess.run(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=180,
)
print("STDOUT: %s" % repr(return_value.stdout), flush=True)
print("STDERR: %s" % repr(return_value.stderr), flush=True)
@@ -124,7 +136,9 @@
with open(smac_log) as fh:
smac_output = fh.read()
self.assertEqual(
return_value.returncode, 0, msg=str(return_value) + "\n" + smac_output
return_value.returncode,
0,
msg=f"{cmd}\n{str(return_value)}" + "\n" + smac_output,
)
expected_validation_output = os.path.join(
expected_output_directory, "..", "validation_trajectory_1.json"
@@ -174,12 +188,17 @@
self.working_directory,
)
print("COMMAND: %s" % cmd)

return_value = subprocess.run(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=60,
)
print("STDOUT: %s" % repr(return_value.stdout), flush=True)
print("STDERR: %s" % repr(return_value.stderr), flush=True)
self.assertEqual(return_value.returncode, 0, msg=str(return_value))
self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")

for file in [
"algorithm_runs.arff",
@@ -218,9 +237,13 @@
self.working_directory,
)
return_value = subprocess.run(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=90,
)
self.assertEqual(return_value.returncode, 0, msg=str(return_value))
self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")
for task_type in ("classification", "regression"):
for file in [
"calculation_times.csv",
Expand Down Expand Up @@ -273,10 +296,15 @@ def test_metadata_generation(self):
script_filename,
self.working_directory,
)

return_value = subprocess.run(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
timeout=45,
)
self.assertEqual(return_value.returncode, 0, msg=str(return_value))
self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")

for metric_, combination in (
(metric, "%s_binary.classification_dense" % metric),