diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index de29e860fc..4d9bc34b7b 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -26,7 +26,7 @@ env:
   pytest-args: >-
     --forked
     --durations=20
-    --timeout=300
+    --timeout=600
     --timeout-method=thread
     -s
 
diff --git a/scripts/03_calculate_metafeatures.py b/scripts/03_calculate_metafeatures.py
index 95d857145a..d7e08ffea8 100644
--- a/scripts/03_calculate_metafeatures.py
+++ b/scripts/03_calculate_metafeatures.py
@@ -5,6 +5,7 @@
 import os
 import sys
 import unittest.mock
+import tempfile
 
 import arff
 import joblib
@@ -82,10 +83,7 @@ def calculate_metafeatures(task_id):
 
     for task_type in ("classification", "regression"):
         output_directory = os.path.join(working_directory, "metafeatures", task_type)
-        try:
-            os.makedirs(output_directory)
-        except:
-            pass
+        os.makedirs(output_directory, exist_ok=True)
 
         all_metafeatures = {}
 
@@ -100,13 +98,10 @@ def calculate_metafeatures(task_id):
         tasks = copy.deepcopy(tasks)
         np.random.shuffle(tasks)
 
-        def producer():
-            for task_id in tasks:
-                yield task_id
-
-        memory = joblib.Memory(location="/tmp/joblib", verbose=10)
+        tmpdir = os.path.join(tempfile.gettempdir(), "joblib")
+        memory = joblib.Memory(location=tmpdir, verbose=10)
         cached_calculate_metafeatures = memory.cache(calculate_metafeatures)
-        mfs = [cached_calculate_metafeatures(task_id) for task_id in producer()]
+        mfs = [cached_calculate_metafeatures(task_id) for task_id in tasks]
 
         for mf in mfs:
             if mf is not None:
diff --git a/test/test_scripts/test_metadata_generation.py b/test/test_scripts/test_metadata_generation.py
index 89999d6be1..25c4855b08 100644
--- a/test/test_scripts/test_metadata_generation.py
+++ b/test/test_scripts/test_metadata_generation.py
@@ -4,6 +4,7 @@
 import shutil
 import socket
 import subprocess
+import tempfile
 
 import arff
 import numpy as np
@@ -15,10 +16,12 @@
 class TestMetadataGeneration(unittest.TestCase):
 
     def setUp(self):
-        self.working_directory = "/tmp/autosklearn-unittest-tmp-dir-%s-%d-%d" % (
-            socket.gethostname(),
-            os.getpid(),
-            random.randint(0, 1000000),
+        host = socket.gethostname()
+        pid = os.getpid()
+        rint = random.randint(0, 1000000)
+
+        self.working_directory = os.path.join(
+            tempfile.gettempdir(), f"autosklearn-unittest-tmp-dir-{host}-{pid}-{rint}"
         )
 
     def print_files(self):
@@ -27,7 +30,6 @@ def print_files(self):
             print(dirpath, dirnames, filenames)
 
     def test_metadata_generation(self):
-
         regression_task_id = 360029
         regression_dataset_name = "SWD".lower()
         classification_task_id = 245
@@ -52,10 +54,15 @@ def test_metadata_generation(self):
             script_filename,
             self.working_directory,
         )
+
         return_value = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=30,
         )
-        self.assertEqual(return_value.returncode, 0, msg=str(return_value))
+        self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")
 
         # 4. run one of the commands to get some data
         commands_output_file = os.path.join(
@@ -99,8 +106,13 @@ def test_metadata_generation(self):
         # for training. In production, it would use twice as much!
         cmd = cmd.replace("-s 1", "-s 1 --unittest")
         print("COMMAND: %s" % cmd)
+
         return_value = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=180,
         )
         print("STDOUT: %s" % repr(return_value.stdout), flush=True)
         print("STDERR: %s" % repr(return_value.stderr), flush=True)
@@ -124,7 +136,9 @@ def test_metadata_generation(self):
             with open(smac_log) as fh:
                 smac_output = fh.read()
             self.assertEqual(
-                return_value.returncode, 0, msg=str(return_value) + "\n" + smac_output
+                return_value.returncode,
+                0,
+                msg=f"{cmd}\n{str(return_value)}" + "\n" + smac_output,
             )
         expected_validation_output = os.path.join(
             expected_output_directory, "..", "validation_trajectory_1.json"
         )
@@ -174,12 +188,17 @@ def test_metadata_generation(self):
             self.working_directory,
         )
         print("COMMAND: %s" % cmd)
+
         return_value = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=60,
        )
         print("STDOUT: %s" % repr(return_value.stdout), flush=True)
         print("STDERR: %s" % repr(return_value.stderr), flush=True)
-        self.assertEqual(return_value.returncode, 0, msg=str(return_value))
+        self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")
 
         for file in [
             "algorithm_runs.arff",
@@ -218,9 +237,13 @@ def test_metadata_generation(self):
             self.working_directory,
         )
         return_value = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=90,
         )
-        self.assertEqual(return_value.returncode, 0, msg=str(return_value))
+        self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")
         for task_type in ("classification", "regression"):
             for file in [
                 "calculation_times.csv",
@@ -273,10 +296,15 @@ def test_metadata_generation(self):
             script_filename,
             self.working_directory,
         )
+
         return_value = subprocess.run(
-            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            cmd,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=45,
         )
-        self.assertEqual(return_value.returncode, 0, msg=str(return_value))
+        self.assertEqual(return_value.returncode, 0, msg=f"{cmd}\n{str(return_value)}")
 
         for metric_, combination in (
             (metric, "%s_binary.classification_dense" % metric),