Reduce the number of trials in tuner unit tests.
PiperOrigin-RevId: 570962173
achoum authored and copybara-github committed Oct 5, 2023
1 parent fbe100e commit fa875fe
Showing 4 changed files with 70 additions and 42 deletions.
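
Note on the change: each tuning trial trains one candidate model, so the trial count is the main driver of these tests' runtime. The edits below cut the number of trials (and, where relevant, the trees per model and the Bazel shard counts), updating the shape assertions to match. For reference, a minimal sketch of the pattern the affected tests exercise, assuming a local adult_train.csv with an "income" label column (the path is a placeholder, not the tests' real data location):

    import pandas as pd
    import tensorflow_decision_forests as tfdf

    # Small random search: 10 trials over two hyperparameter axes.
    tuner = tfdf.tuner.RandomSearch(num_trials=10)
    tuner.choice("num_candidate_attributes_ratio", [1.0, 0.8, 0.6])
    tuner.choice("use_hessian_gain", [True, False])

    # Attaching the tuner to the model makes fit() run the trials.
    train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
        pd.read_csv("adult_train.csv"), label="income"  # placeholder path
    )
    model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
    model.fit(train_ds)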
2 changes: 1 addition & 1 deletion tensorflow_decision_forests/component/tuner/BUILD
@@ -26,7 +26,7 @@ py_test(
     srcs = ["tuner_test.py"],
     data = ["@ydf//yggdrasil_decision_forests/test_data"],
     python_version = "PY3",
-    shard_count = 10,
+    shard_count = 4,
     deps = [
         ":tuner",
         "@com_google_protobuf//:python_srcs",
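Context for the shard_count change: Bazel runs a sharded py_test as shard_count parallel processes, each executing a subset of the test cases, so the value trades machine usage against wall-clock time. With far fewer tuning trials per test case, fewer shards are needed, hence 10 to 4 here and the new shard_count = 2 added to keras_tuner_test below.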
28 changes: 15 additions & 13 deletions tensorflow_decision_forests/component/tuner/tuner_test.py
@@ -14,8 +14,8 @@

 import os

-from absl import logging
 from absl import flags
+from absl import logging
 from absl.testing import parameterized
 import pandas as pd
 import tensorflow as tf
@@ -32,7 +32,7 @@ def data_root_path() -> str:
 def ydf_test_datasets_path() -> str:
   return os.path.join(
       data_root_path(),
-      "external/ydf/yggdrasil_decision_forests/test_data/dataset"
+      "external/ydf/yggdrasil_decision_forests/test_data/dataset",
   )


@@ -42,7 +42,8 @@ def test_base(self):
     tuner = tuner_lib.RandomSearch(
         num_trials=20,
         trial_num_threads=2,
-        trial_maximum_training_duration_seconds=10)
+        trial_maximum_training_duration_seconds=10,
+    )
     tuner.choice("a", [1, 2, 3])
     tuner.choice("b", [1.0, 2.0, 3.0])
     tuner.choice("c", ["x", "y"])
@@ -141,7 +142,10 @@ def test_base(self):
                 }
               }
             }
-            """, abstract_learner_pb2.TrainingConfig()))
+            """,
+            abstract_learner_pb2.TrainingConfig(),
+        ),
+    )

   def test_errors(self):
     tuner = tuner_lib.RandomSearch(num_trials=20)
@@ -163,24 +167,22 @@ def test_predefined_hps_ranking(self):
     model = keras.GradientBoostedTreesModel(
         task=keras.Task.RANKING,
         ranking_group="GROUP",
-        num_trees=50,
-        tuner=tuner)
+        num_trees=5,
+        tuner=tuner,
+    )

     model.fit(ds)

   def test_predefined_hps_classification(self):
-    tuner = tuner_lib.RandomSearch(num_trials=50, use_predefined_hps=True)
-    ds_path = os.path.join(
-        ydf_test_datasets_path(), "adult_train.csv"
-    )
+    tuner = tuner_lib.RandomSearch(num_trials=10, use_predefined_hps=True)
+    ds_path = os.path.join(ydf_test_datasets_path(), "adult_train.csv")
     train_df = pd.read_csv(ds_path)
     ds = keras.pd_dataframe_to_tf_dataset(
         train_df, "income", task=keras.Task.CLASSIFICATION
     )
     model = keras.GradientBoostedTreesModel(
-        task=keras.Task.CLASSIFICATION,
-        num_trees=50,
-        tuner=tuner)
+        task=keras.Task.CLASSIFICATION, num_trees=5, tuner=tuner
+    )

     model.fit(ds)

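A note on use_predefined_hps, exercised by the tests above: with use_predefined_hps=True the tuner searches the learner's built-in hyperparameter space rather than axes declared by hand with choice(), so the test only sets the trial budget. A minimal sketch following the classification test (same names as the test code; ds is the classification dataset built above):

    tuner = tuner_lib.RandomSearch(num_trials=10, use_predefined_hps=True)
    model = keras.GradientBoostedTreesModel(
        task=keras.Task.CLASSIFICATION, num_trees=5, tuner=tuner
    )
    model.fit(ds)  # runs 10 trials over the predefined space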
1 change: 1 addition & 0 deletions tensorflow_decision_forests/keras/BUILD
@@ -147,6 +147,7 @@ py_test(
     srcs = ["keras_tuner_test.py"],
     data = ["@ydf//yggdrasil_decision_forests/test_data"],
     python_version = "PY3",
+    shard_count = 2,
     deps = [
         ":core",
         ":keras",
81 changes: 53 additions & 28 deletions tensorflow_decision_forests/keras/keras_tuner_test.py
@@ -17,6 +17,7 @@
 from __future__ import print_function

 import os
+
 from absl import flags
 from absl import logging
 import pandas as pd
@@ -31,8 +32,9 @@ def data_root_path() -> str:


 def test_data_path() -> str:
-  return os.path.join(data_root_path(),
-                      "external/ydf/yggdrasil_decision_forests/test_data")
+  return os.path.join(
+      data_root_path(), "external/ydf/yggdrasil_decision_forests/test_data"
+  )


 def tmp_path() -> str:
@@ -42,7 +44,6 @@ def tmp_path() -> str:
 class TFDFTunerTest(tf.test.TestCase):

   def test_random_adult_in_memory(self):
-
     # Prepare the datasets
     dataset_directory = os.path.join(test_data_path(), "dataset")
     train_path = os.path.join(dataset_directory, "adult_train.csv")
@@ -51,20 +52,23 @@ def test_random_adult_in_memory(self):
     label = "income"

     train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(train_path), label=label)
+        pd.read_csv(train_path), label=label
+    )
     test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(test_path), label=label)
+        pd.read_csv(test_path), label=label
+    )

     # Configure and train the model
-    tuner = tfdf.tuner.RandomSearch(num_trials=30)
+    tuner = tfdf.tuner.RandomSearch(num_trials=10)
     tuner.choice("num_candidate_attributes_ratio", [1.0, 0.8, 0.6])
     tuner.choice("use_hessian_gain", [True, False])

     local_search_space = tuner.choice("growing_strategy", ["LOCAL"])
     local_search_space.choice("max_depth", [4, 5, 6, 7])

     global_search_space = tuner.choice(
-        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True)
+        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True
+    )
     global_search_space.choice("max_num_nodes", [16, 32, 64, 128])

     model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
@@ -81,20 +85,26 @@ def test_random_adult_in_memory(self):
     self.assertSetEqual(
         set(tuning_logs.columns),
         set([
-            "score", "evaluation_time", "best",
-            "num_candidate_attributes_ratio", "use_hessian_gain",
-            "growing_strategy", "max_depth", "max_num_nodes"
-        ]))
-    self.assertEqual(tuning_logs.shape, (30, 8))
+            "score",
+            "evaluation_time",
+            "best",
+            "num_candidate_attributes_ratio",
+            "use_hessian_gain",
+            "growing_strategy",
+            "max_depth",
+            "max_num_nodes",
+        ]),
+    )
+    self.assertEqual(tuning_logs.shape, (10, 8))
     self.assertEqual(tuning_logs["best"].sum(), 1)
-    self.assertNear(tuning_logs["score"][tuning_logs["best"]].values[0], -0.587,
-                    0.05)
+    self.assertNear(
+        tuning_logs["score"][tuning_logs["best"]].values[0], -0.587, 0.05
+    )

     # This is a lot of text.
     _ = model.make_inspector().tuning_logs(return_format="proto")

   def test_random_adult_in_memory_predefined_hpspace(self):
-
     # Prepare the datasets
     dataset_directory = os.path.join(test_data_path(), "dataset")
     train_path = os.path.join(dataset_directory, "adult_train.csv")
@@ -103,12 +113,14 @@ def test_random_adult_in_memory_predefined_hpspace(self):
     label = "income"

     train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(train_path), label=label)
+        pd.read_csv(train_path), label=label
+    )
     test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(test_path), label=label)
+        pd.read_csv(test_path), label=label
+    )

     # Configure and train the model
-    tuner = tfdf.tuner.RandomSearch(num_trials=30, use_predefined_hps=True)
+    tuner = tfdf.tuner.RandomSearch(num_trials=10, use_predefined_hps=True)
     model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
     model.fit(train_ds)

@@ -123,17 +135,30 @@ def test_random_adult_in_memory_predefined_hpspace(self):
     self.assertSetEqual(
         set(tuning_logs.columns),
         set([
-            "score", "evaluation_time", "best",
-            "num_candidate_attributes_ratio", "use_hessian_gain",
-            "growing_strategy", "max_depth", "max_num_nodes", "subsample",
-            "shrinkage", "sampling_method", "sparse_oblique_weights",
-            "sparse_oblique_projection_density_factor", "categorical_algorithm",
-            "min_examples", "sparse_oblique_normalization", "split_axis"
-        ]))
-    self.assertEqual(tuning_logs.shape, (30, 17))
+            "score",
+            "evaluation_time",
+            "best",
+            "num_candidate_attributes_ratio",
+            "use_hessian_gain",
+            "growing_strategy",
+            "max_depth",
+            "max_num_nodes",
+            "subsample",
+            "shrinkage",
+            "sampling_method",
+            "sparse_oblique_weights",
+            "sparse_oblique_projection_density_factor",
+            "categorical_algorithm",
+            "min_examples",
+            "sparse_oblique_normalization",
+            "split_axis",
+        ]),
+    )
+    self.assertEqual(tuning_logs.shape, (10, 17))
     self.assertEqual(tuning_logs["best"].sum(), 1)
-    self.assertNear(tuning_logs["score"][tuning_logs["best"]].values[0], -0.587,
-                    0.05)
+    self.assertNear(
+        tuning_logs["score"][tuning_logs["best"]].values[0], -0.587, 0.05
+    )

     # This is a lot of text.
     _ = model.make_inspector().tuning_logs(return_format="proto")

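Two details of the keras_tuner_test changes are easy to miss. First, the growing-strategy search space is conditional: max_depth applies only when growing_strategy is LOCAL, and max_num_nodes only when it is BEST_FIRST_GLOBAL, so each subspace hangs off its own choice() call and the second call passes merge=True to extend the existing growing_strategy axis rather than redefine it. Second, the assertions read per-trial results back from the trained model; a sketch, assuming the inspector returns a pandas DataFrame by default:

    # Conditional search space: child choices apply only to the parent's values.
    local_space = tuner.choice("growing_strategy", ["LOCAL"])
    local_space.choice("max_depth", [4, 5, 6, 7])
    global_space = tuner.choice(
        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True
    )
    global_space.choice("max_num_nodes", [16, 32, 64, 128])

    # One row per trial: 10 trials give the (10, 8) shape asserted above.
    tuning_logs = model.make_inspector().tuning_logs()
    best_score = tuning_logs["score"][tuning_logs["best"]].values[0]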