Reduce the number of trials in tuner unit tests.
PiperOrigin-RevId: 570962173
achoum authored and copybara-github committed Oct 5, 2023
1 parent fbe100e commit fa875fe
Showing 4 changed files with 70 additions and 42 deletions.
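
Note on the change: each tuning trial trains one candidate model, so the trial count is the main driver of these tests' runtime. The edits below cut the number of trials (and, where relevant, the trees per model and the Bazel shard counts), updating the shape assertions to match. For reference, a minimal sketch of the pattern the affected tests exercise, assuming a local adult_train.csv with an "income" label column (the path is a placeholder, not the tests' real data location):

    import pandas as pd
    import tensorflow_decision_forests as tfdf

    # Small random search: 10 trials over two hyperparameter axes.
    tuner = tfdf.tuner.RandomSearch(num_trials=10)
    tuner.choice("num_candidate_attributes_ratio", [1.0, 0.8, 0.6])
    tuner.choice("use_hessian_gain", [True, False])

    # Attaching the tuner to the model makes fit() run the trials.
    train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
        pd.read_csv("adult_train.csv"), label="income"  # placeholder path
    )
    model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
    model.fit(train_ds)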
2 changes: 1 addition & 1 deletion tensorflow_decision_forests/component/tuner/BUILD
@@ -26,7 +26,7 @@ py_test(
     srcs = ["tuner_test.py"],
     data = ["@ydf//yggdrasil_decision_forests/test_data"],
     python_version = "PY3",
-    shard_count = 10,
+    shard_count = 4,
     deps = [
         ":tuner",
         "@com_google_protobuf//:python_srcs",
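Context for the shard_count change: Bazel runs a sharded py_test as shard_count parallel processes, each executing a subset of the test cases, so the value trades machine usage against wall-clock time. With far fewer tuning trials per test case, fewer shards are needed, hence 10 to 4 here and the new shard_count = 2 added to keras_tuner_test below.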
28 changes: 15 additions & 13 deletions tensorflow_decision_forests/component/tuner/tuner_test.py
@@ -14,8 +14,8 @@

 import os

-from absl import logging
 from absl import flags
+from absl import logging
 from absl.testing import parameterized
 import pandas as pd
 import tensorflow as tf
@@ -32,7 +32,7 @@ def data_root_path() -> str:
 def ydf_test_datasets_path() -> str:
   return os.path.join(
       data_root_path(),
-      "external/ydf/yggdrasil_decision_forests/test_data/dataset"
+      "external/ydf/yggdrasil_decision_forests/test_data/dataset",
   )


@@ -42,7 +42,8 @@ def test_base(self):
     tuner = tuner_lib.RandomSearch(
         num_trials=20,
         trial_num_threads=2,
-        trial_maximum_training_duration_seconds=10)
+        trial_maximum_training_duration_seconds=10,
+    )
     tuner.choice("a", [1, 2, 3])
     tuner.choice("b", [1.0, 2.0, 3.0])
     tuner.choice("c", ["x", "y"])
@@ -141,7 +142,10 @@ def test_base(self):
                 }
               }
             }
-            """, abstract_learner_pb2.TrainingConfig()))
+            """,
+            abstract_learner_pb2.TrainingConfig(),
+        ),
+    )

   def test_errors(self):
     tuner = tuner_lib.RandomSearch(num_trials=20)
@@ -163,24 +167,22 @@ def test_predefined_hps_ranking(self):
     model = keras.GradientBoostedTreesModel(
         task=keras.Task.RANKING,
         ranking_group="GROUP",
-        num_trees=50,
-        tuner=tuner)
+        num_trees=5,
+        tuner=tuner,
+    )

     model.fit(ds)

   def test_predefined_hps_classification(self):
-    tuner = tuner_lib.RandomSearch(num_trials=50, use_predefined_hps=True)
-    ds_path = os.path.join(
-        ydf_test_datasets_path(), "adult_train.csv"
-    )
+    tuner = tuner_lib.RandomSearch(num_trials=10, use_predefined_hps=True)
+    ds_path = os.path.join(ydf_test_datasets_path(), "adult_train.csv")
     train_df = pd.read_csv(ds_path)
     ds = keras.pd_dataframe_to_tf_dataset(
         train_df, "income", task=keras.Task.CLASSIFICATION
     )
     model = keras.GradientBoostedTreesModel(
-        task=keras.Task.CLASSIFICATION,
-        num_trees=50,
-        tuner=tuner)
+        task=keras.Task.CLASSIFICATION, num_trees=5, tuner=tuner
+    )

     model.fit(ds)

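A note on use_predefined_hps, exercised by the tests above: with use_predefined_hps=True the tuner searches the learner's built-in hyperparameter space rather than axes declared by hand with choice(), so the test only sets the trial budget. A minimal sketch following the classification test (same names as the test code; ds is the classification dataset built above):

    tuner = tuner_lib.RandomSearch(num_trials=10, use_predefined_hps=True)
    model = keras.GradientBoostedTreesModel(
        task=keras.Task.CLASSIFICATION, num_trees=5, tuner=tuner
    )
    model.fit(ds)  # runs 10 trials over the predefined space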
1 change: 1 addition & 0 deletions tensorflow_decision_forests/keras/BUILD
@@ -147,6 +147,7 @@ py_test(
     srcs = ["keras_tuner_test.py"],
     data = ["@ydf//yggdrasil_decision_forests/test_data"],
     python_version = "PY3",
+    shard_count = 2,
     deps = [
         ":core",
         ":keras",
81 changes: 53 additions & 28 deletions tensorflow_decision_forests/keras/keras_tuner_test.py
@@ -17,6 +17,7 @@
 from __future__ import print_function

 import os
+
 from absl import flags
 from absl import logging
 import pandas as pd
@@ -31,8 +32,9 @@ def data_root_path() -> str:


 def test_data_path() -> str:
-  return os.path.join(data_root_path(),
-                      "external/ydf/yggdrasil_decision_forests/test_data")
+  return os.path.join(
+      data_root_path(), "external/ydf/yggdrasil_decision_forests/test_data"
+  )


 def tmp_path() -> str:
@@ -42,7 +44,6 @@ def tmp_path() -> str:
 class TFDFTunerTest(tf.test.TestCase):

   def test_random_adult_in_memory(self):
-
     # Prepare the datasets
     dataset_directory = os.path.join(test_data_path(), "dataset")
     train_path = os.path.join(dataset_directory, "adult_train.csv")
@@ -51,20 +52,23 @@ def test_random_adult_in_memory(self):
     label = "income"

     train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(train_path), label=label)
+        pd.read_csv(train_path), label=label
+    )
     test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(test_path), label=label)
+        pd.read_csv(test_path), label=label
+    )

     # Configure and train the model
-    tuner = tfdf.tuner.RandomSearch(num_trials=30)
+    tuner = tfdf.tuner.RandomSearch(num_trials=10)
     tuner.choice("num_candidate_attributes_ratio", [1.0, 0.8, 0.6])
     tuner.choice("use_hessian_gain", [True, False])

     local_search_space = tuner.choice("growing_strategy", ["LOCAL"])
     local_search_space.choice("max_depth", [4, 5, 6, 7])

     global_search_space = tuner.choice(
-        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True)
+        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True
+    )
     global_search_space.choice("max_num_nodes", [16, 32, 64, 128])

     model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
@@ -81,20 +85,26 @@ def test_random_adult_in_memory(self):
     self.assertSetEqual(
         set(tuning_logs.columns),
         set([
-            "score", "evaluation_time", "best",
-            "num_candidate_attributes_ratio", "use_hessian_gain",
-            "growing_strategy", "max_depth", "max_num_nodes"
-        ]))
-    self.assertEqual(tuning_logs.shape, (30, 8))
+            "score",
+            "evaluation_time",
+            "best",
+            "num_candidate_attributes_ratio",
+            "use_hessian_gain",
+            "growing_strategy",
+            "max_depth",
+            "max_num_nodes",
+        ]),
+    )
+    self.assertEqual(tuning_logs.shape, (10, 8))
     self.assertEqual(tuning_logs["best"].sum(), 1)
-    self.assertNear(tuning_logs["score"][tuning_logs["best"]].values[0], -0.587,
-                    0.05)
+    self.assertNear(
+        tuning_logs["score"][tuning_logs["best"]].values[0], -0.587, 0.05
+    )

     # This is a lot of text.
     _ = model.make_inspector().tuning_logs(return_format="proto")

   def test_random_adult_in_memory_predefined_hpspace(self):
-
     # Prepare the datasets
     dataset_directory = os.path.join(test_data_path(), "dataset")
     train_path = os.path.join(dataset_directory, "adult_train.csv")
@@ -103,12 +113,14 @@ def test_random_adult_in_memory_predefined_hpspace(self):
     label = "income"

     train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(train_path), label=label)
+        pd.read_csv(train_path), label=label
+    )
     test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
-        pd.read_csv(test_path), label=label)
+        pd.read_csv(test_path), label=label
+    )

     # Configure and train the model
-    tuner = tfdf.tuner.RandomSearch(num_trials=30, use_predefined_hps=True)
+    tuner = tfdf.tuner.RandomSearch(num_trials=10, use_predefined_hps=True)
     model = tfdf.keras.GradientBoostedTreesModel(num_trees=50, tuner=tuner)
     model.fit(train_ds)

@@ -123,17 +135,30 @@ def test_random_adult_in_memory_predefined_hpspace(self):
     self.assertSetEqual(
         set(tuning_logs.columns),
         set([
-            "score", "evaluation_time", "best",
-            "num_candidate_attributes_ratio", "use_hessian_gain",
-            "growing_strategy", "max_depth", "max_num_nodes", "subsample",
-            "shrinkage", "sampling_method", "sparse_oblique_weights",
-            "sparse_oblique_projection_density_factor", "categorical_algorithm",
-            "min_examples", "sparse_oblique_normalization", "split_axis"
-        ]))
-    self.assertEqual(tuning_logs.shape, (30, 17))
+            "score",
+            "evaluation_time",
+            "best",
+            "num_candidate_attributes_ratio",
+            "use_hessian_gain",
+            "growing_strategy",
+            "max_depth",
+            "max_num_nodes",
+            "subsample",
+            "shrinkage",
+            "sampling_method",
+            "sparse_oblique_weights",
+            "sparse_oblique_projection_density_factor",
+            "categorical_algorithm",
+            "min_examples",
+            "sparse_oblique_normalization",
+            "split_axis",
+        ]),
+    )
+    self.assertEqual(tuning_logs.shape, (10, 17))
     self.assertEqual(tuning_logs["best"].sum(), 1)
-    self.assertNear(tuning_logs["score"][tuning_logs["best"]].values[0], -0.587,
-                    0.05)
+    self.assertNear(
+        tuning_logs["score"][tuning_logs["best"]].values[0], -0.587, 0.05
+    )

     # This is a lot of text.
     _ = model.make_inspector().tuning_logs(return_format="proto")

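Two details of the keras_tuner_test changes are easy to miss. First, the growing-strategy search space is conditional: max_depth applies only when growing_strategy is LOCAL, and max_num_nodes only when it is BEST_FIRST_GLOBAL, so each subspace hangs off its own choice() call and the second call passes merge=True to extend the existing growing_strategy axis rather than redefine it. Second, the assertions read per-trial results back from the trained model; a sketch, assuming the inspector returns a pandas DataFrame by default:

    # Conditional search space: child choices apply only to the parent's values.
    local_space = tuner.choice("growing_strategy", ["LOCAL"])
    local_space.choice("max_depth", [4, 5, 6, 7])
    global_space = tuner.choice(
        "growing_strategy", ["BEST_FIRST_GLOBAL"], merge=True
    )
    global_space.choice("max_num_nodes", [16, 32, 64, 128])

    # One row per trial: 10 trials give the (10, 8) shape asserted above.
    tuning_logs = model.make_inspector().tuning_logs()
    best_score = tuning_logs["score"][tuning_logs["best"]].values[0]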