
Prepare release of TF-DF 1.10.0 and YDF 1.10.0 and PYDF 0.7.0
PiperOrigin-RevId: 665797043
rstz authored and copybara-github committed Aug 21, 2024
1 parent af57f04 commit 2340111
Showing 8 changed files with 47 additions and 44 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog

+## 1.10.0 - 2024-08-21
+
+### Fix
+
+- Fix compatibility with TF 2.17.0.
+- Fix MacOS build.
+
 ## 1.9.2 - 2024-07-04

 ### Fix
6 changes: 3 additions & 3 deletions WORKSPACE
@@ -20,9 +20,9 @@ http_archive(
 # absl used by tensorflow.
 http_archive(
     name = "org_tensorflow",
-    strip_prefix = "tensorflow-2.16.2",
-    sha256 = "023849bf253080cb1e4f09386f5eb900492da2288274086ed6cfecd6d99da9eb",
-    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.16.2.tar.gz"],
+    strip_prefix = "tensorflow-2.17.0",
+    sha256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc",
+    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"],
 )


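For reference, the new sha256 pins the TensorFlow 2.17.0 source archive. A short sketch of how that digest can be reproduced, assuming the canonical github.com archive URL serves the same tarball as the mirror URL in the diff:

```python
import hashlib
import urllib.request

# Assumed canonical URL for the pinned TensorFlow 2.17.0 source archive.
URL = "https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"
EXPECTED_SHA256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc"

# Download the archive and hash it to confirm the WORKSPACE pin.
with urllib.request.urlopen(URL) as response:
    digest = hashlib.sha256(response.read()).hexdigest()

assert digest == EXPECTED_SHA256, f"Unexpected archive digest: {digest}"
```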
10 changes: 5 additions & 5 deletions configure/setup.py
@@ -23,20 +23,20 @@
 from setuptools.command.install import install
 from setuptools.dist import Distribution

-_VERSION = "1.9.2"
+_VERSION = "1.10.0"

 with open("README.md", "r", encoding="utf-8") as fh:
   long_description = fh.read()

 REQUIRED_PACKAGES = [
     "numpy",
     "pandas",
-    "tensorflow==2.16.2",
+    "tensorflow==2.17.0",
     "six",
     "absl_py",
     "wheel",
     "wurlitzer",
-    "tf_keras~=2.16",
+    "tf_keras~=2.17",
     "ydf",
 ]

@@ -64,9 +64,9 @@ def is_pure(self):
   idx = sys.argv.index("bdist_wheel") + 1
   sys.argv.insert(idx, "--plat-name")
   if platform.processor() == "arm":
-    sys.argv.insert(idx + 1, "macosx_10_15_x86_64")
-  elif platform.processor() == "i386":
     sys.argv.insert(idx + 1, "macosx_12_0_arm64")
+  elif platform.processor() == "i386":
+    sys.argv.insert(idx + 1, "macosx_10_15_x86_64")
   else:
     raise ValueError(f"Unknown processor {platform.processor()}")
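The second hunk is the macOS build fix called out in the changelog: it swaps the wheel platform tags so that Apple Silicon hosts ("arm") get an arm64 wheel and Intel hosts ("i386") an x86_64 wheel. A minimal sketch of the corrected mapping, illustrative only and not the project's code:

```python
import platform

# Processor -> wheel platform tag mapping implied by the corrected code above.
_MACOS_PLAT_NAMES = {
    "arm": "macosx_12_0_arm64",     # Apple Silicon
    "i386": "macosx_10_15_x86_64",  # Intel
}

def macos_plat_name() -> str:
    processor = platform.processor()
    if processor not in _MACOS_PLAT_NAMES:
        raise ValueError(f"Unknown processor {processor}")
    return _MACOS_PLAT_NAMES[processor]
```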
3 changes: 2 additions & 1 deletion documentation/known_issues.md
@@ -20,7 +20,7 @@ TensorFlow Decision Forests is not yet available as a Windows Pip package.
 ## Incompatibility with Keras 3

 Compatibility with Keras 3 is not yet implemented. Use tf_keras or a TensorFlow
-version before 2.16.
+version before 2.16. Alternatively, use [ydf](https://pypi.org/project/ydf/).

 ## Untested for conda

@@ -54,6 +54,7 @@ The following table shows the compatibility between

 tensorflow_decision_forests | tensorflow
 --------------------------- | ---------------
+1.10.0                      | 2.17.0
 1.9.2                       | 2.16.2
 1.9.1                       | 2.16.1
 1.9.0                       | 2.16.1
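The Keras 3 note above recommends tf_keras, which setup.py now pulls in via `tf_keras~=2.17`. One possible way to apply that workaround in user code, assuming the standard `TF_USE_LEGACY_KERAS` environment switch:

```python
import os

# Must be set before TensorFlow is imported so that tf.keras resolves to the
# legacy Keras 2 implementation provided by the tf_keras package (assumption:
# the environment-variable switch supported by TF 2.16+).
os.environ["TF_USE_LEGACY_KERAS"] = "1"

import tensorflow as tf
import tensorflow_decision_forests as tfdf
```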
4 changes: 2 additions & 2 deletions tensorflow_decision_forests/__init__.py
@@ -51,10 +51,10 @@
 ```
 """

-__version__ = "1.9.2"
+__version__ = "1.10.0"
 __author__ = "Mathieu Guillame-Bert"

-compatible_tf_versions = ["2.16.2"]
+compatible_tf_versions = ["2.17.0"]
 __git_version__ = "HEAD" # Modify for release build.

 from tensorflow_decision_forests.tensorflow import check_version
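Since the wheel now declares TensorFlow 2.17.0 as its only compatible version, a quick post-upgrade sanity check could look like the sketch below; it relies only on the attributes visible in this diff.

```python
import tensorflow as tf
import tensorflow_decision_forests as tfdf

print(tfdf.__version__)             # expected: 1.10.0
print(tfdf.compatible_tf_versions)  # expected: ["2.17.0"]

# Exact-match check; the library's own check_version may be more permissive.
assert tf.__version__ in tfdf.compatible_tf_versions
```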
46 changes: 26 additions & 20 deletions tensorflow_decision_forests/keras/wrappers_pre_generated.py
@@ -304,11 +304,13 @@ class CartModel(core.CoreModel):
     random_seed: Random seed for the training of the model. Learners are
       expected to be deterministic by the random seed. Default: 123456.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "IN_NODE".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "IN_NODE".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
@@ -721,7 +723,7 @@ class DistributedGradientBoostedTreesModel(core.CoreModel):
     shrinkage: Coefficient applied to each tree prediction. A small value (0.02)
       tends to give more accurate results (assuming enough trees are trained),
       but results in larger models. Analogous to neural network learning rate.
-      Default: 0.1.
+      Fixed to 1.0 for DART models. Default: 0.1.
     use_hessian_gain: Use true, uses a formulation of split gain with a hessian
       term i.e. optimizes the splits to minimize the variance of "gradient /
       hessian. Available for all losses except regression. Default: False.
@@ -1029,7 +1031,7 @@ class GradientBoostedTreesModel(core.CoreModel):
       validation dataset. Enabling this feature can increase the training time
       significantly. Default: False.
     dart_dropout: Dropout rate applied when using the DART i.e. when
-      forest_extraction=DART. Default: 0.01.
+      forest_extraction=DART. Default: None.
     early_stopping: Early stopping detects the overfitting of the model and
       halts it training using the validation dataset. If not provided directly,
       the validation dataset is extracted from the training dataset (see
@@ -1207,13 +1209,15 @@ class GradientBoostedTreesModel(core.CoreModel):
     shrinkage: Coefficient applied to each tree prediction. A small value (0.02)
       tends to give more accurate results (assuming enough trees are trained),
       but results in larger models. Analogous to neural network learning rate.
-      Default: 0.1.
+      Fixed to 1.0 for DART models. Default: 0.1.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "PRESORT".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "PRESORT".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
@@ -1332,7 +1336,7 @@ def __init__(
       categorical_set_split_max_num_items: Optional[int] = -1,
       categorical_set_split_min_item_frequency: Optional[int] = 1,
       compute_permutation_variable_importance: Optional[bool] = False,
-      dart_dropout: Optional[float] = 0.01,
+      dart_dropout: Optional[float] = None,
       early_stopping: Optional[str] = "LOSS_INCREASE",
       early_stopping_initial_iteration: Optional[int] = 10,
       early_stopping_num_trees_look_ahead: Optional[int] = 30,
@@ -2001,7 +2005,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
 class RandomForestModel(core.CoreModel):
   r"""Random Forest learning algorithm.

-  A Random Forest (https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf)
+  A [Random Forest](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf)
   is a collection of deep CART decision trees trained independently and without
   pruning. Each tree is trained on a random subset of the original training
   dataset (sampled with replacement).
@@ -2310,11 +2314,13 @@ class RandomForestModel(core.CoreModel):
       all the examples are used to train all the trees (you probably do not want
       that). Default: True.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "PRESORT".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "PRESORT".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
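The regenerated docstrings above document the new `sorting_strategy` values (AUTO, FORCE_PRESORT) and the `dart_dropout` default change from 0.01 to None. A hedged usage sketch, assuming the generated Keras wrappers accept these hyper-parameters as constructor arguments (values are illustrative only):

```python
import tensorflow_decision_forests as tfdf

# Explicitly pick how numerical features are sorted when searching for splits.
cart = tfdf.keras.CartModel(sorting_strategy="IN_NODE")     # CART default
rf = tfdf.keras.RandomForestModel(sorting_strategy="AUTO")  # learner chooses

# With the new default of None, DART applies no dropout unless set explicitly.
gbt = tfdf.keras.GradientBoostedTreesModel(
    forest_extraction="DART",
    dart_dropout=0.1,
)
```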
11 changes: 0 additions & 11 deletions tools/build_pip_package.sh
@@ -115,17 +115,6 @@ function assemble_files() {

   # Distribution server binaries
   cp ${SRCBIN}/keras/grpc_worker_main ${SRCPK}/tensorflow_decision_forests/keras/
-
-  # Note: Starting with TF-DF 0.9.1, the YDF Protos are included by (P)YDF.
-  # TODO: Remove this block.
-  # # YDF's proto wrappers.
-  # YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
-  # mkdir -p ${SRCPK}/yggdrasil_decision_forests
-  # pushd ${YDFSRCBIN}
-  # find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
-  # popd
-  # # Add __init__.py to all exported Yggdrasil sub-directories.
-  # find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
 }

 # Build a pip package.
4 changes: 2 additions & 2 deletions tools/start_compile_docker.sh
@@ -64,11 +64,11 @@
 # directory.
 TFDF_DIRNAME=${PWD##*/}

-DOCKER_IMAGE=tensorflow/build:2.16-python3.9
+DOCKER_IMAGE=tensorflow/build:2.17-python3.9
 DOCKER_CONTAINER=compile_tfdf

 echo "Available containers:"
-sudo sudo docker container ls -a --size
+sudo docker container ls -a --size

 set +e # Ignore error if the container already exist
 CREATE_DOCKER_FLAGS="-i -t -p 8889:8889 --network host -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME}"
