diff --git a/CHANGELOG.md b/CHANGELOG.md index c5ce9fe..92411ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 1.10.0 - 2024-08-21 + +### Fix + +- Fix compatibility with TF 2.17.0. +- Fix MacOS build. + ## 1.9.2 - 2024-07-04 ### Fix diff --git a/WORKSPACE b/WORKSPACE index 18bed1f..5a292f9 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -20,9 +20,9 @@ http_archive( # absl used by tensorflow. http_archive( name = "org_tensorflow", - strip_prefix = "tensorflow-2.16.2", - sha256 = "023849bf253080cb1e4f09386f5eb900492da2288274086ed6cfecd6d99da9eb", - urls = ["https://github.com/tensorflow/tensorflow/archive/v2.16.2.tar.gz"], + strip_prefix = "tensorflow-2.17.0", + sha256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc", + urls = ["https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"], ) diff --git a/configure/setup.py b/configure/setup.py index 2df113d..1b3e10b 100644 --- a/configure/setup.py +++ b/configure/setup.py @@ -23,7 +23,7 @@ from setuptools.command.install import install from setuptools.dist import Distribution -_VERSION = "1.9.2" +_VERSION = "1.10.0" with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() @@ -31,12 +31,12 @@ REQUIRED_PACKAGES = [ "numpy", "pandas", - "tensorflow==2.16.2", + "tensorflow==2.17.0", "six", "absl_py", "wheel", "wurlitzer", - "tf_keras~=2.16", + "tf_keras~=2.17", "ydf", ] @@ -64,9 +64,9 @@ def is_pure(self): idx = sys.argv.index("bdist_wheel") + 1 sys.argv.insert(idx, "--plat-name") if platform.processor() == "arm": - sys.argv.insert(idx + 1, "macosx_10_15_x86_64") - elif platform.processor() == "i386": sys.argv.insert(idx + 1, "macosx_12_0_arm64") + elif platform.processor() == "i386": + sys.argv.insert(idx + 1, "macosx_10_15_x86_64") else: raise ValueError(f"Unknown processor {platform.processor()}") else: diff --git a/documentation/known_issues.md b/documentation/known_issues.md index b181b94..4641291 100644 --- a/documentation/known_issues.md 
+++ b/documentation/known_issues.md @@ -20,7 +20,7 @@ TensorFlow Decision Forests is not yet available as a Windows Pip package. ## Incompatibility with Keras 3 Compatibility with Keras 3 is not yet implemented. Use tf_keras or a TensorFlow -version before 2.16. +version before 2.16. Alternatively, use [ydf](https://pypi.org/project/ydf/). ## Untested for conda @@ -54,6 +54,7 @@ The following table shows the compatibility between tensorflow_decision_forests | tensorflow --------------------------- | --------------- +1.10.0 | 2.17.0 1.9.2 | 2.16.2 1.9.1 | 2.16.1 1.9.0 | 2.16.1 diff --git a/tensorflow_decision_forests/__init__.py b/tensorflow_decision_forests/__init__.py index 23f2c1e..1c37a34 100644 --- a/tensorflow_decision_forests/__init__.py +++ b/tensorflow_decision_forests/__init__.py @@ -51,10 +51,10 @@ ``` """ -__version__ = "1.9.2" +__version__ = "1.10.0" __author__ = "Mathieu Guillame-Bert" -compatible_tf_versions = ["2.16.2"] +compatible_tf_versions = ["2.17.0"] __git_version__ = "HEAD" # Modify for release build. from tensorflow_decision_forests.tensorflow import check_version diff --git a/tensorflow_decision_forests/keras/wrappers_pre_generated.py b/tensorflow_decision_forests/keras/wrappers_pre_generated.py index f3d4c08..95233cf 100644 --- a/tensorflow_decision_forests/keras/wrappers_pre_generated.py +++ b/tensorflow_decision_forests/keras/wrappers_pre_generated.py @@ -304,11 +304,13 @@ class CartModel(core.CoreModel): random_seed: Random seed for the training of the model. Learners are expected to be deterministic by the random seed. Default: 123456. sorting_strategy: How are sorted the numerical features in order to find the - splits - PRESORT: The features are pre-sorted at the start of the - training. This solution is faster but consumes much more memory than - IN_NODE. - IN_NODE: The features are sorted just before being used in the - node. This solution is slow but consumes little amount of memory. . - Default: "IN_NODE". 
+ splits - AUTO: Selects the most efficient method among IN_NODE, + FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before + being used in the node. This solution is slow but consumes a small amount + of memory. - FORCE_PRESORT: The features are pre-sorted at the start of + the training. This solution is faster but consumes much more memory than + IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and + IN_NODE. Default: "IN_NODE". sparse_oblique_max_num_projections: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after the num_projections_exponent). Oblique splits try out @@ -721,7 +723,7 @@ class DistributedGradientBoostedTreesModel(core.CoreModel): shrinkage: Coefficient applied to each tree prediction. A small value (0.02) tends to give more accurate results (assuming enough trees are trained), but results in larger models. Analogous to neural network learning rate. - Default: 0.1. + Fixed to 1.0 for DART models. Default: 0.1. use_hessian_gain: Use true, uses a formulation of split gain with a hessian term i.e. optimizes the splits to minimize the variance of "gradient / hessian. Available for all losses except regression. Default: False. @@ -1029,7 +1031,7 @@ class GradientBoostedTreesModel(core.CoreModel): validation dataset. Enabling this feature can increase the training time significantly. Default: False. dart_dropout: Dropout rate applied when using the DART i.e. when - forest_extraction=DART. Default: 0.01. + forest_extraction=DART. Default: None. early_stopping: Early stopping detects the overfitting of the model and halts it training using the validation dataset. If not provided directly, the validation dataset is extracted from the training dataset (see @@ -1207,13 +1209,15 @@ class GradientBoostedTreesModel(core.CoreModel): shrinkage: Coefficient applied to each tree prediction. 
A small value (0.02) tends to give more accurate results (assuming enough trees are trained), but results in larger models. Analogous to neural network learning rate. - Default: 0.1. + Fixed to 1.0 for DART models. Default: 0.1. sorting_strategy: How are sorted the numerical features in order to find the - splits - PRESORT: The features are pre-sorted at the start of the - training. This solution is faster but consumes much more memory than - IN_NODE. - IN_NODE: The features are sorted just before being used in the - node. This solution is slow but consumes little amount of memory. . - Default: "PRESORT". + splits - AUTO: Selects the most efficient method among IN_NODE, + FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before + being used in the node. This solution is slow but consumes a small amount + of memory. - FORCE_PRESORT: The features are pre-sorted at the start of + the training. This solution is faster but consumes much more memory than + IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and + IN_NODE. Default: "PRESORT". sparse_oblique_max_num_projections: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after the num_projections_exponent). Oblique splits try out @@ -1332,7 +1336,7 @@ def __init__( categorical_set_split_max_num_items: Optional[int] = -1, categorical_set_split_min_item_frequency: Optional[int] = 1, compute_permutation_variable_importance: Optional[bool] = False, - dart_dropout: Optional[float] = 0.01, + dart_dropout: Optional[float] = None, early_stopping: Optional[str] = "LOSS_INCREASE", early_stopping_initial_iteration: Optional[int] = 10, early_stopping_num_trees_look_ahead: Optional[int] = 30, @@ -2001,7 +2005,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities: class RandomForestModel(core.CoreModel): r"""Random Forest learning algorithm. 
- A Random Forest (https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) + A [Random Forest](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf) is a collection of deep CART decision trees trained independently and without pruning. Each tree is trained on a random subset of the original training dataset (sampled with replacement). @@ -2310,11 +2314,13 @@ class RandomForestModel(core.CoreModel): all the examples are used to train all the trees (you probably do not want that). Default: True. sorting_strategy: How are sorted the numerical features in order to find the - splits - PRESORT: The features are pre-sorted at the start of the - training. This solution is faster but consumes much more memory than - IN_NODE. - IN_NODE: The features are sorted just before being used in the - node. This solution is slow but consumes little amount of memory. . - Default: "PRESORT". + splits - AUTO: Selects the most efficient method among IN_NODE, + FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before + being used in the node. This solution is slow but consumes a small amount + of memory. - FORCE_PRESORT: The features are pre-sorted at the start of + the training. This solution is faster but consumes much more memory than + IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and + IN_NODE. Default: "PRESORT". sparse_oblique_max_num_projections: For sparse oblique splits i.e. `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after the num_projections_exponent). Oblique splits try out diff --git a/tools/build_pip_package.sh b/tools/build_pip_package.sh index 1d8a84f..af07e96 100755 --- a/tools/build_pip_package.sh +++ b/tools/build_pip_package.sh @@ -115,17 +115,6 @@ function assemble_files() { # Distribution server binaries cp ${SRCBIN}/keras/grpc_worker_main ${SRCPK}/tensorflow_decision_forests/keras/ - - # Note: Starting with TF-DF 0.9.1, the YDF Protos are included by (P)YDF. - # TODO: Remove this block. 
- # # YDF's proto wrappers. - # YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests" - # mkdir -p ${SRCPK}/yggdrasil_decision_forests - # pushd ${YDFSRCBIN} - # find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \; - # popd - # # Add __init__.py to all exported Yggdrasil sub-directories. - # find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \; } # Build a pip package. diff --git a/tools/start_compile_docker.sh b/tools/start_compile_docker.sh index 12118e8..d8a3cac 100755 --- a/tools/start_compile_docker.sh +++ b/tools/start_compile_docker.sh @@ -64,11 +64,11 @@ # directory. TFDF_DIRNAME=${PWD##*/} -DOCKER_IMAGE=tensorflow/build:2.16-python3.9 +DOCKER_IMAGE=tensorflow/build:2.17-python3.9 DOCKER_CONTAINER=compile_tfdf echo "Available containers:" -sudo sudo docker container ls -a --size +sudo docker container ls -a --size set +e # Ignore error if the container already exist CREATE_DOCKER_FLAGS="-i -t -p 8889:8889 --network host -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME}"