
Prepare release of TF-DF 1.10.0 and YDF 1.10.0 and PYDF 0.7.0
PiperOrigin-RevId: 665797043
rstz authored and copybara-github committed Aug 21, 2024
1 parent af57f04 commit 2340111
Showing 8 changed files with 47 additions and 44 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog

+## 1.10.0 - 2024-08-21
+
+### Fix
+
+- Fix compatibility with TF 2.17.0.
+- Fix MacOS build.
+
 ## 1.9.2 - 2024-07-04

 ### Fix
6 changes: 3 additions & 3 deletions WORKSPACE
@@ -20,9 +20,9 @@ http_archive(
 # absl used by tensorflow.
 http_archive(
     name = "org_tensorflow",
-    strip_prefix = "tensorflow-2.16.2",
-    sha256 = "023849bf253080cb1e4f09386f5eb900492da2288274086ed6cfecd6d99da9eb",
-    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.16.2.tar.gz"],
+    strip_prefix = "tensorflow-2.17.0",
+    sha256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc",
+    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"],
 )


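For reference, the new sha256 pins the TensorFlow 2.17.0 source archive. A short sketch of how that digest can be reproduced, assuming the canonical github.com archive URL serves the same tarball as the mirror URL in the diff:

```python
import hashlib
import urllib.request

# Assumed canonical URL for the pinned TensorFlow 2.17.0 source archive.
URL = "https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"
EXPECTED_SHA256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc"

# Download the archive and hash it to confirm the WORKSPACE pin.
with urllib.request.urlopen(URL) as response:
    digest = hashlib.sha256(response.read()).hexdigest()

assert digest == EXPECTED_SHA256, f"Unexpected archive digest: {digest}"
```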
10 changes: 5 additions & 5 deletions configure/setup.py
@@ -23,20 +23,20 @@
 from setuptools.command.install import install
 from setuptools.dist import Distribution

-_VERSION = "1.9.2"
+_VERSION = "1.10.0"

 with open("README.md", "r", encoding="utf-8") as fh:
   long_description = fh.read()

 REQUIRED_PACKAGES = [
     "numpy",
     "pandas",
-    "tensorflow==2.16.2",
+    "tensorflow==2.17.0",
     "six",
     "absl_py",
     "wheel",
     "wurlitzer",
-    "tf_keras~=2.16",
+    "tf_keras~=2.17",
     "ydf",
 ]

@@ -64,9 +64,9 @@ def is_pure(self):
   idx = sys.argv.index("bdist_wheel") + 1
   sys.argv.insert(idx, "--plat-name")
   if platform.processor() == "arm":
-    sys.argv.insert(idx + 1, "macosx_10_15_x86_64")
-  elif platform.processor() == "i386":
     sys.argv.insert(idx + 1, "macosx_12_0_arm64")
+  elif platform.processor() == "i386":
+    sys.argv.insert(idx + 1, "macosx_10_15_x86_64")
   else:
     raise ValueError(f"Unknown processor {platform.processor()}")
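The second hunk is the macOS build fix called out in the changelog: it swaps the wheel platform tags so that Apple Silicon hosts ("arm") get an arm64 wheel and Intel hosts ("i386") an x86_64 wheel. A minimal sketch of the corrected mapping, illustrative only and not the project's code:

```python
import platform

# Processor -> wheel platform tag mapping implied by the corrected code above.
_MACOS_PLAT_NAMES = {
    "arm": "macosx_12_0_arm64",     # Apple Silicon
    "i386": "macosx_10_15_x86_64",  # Intel
}

def macos_plat_name() -> str:
    processor = platform.processor()
    if processor not in _MACOS_PLAT_NAMES:
        raise ValueError(f"Unknown processor {processor}")
    return _MACOS_PLAT_NAMES[processor]
```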
3 changes: 2 additions & 1 deletion documentation/known_issues.md
@@ -20,7 +20,7 @@ TensorFlow Decision Forests is not yet available as a Windows Pip package.
 ## Incompatibility with Keras 3

 Compatibility with Keras 3 is not yet implemented. Use tf_keras or a TensorFlow
-version before 2.16.
+version before 2.16. Alternatively, use [ydf](https://pypi.org/project/ydf/).

 ## Untested for conda

@@ -54,6 +54,7 @@ The following table shows the compatibility between

 tensorflow_decision_forests | tensorflow
 --------------------------- | ---------------
+1.10.0                      | 2.17.0
 1.9.2                       | 2.16.2
 1.9.1                       | 2.16.1
 1.9.0                       | 2.16.1
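The Keras 3 note above recommends tf_keras, which setup.py now pulls in via `tf_keras~=2.17`. One possible way to apply that workaround in user code, assuming the standard `TF_USE_LEGACY_KERAS` environment switch:

```python
import os

# Must be set before TensorFlow is imported so that tf.keras resolves to the
# legacy Keras 2 implementation provided by the tf_keras package (assumption:
# the environment-variable switch supported by TF 2.16+).
os.environ["TF_USE_LEGACY_KERAS"] = "1"

import tensorflow as tf
import tensorflow_decision_forests as tfdf
```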
4 changes: 2 additions & 2 deletions tensorflow_decision_forests/__init__.py
@@ -51,10 +51,10 @@
 ```
 """

-__version__ = "1.9.2"
+__version__ = "1.10.0"
 __author__ = "Mathieu Guillame-Bert"

-compatible_tf_versions = ["2.16.2"]
+compatible_tf_versions = ["2.17.0"]
 __git_version__ = "HEAD" # Modify for release build.

 from tensorflow_decision_forests.tensorflow import check_version
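Since the wheel now declares TensorFlow 2.17.0 as its only compatible version, a quick post-upgrade sanity check could look like the sketch below; it relies only on the attributes visible in this diff.

```python
import tensorflow as tf
import tensorflow_decision_forests as tfdf

print(tfdf.__version__)             # expected: 1.10.0
print(tfdf.compatible_tf_versions)  # expected: ["2.17.0"]

# Exact-match check; the library's own check_version may be more permissive.
assert tf.__version__ in tfdf.compatible_tf_versions
```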
46 changes: 26 additions & 20 deletions tensorflow_decision_forests/keras/wrappers_pre_generated.py
@@ -304,11 +304,13 @@ class CartModel(core.CoreModel):
     random_seed: Random seed for the training of the model. Learners are
       expected to be deterministic by the random seed. Default: 123456.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "IN_NODE".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "IN_NODE".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
@@ -721,7 +723,7 @@ class DistributedGradientBoostedTreesModel(core.CoreModel):
     shrinkage: Coefficient applied to each tree prediction. A small value (0.02)
       tends to give more accurate results (assuming enough trees are trained),
       but results in larger models. Analogous to neural network learning rate.
-      Default: 0.1.
+      Fixed to 1.0 for DART models. Default: 0.1.
     use_hessian_gain: Use true, uses a formulation of split gain with a hessian
       term i.e. optimizes the splits to minimize the variance of "gradient /
       hessian. Available for all losses except regression. Default: False.
@@ -1029,7 +1031,7 @@ class GradientBoostedTreesModel(core.CoreModel):
       validation dataset. Enabling this feature can increase the training time
       significantly. Default: False.
     dart_dropout: Dropout rate applied when using the DART i.e. when
-      forest_extraction=DART. Default: 0.01.
+      forest_extraction=DART. Default: None.
     early_stopping: Early stopping detects the overfitting of the model and
       halts it training using the validation dataset. If not provided directly,
       the validation dataset is extracted from the training dataset (see
@@ -1207,13 +1209,15 @@ class GradientBoostedTreesModel(core.CoreModel):
     shrinkage: Coefficient applied to each tree prediction. A small value (0.02)
       tends to give more accurate results (assuming enough trees are trained),
       but results in larger models. Analogous to neural network learning rate.
-      Default: 0.1.
+      Fixed to 1.0 for DART models. Default: 0.1.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "PRESORT".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "PRESORT".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
@@ -1332,7 +1336,7 @@ def __init__(
       categorical_set_split_max_num_items: Optional[int] = -1,
       categorical_set_split_min_item_frequency: Optional[int] = 1,
       compute_permutation_variable_importance: Optional[bool] = False,
-      dart_dropout: Optional[float] = 0.01,
+      dart_dropout: Optional[float] = None,
       early_stopping: Optional[str] = "LOSS_INCREASE",
       early_stopping_initial_iteration: Optional[int] = 10,
       early_stopping_num_trees_look_ahead: Optional[int] = 30,
@@ -2001,7 +2005,7 @@ def capabilities() -> abstract_learner_pb2.LearnerCapabilities:
 class RandomForestModel(core.CoreModel):
   r"""Random Forest learning algorithm.

-  A Random Forest (https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf)
+  A [Random Forest](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf)
   is a collection of deep CART decision trees trained independently and without
   pruning. Each tree is trained on a random subset of the original training
   dataset (sampled with replacement).
@@ -2310,11 +2314,13 @@ class RandomForestModel(core.CoreModel):
       all the examples are used to train all the trees (you probably do not want
       that). Default: True.
     sorting_strategy: How are sorted the numerical features in order to find the
-      splits - PRESORT: The features are pre-sorted at the start of the
-      training. This solution is faster but consumes much more memory than
-      IN_NODE. - IN_NODE: The features are sorted just before being used in the
-      node. This solution is slow but consumes little amount of memory. .
-      Default: "PRESORT".
+      splits - AUTO: Selects the most efficient method among IN_NODE,
+      FORCE_PRESORT, and LAYER. - IN_NODE: The features are sorted just before
+      being used in the node. This solution is slow but consumes little amount
+      of memory. - FORCE_PRESORT: The features are pre-sorted at the start of
+      the training. This solution is faster but consumes much more memory than
+      IN_NODE. - PRESORT: Automatically choose between FORCE_PRESORT and
+      IN_NODE. . Default: "PRESORT".
     sparse_oblique_max_num_projections: For sparse oblique splits i.e.
       `split_axis=SPARSE_OBLIQUE`. Maximum number of projections (applied after
       the num_projections_exponent). Oblique splits try out
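The regenerated docstrings above document the new `sorting_strategy` values (AUTO, FORCE_PRESORT) and the `dart_dropout` default change from 0.01 to None. A hedged usage sketch, assuming the generated Keras wrappers accept these hyper-parameters as constructor arguments (values are illustrative only):

```python
import tensorflow_decision_forests as tfdf

# Explicitly pick how numerical features are sorted when searching for splits.
cart = tfdf.keras.CartModel(sorting_strategy="IN_NODE")     # CART default
rf = tfdf.keras.RandomForestModel(sorting_strategy="AUTO")  # learner chooses

# With the new default of None, DART applies no dropout unless set explicitly.
gbt = tfdf.keras.GradientBoostedTreesModel(
    forest_extraction="DART",
    dart_dropout=0.1,
)
```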
11 changes: 0 additions & 11 deletions tools/build_pip_package.sh
@@ -115,17 +115,6 @@ function assemble_files() {

   # Distribution server binaries
   cp ${SRCBIN}/keras/grpc_worker_main ${SRCPK}/tensorflow_decision_forests/keras/
-
-  # Note: Starting with TF-DF 0.9.1, the YDF Protos are included by (P)YDF.
-  # TODO: Remove this block.
-  # # YDF's proto wrappers.
-  # YDFSRCBIN="bazel-bin/external/ydf/yggdrasil_decision_forests"
-  # mkdir -p ${SRCPK}/yggdrasil_decision_forests
-  # pushd ${YDFSRCBIN}
-  # find . -name \*.py -exec rsync -R -arv {} ${SRCPK}/yggdrasil_decision_forests \;
-  # popd
-  # # Add __init__.py to all exported Yggdrasil sub-directories.
-  # find ${SRCPK}/yggdrasil_decision_forests -type d -exec touch {}/__init__.py \;
 }

 # Build a pip package.
4 changes: 2 additions & 2 deletions tools/start_compile_docker.sh
@@ -64,11 +64,11 @@
 # directory.
 TFDF_DIRNAME=${PWD##*/}

-DOCKER_IMAGE=tensorflow/build:2.16-python3.9
+DOCKER_IMAGE=tensorflow/build:2.17-python3.9
 DOCKER_CONTAINER=compile_tfdf

 echo "Available containers:"
-sudo sudo docker container ls -a --size
+sudo docker container ls -a --size

 set +e # Ignore error if the container already exist
 CREATE_DOCKER_FLAGS="-i -t -p 8889:8889 --network host -v ${PWD}/..:/working_dir -w /working_dir/${TFDF_DIRNAME}"
