From 8a4ddf138176108211e87c22492ce47d4b7af508 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 12 Sep 2023 11:41:14 +0800 Subject: [PATCH 1/2] only bucketize if skip statistic(#4660) Signed-off-by: Yu Wu --- .../hetero_feature_binning/test_feature_binning_asymmetric.py | 4 ---- python/fate/components/components/hetero_feature_binning.py | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py index bd48a35729..43f70ae59d 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py @@ -57,14 +57,10 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(binning_0) pipeline.add_task(binning_1) - # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() # print(pipeline.get_dag()) pipeline.fit() - # print(pipeline.get_task_info("binning_1").get_output_model()) - # print(pipeline.get_task_info("feature_scale_1").get_output_model()) - pipeline.deploy([psi_0, binning_0]) predict_pipeline = FateFlowPipeline() diff --git a/python/fate/components/components/hetero_feature_binning.py b/python/fate/components/components/hetero_feature_binning.py index 52a8d7fd02..a29f437205 100644 --- a/python/fate/components/components/hetero_feature_binning.py +++ b/python/fate/components/components/hetero_feature_binning.py @@ -137,6 +137,7 @@ def train(ctx, train_data, train_output_data, output_model, role, method, n_bins else: raise ValueError(f"unknown role: {role}") binning.fit(sub_ctx, train_data) + binned_data = None if not skip_metrics: binned_data = binning._bin_obj.bucketize_data(train_data) binning.compute_metrics(sub_ctx, binned_data) @@ -146,7 +147,8 @@ def train(ctx, train_data, train_output_data, output_model, role, method, n_bins sub_ctx = ctx.sub_ctx("predict") output_data = train_data if transform_method is not None: - binned_data = binning._bin_obj.bucketize_data(train_data) + if binned_data is None: + binned_data = binning._bin_obj.bucketize_data(train_data) output_data = binning.transform(sub_ctx, binned_data) train_output_data.write(output_data) From 4b9adb17c630ace87e03b6cb10727a478c6ce8ae Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 12 Sep 2023 11:41:52 +0800 Subject: [PATCH 2/2] only bucketize if skip statistic(#4660) Signed-off-by: Yu Wu --- .../test_feature_binning_quantile.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py index e1dc37525d..b98031b4b2 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py @@ -42,28 +42,26 @@ def main(config="../config.yaml", namespace=""): binning_0 = HeteroFeatureBinning("binning_0", method="quantile", n_bins=10, - bin_col=["x0"], transform_method="bin_idx", train_data=psi_0.outputs["output_data"] ) + binning_0.hosts[0].component_setting(bin_idx=[1]) + binning_0.guest.component_setting(bin_col=["x0"]) binning_1 = HeteroFeatureBinning("binning_1", transform_method="bin_idx", method="quantile", - category_col=["x0"], train_data=binning_0.outputs["train_output_data"]) + binning_1.hosts[0].component_setting(category_idx=[1]) + binning_1.guest.component_setting(category_col=["x0"]) pipeline.add_task(psi_0) pipeline.add_task(binning_0) pipeline.add_task(binning_1) - # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() # print(pipeline.get_dag()) pipeline.fit() - # print(pipeline.get_task_info("binning_1").get_output_model()) - # print(pipeline.get_task_info("feature_scale_1").get_output_model()) - pipeline.deploy([psi_0, binning_0]) predict_pipeline = FateFlowPipeline()