From 20b2c9fc6dee26f2b952210ed5f8058279f1e949 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 26 Jul 2023 16:37:24 +0800 Subject: [PATCH 01/30] add fate-test(#5008) Signed-off-by: Yu Wu --- examples/pipeline/coordinated_lr/config.yaml | 10 + .../coordinated_lr_testsuite.yaml | 42 ++ .../pipeline/coordinated_lr/test_lr_sid.py | 89 +++ .../pipeline/coordinated_lr/test_lr_sid_cv.py | 60 ++ .../coordinated_lr/test_lr_sid_warm_start.py | 90 +++ examples/pipeline/test_lr_sid.py | 78 --- examples/pipeline/test_lr_sid_cv.py | 38 -- examples/pipeline/test_lr_sid_warm_start.py | 81 --- .../pipeline/test_single_lr_multi_host.py | 93 +++ examples/pipeline/test_upload_sid.py | 16 +- python/fate_test/__init__.py | 0 python/fate_test/fate_test/__init__.py | 15 + python/fate_test/fate_test/_ascii.py | 48 ++ python/fate_test/fate_test/_client.py | 76 +++ python/fate_test/fate_test/_config.py | 269 ++++++++ python/fate_test/fate_test/_flow_client.py | 376 +++++++++++ python/fate_test/fate_test/_io.py | 70 +++ python/fate_test/fate_test/_parser.py | 587 ++++++++++++++++++ .../fate_test/fate_test/scripts/__init__.py | 15 + .../fate_test/fate_test/scripts/_options.py | 67 ++ python/fate_test/fate_test/scripts/_utils.py | 188 ++++++ .../fate_test/scripts/benchmark_cli.py | 151 +++++ python/fate_test/fate_test/scripts/cli.py | 67 ++ .../fate_test/fate_test/scripts/config_cli.py | 79 +++ .../fate_test/fate_test/scripts/data_cli.py | 435 +++++++++++++ .../fate_test/scripts/generate_mock_data.py | 345 ++++++++++ .../fate_test/scripts/performance_cli.py | 368 +++++++++++ .../fate_test/scripts/quick_test_cli.py | 95 +++ .../fate_test/scripts/testsuite_cli.py | 165 +++++ python/fate_test/fate_test/utils.py | 348 +++++++++++ python/fate_test/pyproject.toml | 44 ++ python/fate_test/setup.py | 40 ++ 32 files changed, 4240 insertions(+), 205 deletions(-) create mode 100644 examples/pipeline/coordinated_lr/config.yaml create mode 100644 examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml 
create mode 100644 examples/pipeline/coordinated_lr/test_lr_sid.py create mode 100644 examples/pipeline/coordinated_lr/test_lr_sid_cv.py create mode 100644 examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py delete mode 100644 examples/pipeline/test_lr_sid.py delete mode 100644 examples/pipeline/test_lr_sid_cv.py delete mode 100644 examples/pipeline/test_lr_sid_warm_start.py create mode 100644 examples/pipeline/test_single_lr_multi_host.py create mode 100644 python/fate_test/__init__.py create mode 100644 python/fate_test/fate_test/__init__.py create mode 100644 python/fate_test/fate_test/_ascii.py create mode 100644 python/fate_test/fate_test/_client.py create mode 100644 python/fate_test/fate_test/_config.py create mode 100644 python/fate_test/fate_test/_flow_client.py create mode 100644 python/fate_test/fate_test/_io.py create mode 100644 python/fate_test/fate_test/_parser.py create mode 100644 python/fate_test/fate_test/scripts/__init__.py create mode 100644 python/fate_test/fate_test/scripts/_options.py create mode 100644 python/fate_test/fate_test/scripts/_utils.py create mode 100644 python/fate_test/fate_test/scripts/benchmark_cli.py create mode 100644 python/fate_test/fate_test/scripts/cli.py create mode 100644 python/fate_test/fate_test/scripts/config_cli.py create mode 100644 python/fate_test/fate_test/scripts/data_cli.py create mode 100644 python/fate_test/fate_test/scripts/generate_mock_data.py create mode 100644 python/fate_test/fate_test/scripts/performance_cli.py create mode 100644 python/fate_test/fate_test/scripts/quick_test_cli.py create mode 100644 python/fate_test/fate_test/scripts/testsuite_cli.py create mode 100644 python/fate_test/fate_test/utils.py create mode 100644 python/fate_test/pyproject.toml create mode 100644 python/fate_test/setup.py diff --git a/examples/pipeline/coordinated_lr/config.yaml b/examples/pipeline/coordinated_lr/config.yaml new file mode 100644 index 0000000000..394a5b7802 --- /dev/null +++ 
b/examples/pipeline/coordinated_lr/config.yaml @@ -0,0 +1,10 @@ +parties: # parties default id + guest: + - 9999 + host: + - 9998 + - 9999 + arbiter: + - 9998 + +data_base_dir: "" # path to project base where data is located \ No newline at end of file diff --git a/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml new file mode 100644 index 0000000000..2de8a25b4f --- /dev/null +++ b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml @@ -0,0 +1,42 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host_sid + namespace: experiment + role: host_0 +tasks: + normal-lr: + script: test_lr_sid.py + lr-cv: + script: test_lr_sid_cv.py + lr-warm-start: + script: test_lr_sid_warm_start.py diff --git a/examples/pipeline/coordinated_lr/test_lr_sid.py b/examples/pipeline/coordinated_lr/test_lr_sid.py new file mode 100644 index 0000000000..9c7b31fb62 --- /dev/null +++ b/examples/pipeline/coordinated_lr/test_lr_sid.py @@ -0,0 +1,89 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="./config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", + namespace=f"{namespace}experiment")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", + namespace=f"{namespace}experiment")) + lr_0 = CoordinatedLR("lr_0", + epochs=4, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=intersect_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="binary", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + + 
pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([intersect_0, lr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + deployed_pipeline.intersect_0.guest.component_setting( + input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", + namespace=f"{namespace}experiment")) + deployed_pipeline.intersect_0.hosts[0].component_setting( + input_data=DataWarehouseChannel(name="breast_hetero_host_sid", + namespace=f"{namespace}experiment")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + # print(f"predict lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("-config", type=str, default="./config.yaml", + help="config file") + parser.add_argument("-namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py new file mode 100644 index 0000000000..badfed7a39 --- /dev/null +++ b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py @@ -0,0 +1,60 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="./config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", + namespace=f"{namespace}experiment")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", + namespace=f"{namespace}experiment")) + lr_0 = CoordinatedLR("lr_0", + epochs=2, + batch_size=100, + optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True}, + cv_data=intersect_0.outputs["output_data"], + cv_param={"n_splits": 3}) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("-config", type=str, default="./config.yaml", + help="config file") + parser.add_argument("-namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py new file mode 100644 index 0000000000..b9bf8401ef --- /dev/null +++ 
b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py @@ -0,0 +1,90 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="./config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", + namespace=f"{namespace}experiment")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", + namespace=f"{namespace}experiment")) + lr_0 = CoordinatedLR("lr_0", + epochs=4, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=intersect_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 
1.0, + "total_iters": 100}}) + lr_1 = CoordinatedLR("lr_1", train_data=intersect_0.outputs["output_data"], + warm_start_model=lr_0.outputs["output_model"], + epochs=2, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + ) + + lr_2 = CoordinatedLR("lr_2", epochs=6, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=intersect_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="binary", + input_data=[lr_1.outputs["train_output_data"], lr_2.outputs["train_output_data"]]) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.add_task(lr_1) + pipeline.add_task(lr_2) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + print(f"lr_1 model: {pipeline.get_task_info('lr_1').get_output_model()}") + # print(f"train lr_1 data: {pipeline.get_task_info('lr_1').get_output_data()}") + + print(f"lr_2 model: {pipeline.get_task_info('lr_2').get_output_model()}") + # print(f"train lr_2 data: {pipeline.get_task_info('lr_2').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("-config", type=str, default="./config.yaml", + help="config file") + parser.add_argument("-namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/test_lr_sid.py b/examples/pipeline/test_lr_sid.py deleted file mode 100644 index e8569d3b1a..0000000000 --- a/examples/pipeline/test_lr_sid.py +++ /dev/null @@ -1,78 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersect_0 = Intersection("intersect_0", method="raw") -intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) -lr_0 = CoordinatedLR("lr_0", - epochs=4, - batch_size=None, - optimizer={"method": "rprop", "optimizer_params": {"lr": 0.01}}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"]) -lr_1 = CoordinatedLR("lr_1", test_data=intersect_0.outputs["output_data"], - input_model=lr_0.outputs["output_model"]) - -"""lr_0.guest.component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace="experiment")) -lr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace="experiment"))""" - -evaluation_0 = Evaluation("evaluation_0", - label_column_name="y", - runtime_roles=["guest"], - default_eval_setting="binary", - input_data=lr_0.outputs["train_output_data"]) - 
-# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(intersect_0) -pipeline.add_task(lr_0) -# pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() -print(f"lr_0 model: {pipeline.get_task_info('lr_0').get_output_model()}") -print(f"train lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") - -# print(pipeline.get_task_info("statistics_0").get_output_model()) -# print(f"evaluation metrics: ") -# print(pipeline.get_task_info("evaluation_0").get_output_metric()) - -pipeline.deploy([intersect_0, lr_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -deployed_pipeline.intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict() -print(f"predict lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") diff --git a/examples/pipeline/test_lr_sid_cv.py b/examples/pipeline/test_lr_sid_cv.py deleted file mode 100644 index 2f136a1d60..0000000000 --- a/examples/pipeline/test_lr_sid_cv.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersect_0 = Intersection("intersect_0", method="raw") -intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) -lr_0 = CoordinatedLR("lr_0", - epochs=2, - batch_size=100, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, - init_param={"fit_intercept": True}, - cv_data=intersect_0.outputs["output_data"], - cv_param={"n_splits": 3}) - -pipeline.add_task(intersect_0) -pipeline.add_task(lr_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() diff --git a/examples/pipeline/test_lr_sid_warm_start.py b/examples/pipeline/test_lr_sid_warm_start.py deleted file mode 100644 index bbd548313d..0000000000 --- a/examples/pipeline/test_lr_sid_warm_start.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersect_0 = Intersection("intersect_0", method="raw") -intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) -lr_0 = CoordinatedLR("lr_0", - epochs=3, - batch_size=None, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"]) -lr_1 = CoordinatedLR("lr_1", train_data=intersect_0.outputs["output_data"], - warm_start_model=lr_0.outputs["output_model"], - epochs=2, - batch_size=200) - -"""lr_0.guest.component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace="experiment")) -lr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace="experiment"))""" - -evaluation_0 = Evaluation("evaluation_0", - runtime_roles=["guest"], - input_data=lr_0.outputs["train_output_data"]) - -# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(intersect_0) -pipeline.add_task(lr_0) -pipeline.add_task(lr_1) -# 
pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() -print(f"lr_0 model: {pipeline.get_task_info('lr_0').get_output_model()}") -# print(f"lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") -print(f"\nlr_1 model: {pipeline.get_task_info('lr_1').get_output_model()}") - -"""# print(pipeline.get_task_info("statistics_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_metrics()) -print(f"evaluation metrics: ") -print(pipeline.get_task_info("evaluation_0").get_output_metrics()) - -pipeline.deploy([intersect_0, lr_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -deployed_pipeline.intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict()""" diff --git a/examples/pipeline/test_single_lr_multi_host.py b/examples/pipeline/test_single_lr_multi_host.py new file mode 100644 index 0000000000..cd332ad64e --- /dev/null +++ b/examples/pipeline/test_single_lr_multi_host.py @@ -0,0 +1,93 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="./config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"{namespace}experiment_sid")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"{namespace}experiment_sid")) + intersect_0.hosts[1].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"{namespace}experiment_sid")) + lr_0 = CoordinatedLR("lr_0", + epochs=4, + batch_size=None, + early_stop="weight_diff", + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=intersect_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + 
runtime_roles=["guest"], + default_eval_setting="binary", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([intersect_0, lr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + deployed_pipeline.intersect_0.guest.component_setting( + input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"{namespace}experiment_sid")) + deployed_pipeline.intersect_0.hosts[[0, 1]].component_setting( + input_data=DataWarehouseChannel(name="breast_hetero_host_sid", + namespace=f"{namespace}experiment_sid")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + # print(f"predict lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("-config", type=str, default="./config.yaml", + help="config file") + parser.add_argument("-namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/test_upload_sid.py b/examples/pipeline/test_upload_sid.py index 3d32f4757e..dfc82d1b18 100644 --- a/examples/pipeline/test_upload_sid.py +++ b/examples/pipeline/test_upload_sid.py @@ -31,10 +31,10 @@ 'tag_with_value': False, 'weight_type': 'float64'} -pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_guest}", - meta=meta, head=True, extend_sid=False, - namespace="experiment", - name="breast_hetero_guest_sid") +pipeline.transform_local_file_to_dataframe("/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_guest_sid.csv", + meta=meta, head=True, extend_sid=False, + 
namespace="experiment_sid", + name="breast_hetero_guest") meta = {'delimiter': ',', 'dtype': 'float64', @@ -47,7 +47,7 @@ 'tag_with_value': False, 'weight_type': 'float64'} -pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_guest}", - meta=meta, head=True, extend_sid=False, - namespace="experiment", - name="breast_hetero_host_sid") +pipeline.transform_local_file_to_dataframe("/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_host_sid.csv", + meta=meta, head=True, extend_sid=False, + namespace="experiment_sid", + name="breast_hetero_host") diff --git a/python/fate_test/__init__.py b/python/fate_test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fate_test/fate_test/__init__.py b/python/fate_test/fate_test/__init__.py new file mode 100644 index 0000000000..878d3a9c5d --- /dev/null +++ b/python/fate_test/fate_test/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/fate_test/fate_test/_ascii.py b/python/fate_test/fate_test/_ascii.py new file mode 100644 index 0000000000..ac3ba1244f --- /dev/null +++ b/python/fate_test/fate_test/_ascii.py @@ -0,0 +1,48 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +HEAD = """\ + +████████╗███████╗███████╗████████╗███████╗██╗ ██╗██╗████████╗███████╗ +╚══██╔══╝██╔════╝██╔════╝╚══██╔══╝██╔════╝██║ ██║██║╚══██╔══╝██╔════╝ + ██║ █████╗ ███████╗ ██║ ███████╗██║ ██║██║ ██║ █████╗ + ██║ ██╔══╝ ╚════██║ ██║ ╚════██║██║ ██║██║ ██║ ██╔══╝ + ██║ ███████╗███████║ ██║ ███████║╚██████╔╝██║ ██║ ███████╗ + ╚═╝ ╚══════╝╚══════╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚══════╝ + +""" + +BENCHMARK = """\ + +██████╗ ███████╗███╗ ██╗ ██████╗██╗ ██╗███╗ ███╗ █████╗ ██████╗ ██╗ ██╗ +██╔══██╗██╔════╝████╗ ██║██╔════╝██║ ██║████╗ ████║██╔══██╗██╔══██╗██║ ██╔╝ +██████╔╝█████╗ ██╔██╗ ██║██║ ███████║██╔████╔██║███████║██████╔╝█████╔╝ +██╔══██╗██╔══╝ ██║╚██╗██║██║ ██╔══██║██║╚██╔╝██║██╔══██║██╔══██╗██╔═██╗ +██████╔╝███████╗██║ ╚████║╚██████╗██║ ██║██║ ╚═╝ ██║██║ ██║██║ ██║██║ ██╗ +╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ +""" + +TAIL = """\ + + ██╗ ██╗ █████╗ ██╗ ██╗███████╗ ███████╗██╗ ██╗███╗ ██╗ + ██║ ██║██╔══██╗██║ ██║██╔════╝ ██╔════╝██║ ██║████╗ ██║ + ███████║███████║██║ ██║█████╗ █████╗ ██║ ██║██╔██╗ ██║ + ██╔══██║██╔══██║╚██╗ ██╔╝██╔══╝ ██╔══╝ ██║ ██║██║╚██╗██║ + ██║ ██║██║ ██║ ╚████╔╝ ███████╗ ██║ ╚██████╔╝██║ ╚████║ + ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═══╝ ╚══════╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ + +""" diff --git a/python/fate_test/fate_test/_client.py b/python/fate_test/fate_test/_client.py new file mode 100644 index 0000000000..84d623c4c3 --- /dev/null +++ b/python/fate_test/fate_test/_client.py @@ -0,0 +1,76 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sshtunnel + +from fate_test._flow_client import FLOWClient +from fate_test._io import LOGGER +from fate_test._parser import Config + + +class Clients(object): + def __init__(self, config: Config): + self._flow_clients = {} + self._tunnel_id_to_flow_clients = {} + self._role_str_to_service_id = {} + self._tunnel_id_to_tunnel = config.tunnel_id_to_tunnel + + for service_id, service in config.service_id_to_service.items(): + if isinstance(service, Config.service): + self._flow_clients[service_id] = FLOWClient( + service.address, config.data_base_dir, config.cache_directory) + + elif isinstance(service, Config.tunnel_service): + self._flow_clients[service_id] = FLOWClient(None, config.data_base_dir, config.cache_directory) + self._tunnel_id_to_flow_clients.setdefault(service.tunnel_id, []).append( + (service.index, self._flow_clients[service_id])) + + for party, service_id in config.party_to_service_id.items(): + for role_str in config.parties.party_to_role_string(party): + self._role_str_to_service_id[role_str] = service_id + + def __getitem__(self, role_str: str) -> 'FLOWClient': + if role_str not in self._role_str_to_service_id: + raise RuntimeError(f"no flow client found binding to {role_str}") + return self._flow_clients[self._role_str_to_service_id[role_str]] + + def __enter__(self): + # open ssh tunnels and create flow clients for remote + self._tunnels = [] + for tunnel_id, tunnel_conf in 
self._tunnel_id_to_tunnel.items(): + tunnel = sshtunnel.SSHTunnelForwarder(ssh_address_or_host=tunnel_conf.ssh_address, + ssh_username=tunnel_conf.ssh_username, + ssh_password=tunnel_conf.ssh_password, + ssh_pkey=tunnel_conf.ssh_priv_key, + remote_bind_addresses=tunnel_conf.services_address) + tunnel.start() + self._tunnels.append(tunnel) + for index, flow_client in self._tunnel_id_to_flow_clients[tunnel_id]: + flow_client.set_address(f"127.0.0.1:{tunnel.local_bind_ports[index]}") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + for tunnel in self._tunnels: + try: + tunnel.stop() + except Exception as e: + LOGGER.exception(e) + + def contains(self, role_str): + return role_str in self._role_str_to_service_id + + def all_roles(self): + return sorted(self._role_str_to_service_id.keys()) diff --git a/python/fate_test/fate_test/_config.py b/python/fate_test/fate_test/_config.py new file mode 100644 index 0000000000..7b26b69c3c --- /dev/null +++ b/python/fate_test/fate_test/_config.py @@ -0,0 +1,269 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import json +import os +import typing +from collections import namedtuple +from pathlib import Path + +from ruamel import yaml + +template = """\ +# base dir for data upload conf eg, data_base_dir={FATE} +# also used for accessing local files when running standalone mode +# examples/data/breast_hetero_guest.csv -> $data_base_dir/examples/data/breast_hetero_guest.csv +data_base_dir: path(FATE) + +# directory dedicated to fate_test job file storage, default cache location={FATE}/examples/cache/ +cache_directory: examples/cache/ +# directory stores performance benchmark suites, default location={FATE}/examples/benchmark_performance +performance_template_directory: examples/benchmark_performance/ +# directory stores flow test config, default location={FATE}/examples/flow_test_template/hetero_lr/flow_test_config.yaml +# st_config_directory: examples/flow_test_template/hetero_lr/flow_test_config.yaml + +# directory stores testsuite file with min_test data sets to upload, +# default location={FATE}/examples/data/upload_config/min_test_data_testsuite.json +min_test_data_config: examples/data/upload_config/min_test_data_testsuite.json +# directory stores testsuite file with all example data sets to upload, +# default location={FATE}/examples/data/upload_config/all_examples_data_testsuite.json +all_examples_data_config: examples/data/upload_config/all_examples_data_testsuite.json + +# directory where FATE code locates, default installation location={FATE}/fate +# python/federatedml -> $fate_base/python/federatedml +fate_base: path(FATE)/fate + +# whether to delete data in suites after all jobs done +clean_data: true + +# work mode: 0 for standalone, 1 for cluster +work_mode: 0 + +# participating parties' id and correponding flow service ip & port information +parties: + guest: [9999] + host: [10000, 9999] + arbiter: [10000] +services: + - flow_services: + - {address: 127.0.0.1:9380, parties: [9999, 10000]} + serving_setting: + address: 127.0.0.1:8059 + + ssh_tunnel: # 
optional + enable: false + ssh_address: : + ssh_username: + ssh_password: # optional + ssh_priv_key: "~/.ssh/id_rsa" + + +# what is ssh_tunnel? +# to open the ssh tunnel(s) if the remote service +# cannot be accessed directly from the location where the test suite is run! +# +# +---------------------+ +# | ssh address | +# | ssh username | +# | ssh password/ | +# +--------+ | ssh priv_key | +----------------+ +# |local ip+----------ssh tuunel-------------->+remote local ip | +# +--------+ | | +----------------+ +# | | +# request local ip:port +----- as if --------->request remote's local ip:port from remote side +# | | +# | | +# +---------------------+ +# + +""" + +data_base_dir = Path(__file__).resolve().parents[3] +if (data_base_dir / 'examples').is_dir(): + template = template.replace('path(FATE)', str(data_base_dir)) + +_default_config = Path(__file__).resolve().parent / 'fate_test_config.yaml' + +data_switch = None +use_local_data = 1 +data_alter = dict() +deps_alter = dict() +jobs_num = 0 +jobs_progress = 0 +non_success_jobs = [] + + +def create_config(path: Path, override=False): + if path.exists() and not override: + raise FileExistsError(f"{path} exists") + + with path.open("w") as f: + f.write(template) + + +def default_config(): + if not _default_config.exists(): + create_config(_default_config) + return _default_config + + +class Parties(object): + def __init__(self, **kwargs): + """ + mostly, accept guest, host and arbiter + """ + self._role_to_parties = kwargs + + self._party_to_role_string = {} + for role in kwargs: + parties = kwargs[role] + setattr(self, role, parties) + for i, party in enumerate(parties): + if party not in self._party_to_role_string: + self._party_to_role_string[party] = set() + self._party_to_role_string[party].add(f"{role.lower()}_{i}") + + @staticmethod + def from_dict(d: typing.MutableMapping[str, typing.List[int]]): + return Parties(**d) + + def party_to_role_string(self, party): + return self._party_to_role_string[party] + + 
def extract_role(self, counts: typing.MutableMapping[str, int]): + roles = {} + for role, num in counts.items(): + if role not in self._role_to_parties and num > 0: + raise ValueError(f"{role} not found in config") + else: + if len(self._role_to_parties[role]) < num: + raise ValueError(f"require {num} {role} parties, only {len(self._role_to_parties[role])} in config") + roles[role] = self._role_to_parties[role][:num] + return roles + + def extract_initiator_role(self, role): + initiator_role = role.strip() + if len(self._role_to_parties[initiator_role]) < 1: + raise ValueError(f"role {initiator_role} has empty party list") + party_id = self._role_to_parties[initiator_role][0] + return dict(role=initiator_role, party_id=party_id) + + +class Config(object): + service = namedtuple("service", ["address"]) + tunnel_service = namedtuple("tunnel_service", ["tunnel_id", "index"]) + tunnel = namedtuple("tunnel", ["ssh_address", "ssh_username", "ssh_password", "ssh_priv_key", "services_address"]) + + def __init__(self, config): + self.data_base_dir = config["data_base_dir"] + self.cache_directory = os.path.join(config["data_base_dir"], config["cache_directory"]) + self.perf_template_dir = os.path.join(config["data_base_dir"], config["performance_template_directory"]) + # self.flow_test_config_dir = os.path.join(config["data_base_dir"], config["flow_test_config_directory"]) + self.min_test_data_config = os.path.join(config["data_base_dir"], config["min_test_data_config"]) + self.all_examples_data_config = os.path.join(config["data_base_dir"], config["all_examples_data_config"]) + self.fate_base = config["fate_base"] + self.clean_data = config.get("clean_data", True) + self.parties = Parties.from_dict(config["parties"]) + self.role = config["parties"] + self.serving_setting = config["services"][0] + self.party_to_service_id = {} + self.service_id_to_service = {} + self.tunnel_id_to_tunnel = {} + self.extend_sid = None + self.auto_increasing_sid = None + self.work_mode = 
config.get("work_mode", 0) + + tunnel_id = 0 + service_id = 0 + os.makedirs(os.path.dirname(self.cache_directory), exist_ok=True) + for service_config in config["services"]: + flow_services = service_config["flow_services"] + if service_config.get("ssh_tunnel", {}).get("enable", False): + tunnel_id += 1 + services_address = [] + for index, flow_service in enumerate(flow_services): + service_id += 1 + address_host, address_port = flow_service["address"].split(":") + address_port = int(address_port) + services_address.append((address_host, address_port)) + self.service_id_to_service[service_id] = self.tunnel_service(tunnel_id, index) + for party in flow_service["parties"]: + self.party_to_service_id[party] = service_id + tunnel_config = service_config["ssh_tunnel"] + ssh_address_host, ssh_address_port = tunnel_config["ssh_address"].split(":") + self.tunnel_id_to_tunnel[tunnel_id] = self.tunnel((ssh_address_host, int(ssh_address_port)), + tunnel_config["ssh_username"], + tunnel_config["ssh_password"], + tunnel_config["ssh_priv_key"], + services_address) + else: + for flow_service in flow_services: + service_id += 1 + address = flow_service["address"] + self.service_id_to_service[service_id] = self.service(address) + for party in flow_service["parties"]: + self.party_to_service_id[party] = service_id + + @staticmethod + def load(path: typing.Union[str, Path], **kwargs): + if isinstance(path, str): + path = Path(path) + config = {} + if path is not None: + with path.open("r") as f: + config.update(yaml.safe_load(f)) + + if config["data_base_dir"] == "path(FATE)": + raise ValueError("Invalid 'data_base_dir'.") + config["data_base_dir"] = path.resolve().joinpath(config["data_base_dir"]).resolve() + + config.update(kwargs) + return Config(config) + + @staticmethod + def load_from_file(path: typing.Union[str, Path]): + """ + Loads conf content from json or yaml file. 
Used to read in parameter configuration + Parameters + ---------- + path: str, path to conf file, should be absolute path + + Returns + ------- + dict, parameter configuration in dictionary format + + """ + if isinstance(path, str): + path = Path(path) + config = {} + if path is not None: + file_type = path.suffix + with path.open("r") as f: + if file_type == ".yaml": + config.update(yaml.safe_load(f)) + elif file_type == ".json": + config.update(json.load(f)) + else: + raise ValueError(f"Cannot load conf from file type {file_type}") + return config + + +def parse_config(config): + try: + config_inst = Config.load(config) + except Exception as e: + raise RuntimeError(f"error parse config from {config}") from e + return config_inst diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py new file mode 100644 index 0000000000..0cfafb9d8e --- /dev/null +++ b/python/fate_test/fate_test/_flow_client.py @@ -0,0 +1,376 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import os +import time +import typing +from datetime import timedelta +from pathlib import Path + +from fate_client.flow_sdk import FlowClient +from fate_test._parser import Data + +from fate_test import _config + + +class FLOWClient(object): + + def __init__(self, + address: typing.Optional[str], + data_base_dir: typing.Optional[Path], + cache_directory: typing.Optional[Path], + role: str, + party_id: int): + self.address = address + self.version = "2.0.0-beta" + self._client = FlowClient(self.address.split(':')[0], self.address.split(':')[1], self.version) + self._data_base_dir = data_base_dir + self._cache_directory = cache_directory + self.data_size = 0 + self.role = role + self.party_id = party_id + + def set_address(self, address): + self.address = address + + def upload_data(self, data: Data, callback=None, output_path=None): + response = self._upload_data(data, output_path=output_path) + try: + code = response["code"] + if code != 0: + raise ValueError(f"Return code {code}!=0") + + namespace = response["data"]["namespace"] + name = response["data"]["name"] + job_id = response["job_id"] + except BaseException: + raise ValueError(f"Upload data fails, response={response}") + + # self.monitor_status(job_id, role=self.role, party_id=self.party_id) + self._awaiting(job_id, self.role, self.party_id, ) + return dict(namespace=namespace, name=name) + + def delete_data(self, data: Data): + # @todo: use client.table.delete(table=, namespace=) + try: + table_name = data.config['table_name'] if data.config.get( + 'table_name', None) is not None else data.config.get('name') + self._delete_data(table_name=table_name, namespace=data.config['namespace']) + except Exception as e: + raise RuntimeError(f"delete data failed") from e + + def output_data_table(self, job_id, role, party_id, component_name): + result = self._output_data_table(job_id=job_id, role=role, party_id=party_id, component_name=component_name) + return result + + def table_info(self, table_name, 
namespace): + result = self._table_info(table_name=table_name, namespace=namespace) + return result + + def add_notes(self, job_id, role, party_id, notes): + self._add_notes(job_id=job_id, role=role, party_id=party_id, notes=notes) + + """def check_connection(self): + try: + version = self._http.request(method="POST", url=f"{self._base}version/get", json={"module": "FATE"}, + timeout=2).json() + except Exception: + import traceback + traceback.print_exc() + raise + fate_version = version.get("data", {}).get("FATE") + if fate_version: + return fate_version, self.address + + raise EnvironmentError(f"connection not ok")""" + + def _awaiting(self, job_id, role, party_id, callback=None): + while True: + response = self._query_job(job_id, role=role, party_id=party_id) + if response.status.is_done(): + return response.status + if callback is not None: + callback(response) + time.sleep(1) + + def _upload_data(self, data, output_path=None, verbose=0, destroy=1): + conf = data.conf + # if conf.get("engine", {}) != "PATH": + if output_path is not None: + conf['file'] = os.path.join(os.path.abspath(output_path), os.path.basename(conf.get('file'))) + else: + if _config.data_switch is not None: + conf['file'] = os.path.join(str(self._cache_directory), os.path.basename(conf.get('file'))) + else: + conf['file'] = os.path.join(str(self._data_base_dir), conf.get('file')) + path = Path(conf.get('file')) + if not path.exists(): + raise Exception('The file is obtained from the fate flow client machine, but it does not exist, ' + f'please check the path: {path}') + response = self._client.data.upload(file=data.file, + head=data.head, + meta=data.meta, + extend_sid=data.extend_sid, + partitions=data.partitions) + return response + + def _table_info(self, table_name, namespace): + param = { + 'table_name': table_name, + 'namespace': namespace + } + response = self.flow_client(request='table/info', param=param) + return response + + def _delete_data(self, table_name, namespace): + param = 
{ + 'table_name': table_name, + 'namespace': namespace + } + response = self.flow_client(request='table/delete', param=param) + return response + + def _submit_job(self, conf, dsl): + param = { + 'job_dsl': self._save_json(dsl, 'submit_dsl.json'), + 'job_runtime_conf': self._save_json(conf, 'submit_conf.json') + } + response = SubmitJobResponse(self.flow_client(request='job/submit', param=param)) + return response + + def _deploy_model(self, model_id, model_version, dsl=None): + post_data = {'model_id': model_id, + 'model_version': model_version, + 'predict_dsl': dsl} + response = self.flow_client(request='model/deploy', param=post_data) + result = {} + try: + retcode = response['retcode'] + retmsg = response['retmsg'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"deploy model error: {response}") + result["model_id"] = response["data"]["model_id"] + result["model_version"] = response["data"]["model_version"] + except Exception as e: + raise RuntimeError(f"deploy model error: {response}") from e + + return result + + def _output_data_table(self, job_id, role, party_id, component_name): + post_data = {'job_id': job_id, + 'role': role, + 'party_id': party_id, + 'component_name': component_name} + response = self.flow_client(request='component/output_data_table', param=post_data) + result = {} + try: + retcode = response['retcode'] + retmsg = response['retmsg'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"deploy model error: {response}") + result["name"] = response["data"][0]["table_name"] + result["namespace"] = response["data"][0]["table_namespace"] + except Exception as e: + raise RuntimeError(f"output data table error: {response}") from e + return result + + def _get_summary(self, job_id, role, party_id, component_name): + post_data = {'job_id': job_id, + 'role': role, + 'party_id': party_id, + 'component_name': component_name} + response = self.flow_client(request='component/get_summary', param=post_data) + try: + retcode = 
response['retcode'] + retmsg = response['retmsg'] + result = {} + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"deploy model error: {response}") + result["summary_dir"] = retmsg # 获取summary文件位置 + except Exception as e: + raise RuntimeError(f"output data table error: {response}") from e + return result + + """def _query_job(self, job_id, role): + param = { + 'job_id': job_id, + 'role': role + } + response = QueryJobResponse(self.flow_client(request='job/query', param=param)) + return response""" + + def _query_job(self, job_id, role, party_id): + response = self._client.job.query(job_id, role, party_id) + try: + code = response["code"] + if code != 0: + raise ValueError(f"Return code {code}!=0") + + data = response["data"][0] + return data + except BaseException: + raise ValueError(f"query job is failed, response={response}") + + def get_version(self): + response = self._post(url='version/get', json={"module": "FATE"}) + try: + retcode = response['retcode'] + retmsg = response['retmsg'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"get version error: {response}") + fate_version = response["data"]["FATE"] + except Exception as e: + raise RuntimeError(f"get version error: {response}") from e + return fate_version + + def _add_notes(self, job_id, role, party_id, notes): + data = dict(job_id=job_id, role=role, party_id=party_id, notes=notes) + response = AddNotesResponse(self._post(url='job/update', json=data)) + return response + + def _table_bind(self, data): + response = self._post(url='table/bind', json=data) + try: + retcode = response['retcode'] + retmsg = response['retmsg'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"table bind error: {response}") + except Exception as e: + raise RuntimeError(f"table bind error: {response}") from e + return response + + +class Status(object): + def __init__(self, status: str): + self.status = status + + def is_done(self): + return self.status.lower() in ['complete', 
'success', 'canceled', 'failed', "timeout"] + + def is_success(self): + return self.status.lower() in ['complete', 'success'] + + def __str__(self): + return self.status + + def __repr__(self): + return self.__str__() + + +"""class QueryJobResponse(object): + def __init__(self, response: dict): + try: + status = Status(response.get('data')[0]["f_status"]) + progress = response.get('data')[0]['f_progress'] + except Exception as e: + raise RuntimeError(f"query job error, response: {response}") from e + self.status = status + self.progress = progress""" + + +class UploadDataResponse(object): + def __init__(self, response: dict): + try: + self.job_id = response["jobId"] + except Exception as e: + raise RuntimeError(f"upload error, response: {response}") from e + self.status: typing.Optional[Status] = None + + +class AddNotesResponse(object): + def __init__(self, response: dict): + try: + retcode = response['retcode'] + retmsg = response['retmsg'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"add notes error: {response}") + except Exception as e: + raise RuntimeError(f"add notes error: {response}") from e + + +"""class SubmitJobResponse(object): + def __init__(self, response: dict): + try: + self.job_id = response["jobId"] + self.model_info = response["data"]["model_info"] + except Exception as e: + raise RuntimeError(f"submit job error, response: {response}") from e + self.status: typing.Optional[Status] = None +""" + + +class DataProgress(object): + def __init__(self, role_str): + self.role_str = role_str + self.start = time.time() + self.show_str = f"[{self.elapse()}] {self.role_str}" + self.job_id = "" + + def elapse(self): + return f"{timedelta(seconds=int(time.time() - self.start))}" + + def submitted(self, job_id): + self.job_id = job_id + self.show_str = f"[{self.elapse()}]{self.job_id} {self.role_str}" + + def update(self): + self.show_str = f"[{self.elapse()}]{self.job_id} {self.role_str}" + + def show(self): + return self.show_str + + 
+class JobProgress(object): + def __init__(self, name): + self.name = name + self.start = time.time() + self.show_str = f"[{self.elapse()}] {self.name}" + self.job_id = "" + self.progress_tracking = "" + + def elapse(self): + return f"{timedelta(seconds=int(time.time() - self.start))}" + + def set_progress_tracking(self, progress_tracking): + self.progress_tracking = progress_tracking + " " + + def submitted(self, job_id): + self.job_id = job_id + self.show_str = f"{self.progress_tracking}[{self.elapse()}]{self.job_id} submitted {self.name}" + + def running(self, status, progress): + if progress is None: + progress = 0 + self.show_str = f"{self.progress_tracking}[{self.elapse()}]{self.job_id} {status} {progress:3}% {self.name}" + + def exception(self, exception_id): + self.show_str = f"{self.progress_tracking}[{self.elapse()}]{self.name} exception({exception_id}): {self.job_id}" + + def final(self, status): + self.show_str = f"{self.progress_tracking}[{self.elapse()}]{self.job_id} {status} {self.name}" + + def show(self): + return self.show_str + + +class JobStatus(object): + WAITING = 'waiting' + READY = 'ready' + RUNNING = "running" + CANCELED = "canceled" + TIMEOUT = "timeout" + FAILED = "failed" + PASS = "pass" + SUCCESS = "success" diff --git a/python/fate_test/fate_test/_io.py b/python/fate_test/fate_test/_io.py new file mode 100644 index 0000000000..edfaeee964 --- /dev/null +++ b/python/fate_test/fate_test/_io.py @@ -0,0 +1,70 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import click +import loguru + +from fate_test._ascii import HEAD, TAIL, BENCHMARK + + +# noinspection PyPep8Naming +class echo(object): + _file = None + + @classmethod + def set_file(cls, file): + cls._file = file + + @classmethod + def echo(cls, message, **kwargs): + click.secho(message, **kwargs) + click.secho(message, file=cls._file, **kwargs) + + @classmethod + def file(cls, message, **kwargs): + click.secho(message, file=cls._file, **kwargs) + + @classmethod + def stdout(cls, message, **kwargs): + click.secho(message, **kwargs) + + @classmethod + def stdout_newline(cls): + click.secho("") + + @classmethod + def welcome(cls, banner_type="testsuite"): + if banner_type == "testsuite": + cls.echo(HEAD) + elif banner_type == "benchmark": + cls.echo(BENCHMARK) + + @classmethod + def farewell(cls): + cls.echo(TAIL) + + @classmethod + def flush(cls): + import sys + sys.stdout.flush() + + +def set_logger(name): + loguru.logger.remove() + loguru.logger.add(name, level='ERROR', delay=True) + return loguru.logger + + +LOGGER = loguru.logger diff --git a/python/fate_test/fate_test/_parser.py b/python/fate_test/fate_test/_parser.py new file mode 100644 index 0000000000..3ab001da29 --- /dev/null +++ b/python/fate_test/fate_test/_parser.py @@ -0,0 +1,587 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import json +import typing +from collections import deque +from pathlib import Path + +import click +import prettytable +from fate_test._config import Parties, Config +from fate_test._io import echo +from fate_test.utils import TxtStyle + +from fate_test import _config + + +# noinspection PyPep8Naming +class chain_hook(object): + def __init__(self): + self._hooks = [] + + def add_hook(self, hook): + self._hooks.append(hook) + return self + + def add_extend_namespace_hook(self, namespace): + self.add_hook(_namespace_hook(namespace)) + return self + + def add_replace_hook(self, mapping): + self.add_hook(_replace_hook(mapping)) + + def hook(self, d): + return self._chain_hooks(self._hooks, d) + + @staticmethod + def _chain_hooks(hook_funcs, d): + for hook_func in hook_funcs: + if d is None: + return + d = hook_func(d) + return d + + +DATA_JSON_HOOK = chain_hook() +CONF_JSON_HOOK = chain_hook() +DSL_JSON_HOOK = chain_hook() + + +class Data(object): + def __init__(self, config: dict, role_str: str): + self.config = config + self.file = config.get("file", "") + self.meta = config.get("meta", {}) + self.partitions = config.get("partitions", 4) + self.head = config.get("head", True) + self.extend_sid = config.get("extend_sid", True) + self.namespace = config.get("namespace", "") + self.table_name = config.get("table_name", "") + self.role_str = role_str + + @staticmethod + def load(config, path: Path): + kwargs = {} + for field_name in config.keys(): + if field_name not in ["file", "role"]: + kwargs[field_name] = config[field_name] + # if config.get("engine", {}) != "PATH": + file_path = path.parent.joinpath(config["file"]).resolve() + if not file_path.exists(): + kwargs["file"] = config["file"] + else: + kwargs["file"] = file_path + role_str = config.get("role") if config.get("role") != "guest" else "guest_0" + return Data(config=kwargs, role_str=role_str) + + def update(self, config: Config): + if config.extend_sid is not None: + self.extend_sid = config.extend_sid 
+ if config.meta is not None: + self.meta.update(config.meta) + + +class JobConf(object): + def __init__(self, initiator: dict, role: dict, job_parameters=None, **kwargs): + self.initiator = initiator + self.role = role + self.job_parameters = job_parameters if job_parameters else {} + self.others_kwargs = kwargs + + def as_dict(self): + return dict( + initiator=self.initiator, + role=self.role, + job_parameters=self.job_parameters, + **self.others_kwargs, + ) + + @staticmethod + def load(path: Path): + with path.open("r") as f: + kwargs = json.load(f, object_hook=CONF_JSON_HOOK.hook) + return JobConf(**kwargs) + + @property + def dsl_version(self): + return self.others_kwargs.get("dsl_version", 1) + + def update( + self, + parties: Parties, + timeout, + job_parameters, + component_parameters, + ): + self.initiator = parties.extract_initiator_role(self.initiator["role"]) + self.role = parties.extract_role( + {role: len(parties) for role, parties in self.role.items()} + ) + if timeout > 0: + self.update_job_common_parameters(timeout=timeout) + + if timeout > 0: + self.update_job_common_parameters(timeout=timeout) + + for key, value in job_parameters.items(): + self.update_parameters(parameters=self.job_parameters, key=key, value=value) + for key, value in component_parameters.items(): + if self.dsl_version == 1: + self.update_parameters( + parameters=self.others_kwargs.get("algorithm_parameters"), + key=key, + value=value, + ) + else: + self.update_parameters( + parameters=self.others_kwargs.get("component_parameters"), + key=key, + value=value, + ) + + def update_parameters(self, parameters, key, value): + if isinstance(parameters, dict): + for keys in parameters: + if keys == key: + parameters.get(key).update(value), + elif isinstance(parameters[keys], dict): + self.update_parameters(parameters[keys], key, value) + + def update_job_common_parameters(self, **kwargs): + if self.dsl_version == 1: + self.job_parameters.update(**kwargs) + else: + 
self.job_parameters.setdefault("common", {}).update(**kwargs) + + def update_job_type(self, job_type="predict"): + if self.dsl_version == 1: + if self.job_parameters.get("job_type", None) is None: + self.job_parameters.update({"job_type": job_type}) + else: + if self.job_parameters.setdefault("common", {}).get("job_type", None) is None: + self.job_parameters.setdefault("common", {}).update({"job_type": job_type}) + + def update_component_parameters(self, key, value, parameters=None): + if parameters is None: + if self.dsl_version == 1: + parameters = self.others_kwargs.get("algorithm_parameters") + else: + parameters = self.others_kwargs.get("component_parameters") + if isinstance(parameters, dict): + for keys in parameters: + if keys == key: + if isinstance(value, dict): + parameters[keys].update(value) + else: + parameters.update({key: value}) + elif ( + isinstance(parameters[keys], dict) and parameters[keys] is not None + ): + self.update_component_parameters(key, value, parameters[keys]) + + def get_component_parameters(self, keys): + if len(keys) == 0: + return self.others_kwargs.get("component_parameters") if self.dsl_version == 2 else self.others_kwargs.get( + "role_parameters") + if self.dsl_version == 1: + parameters = self.others_kwargs.get("role_parameters") + else: + parameters = self.others_kwargs.get("component_parameters").get("role") + + for key in keys: + parameters = parameters[key] + return parameters + + +class JobDSL(object): + def __init__(self, components: dict, provider=None): + self.components = components + self.provider = provider + + @staticmethod + def load(path: Path, provider): + with path.open("r") as f: + kwargs = json.load(f, object_hook=DSL_JSON_HOOK.hook) + if provider is not None: + kwargs["provider"] = provider + return JobDSL(**kwargs) + + def as_dict(self): + if self.provider is None: + return dict(components=self.components) + else: + return dict(components=self.components, provider=self.provider) + + +class Job(object): + 
def __init__( + self, + job_name: str, + job_conf: JobConf, + job_dsl: typing.Optional[JobDSL], + pre_works: list, + ): + self.job_name = job_name + self.job_conf = job_conf + self.job_dsl = job_dsl + self.pre_works = pre_works + + @classmethod + def load(cls, job_name, job_configs, base: Path, provider): + job_conf = JobConf.load(base.joinpath(job_configs.get("conf")).resolve()) + job_dsl = job_configs.get("dsl", None) + if job_dsl is not None: + job_dsl = JobDSL.load(base.joinpath(job_dsl).resolve(), provider) + + pre_works = [] + pre_works_value = {} + deps_dict = {} + + if job_configs.get("model_deps", None): + pre_works.append(job_configs["model_deps"]) + deps_dict["model_deps"] = {'name': job_configs["model_deps"]} + elif job_configs.get("deps", None): + pre_works.append(job_configs["deps"]) + deps_dict["model_deps"] = {'name': job_configs["deps"]} + if job_configs.get("data_deps", None): + deps_dict["data_deps"] = {'data': job_configs["data_deps"]} + pre_works.append(list(job_configs["data_deps"].keys())[0]) + deps_dict["data_deps"].update({'name': list(job_configs["data_deps"].keys())}) + if job_configs.get("cache_deps", None): + pre_works.append(job_configs["cache_deps"]) + deps_dict["cache_deps"] = {'name': job_configs["cache_deps"]} + if job_configs.get("model_loader_deps", None): + pre_works.append(job_configs["model_loader_deps"]) + deps_dict["model_loader_deps"] = {'name': job_configs["model_loader_deps"]} + + pre_works_value.update(deps_dict) + _config.deps_alter[job_name] = pre_works_value + + return Job( + job_name=job_name, job_conf=job_conf, job_dsl=job_dsl, pre_works=pre_works + ) + + @property + def submit_params(self): + return dict( + conf=self.job_conf.as_dict(), + dsl=self.job_dsl.as_dict() if self.job_dsl else None, + ) + + def set_pre_work(self, name, **kwargs): + self.job_conf.update_job_common_parameters(**kwargs) + self.job_conf.update_job_type("predict") + + def set_input_data(self, hierarchys, table_info): + for table_name, hierarchy 
in zip(table_info, hierarchys): + key = list(table_name.keys())[0] + value = table_name[key] + self.job_conf.update_component_parameters( + key=key, + value=value, + parameters=self.job_conf.get_component_parameters(hierarchy), + ) + + def is_submit_ready(self): + return len(self.pre_works) == 0 + + +class PipelineJob(object): + def __init__(self, job_name: str, script_path: Path): + self.job_name = job_name + self.script_path = script_path + + +class Testsuite(object): + def __init__( + self, + dataset: typing.List[Data], + # jobs: typing.List[Job], + pipeline_jobs: typing.List[PipelineJob], + path: Path, + ): + self.dataset = dataset + # self.jobs = jobs + self.pipeline_jobs = pipeline_jobs + self.path = path + self.suite_name = Path(self.path).stem + + self._dependency: typing.MutableMapping[str, typing.List[Job]] = {} + self._final_status: typing.MutableMapping[str, FinalStatus] = {} + self._ready_jobs = deque() + """for job in self.jobs: + for name in job.pre_works: + self._dependency.setdefault(name, []).append(job) + + self._final_status[job.job_name] = FinalStatus(job.job_name) + if job.is_submit_ready(): + self._ready_jobs.appendleft(job)""" + + for job in self.pipeline_jobs: + self._final_status[job.job_name] = FinalStatus(job.job_name) + + @staticmethod + def load(path: Path, provider): + with path.open("r") as f: + testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + + dataset = [] + for d in testsuite_config.get("data"): + if "use_local_data" not in d: + d.update({"use_local_data": _config.use_local_data}) + dataset.append(Data.load(d, path)) + """jobs = [] + for job_name, job_configs in testsuite_config.get("tasks", {}).items(): + jobs.append( + Job.load(job_name=job_name, job_configs=job_configs, base=path.parent, provider=provider) + )""" + + pipeline_jobs = [] + if testsuite_config.get("tasks", None) is not None and provider is not None: + echo.echo('[Warning] Pipeline does not support parameter: provider-> {}'.format(provider)) + for 
job_name, job_configs in testsuite_config.get("tasks", {}).items(): + script_path = path.parent.joinpath(job_configs["script"]).resolve() + pipeline_jobs.append(PipelineJob(job_name, script_path)) + + testsuite = Testsuite(dataset, pipeline_jobs, path) + return testsuite + + def jobs_iter(self) -> typing.Generator[Job, None, None]: + while self._ready_jobs: + yield self._ready_jobs.pop() + + @staticmethod + def style_table(txt): + colored_txt = txt.replace("success", f"{TxtStyle.TRUE_VAL}success{TxtStyle.END}") + colored_txt = colored_txt.replace("failed", f"{TxtStyle.FALSE_VAL}failed{TxtStyle.END}") + colored_txt = colored_txt.replace("not submitted", f"{TxtStyle.FALSE_VAL}not submitted{TxtStyle.END}") + return colored_txt + + def pretty_final_summary(self, time_consuming, suite_file=None): + """table = prettytable.PrettyTable( + ["job_name", "job_id", "status", "time_consuming", "exception_id", "rest_dependency"] + )""" + table = prettytable.PrettyTable() + table.set_style(prettytable.ORGMODE) + field_names = ["job_name", "job_id", "status", "time_consuming", "exception_id", "rest_dependency"] + table.field_names = field_names + for status in self.get_final_status().values(): + if status.status != "success": + status.suite_file = suite_file + _config.non_success_jobs.append(status) + if status.exception_id != "-": + exception_id_txt = f"{TxtStyle.FALSE_VAL}{status.exception_id}{TxtStyle.END}" + else: + exception_id_txt = f"{TxtStyle.FIELD_VAL}{status.exception_id}{TxtStyle.END}" + table.add_row( + [ + f"{TxtStyle.FIELD_VAL}{status.name}{TxtStyle.END}", + f"{TxtStyle.FIELD_VAL}{status.job_id}{TxtStyle.END}", + self.style_table(status.status), + f"{TxtStyle.FIELD_VAL}{time_consuming.pop(0) if status.job_id != '-' else '-'}{TxtStyle.END}", + f"{exception_id_txt}", + f"{TxtStyle.FIELD_VAL}{','.join(status.rest_dependency)}{TxtStyle.END}", + ] + ) + + return table.get_string(title=f"{TxtStyle.TITLE}Testsuite Summary: {self.suite_name}{TxtStyle.END}") + + def 
model_in_dep(self, name): + return name in self._dependency + + def get_dependent_jobs(self, name): + return self._dependency[name] + + def remove_dependency(self, name): + del self._dependency[name] + + def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info=None, model_loader_info=None): + if model_info is not None: + job.set_pre_work(name, **model_info) + if table_info is not None: + job.set_input_data(table_info["hierarchy"], table_info["table_info"]) + if cache_info is not None: + job.set_input_data(cache_info["hierarchy"], cache_info["cache_info"]) + if model_loader_info is not None: + job.set_input_data(model_loader_info["hierarchy"], model_loader_info["model_loader_info"]) + if name in job.pre_works: + job.pre_works.remove(name) + if job.is_submit_ready(): + self._ready_jobs.appendleft(job) + + def reflash_configs(self, config: Config): + failed = [] + for job in self.jobs: + try: + job.job_conf.update( + config.parties, None, {}, {} + ) + except ValueError as e: + failed.append((job, e)) + return failed + + def update_status( + self, job_name, job_id: str = None, status: str = None, exception_id: str = None + ): + for k, v in locals().items(): + if k != "job_name" and v is not None: + setattr(self._final_status[job_name], k, v) + + def get_final_status(self): + for name, jobs in self._dependency.items(): + for job in jobs: + self._final_status[job.job_name].rest_dependency.append(name) + return self._final_status + + +class FinalStatus(object): + def __init__( + self, + name: str, + job_id: str = "-", + status: str = "not submitted", + exception_id: str = "-", + rest_dependency: typing.List[str] = None, + ): + self.name = name + self.job_id = job_id + self.status = status + self.exception_id = exception_id + self.rest_dependency = rest_dependency or [] + self.suite_file = None + + +class BenchmarkJob(object): + def __init__(self, job_name: str, script_path: Path, conf_path: Path): + self.job_name = job_name + self.script_path = 
script_path + self.conf_path = conf_path + + +class BenchmarkPair(object): + def __init__( + self, pair_name: str, jobs: typing.List[BenchmarkJob], compare_setting: dict + ): + self.pair_name = pair_name + self.jobs = jobs + self.compare_setting = compare_setting + + +class BenchmarkSuite(object): + def __init__( + self, dataset: typing.List[Data], pairs: typing.List[BenchmarkPair], path: Path + ): + self.dataset = dataset + self.pairs = pairs + self.path = path + + @staticmethod + def load(path: Path): + with path.open("r") as f: + testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + + dataset = [] + for d in testsuite_config.get("data"): + dataset.append(Data.load(d, path)) + + pairs = [] + for pair_name, pair_configs in testsuite_config.items(): + if pair_name == "data": + continue + jobs = [] + for job_name, job_configs in pair_configs.items(): + if job_name == "compare_setting": + continue + script_path = path.parent.joinpath(job_configs["script"]).resolve() + if job_configs.get("conf"): + conf_path = path.parent.joinpath(job_configs["conf"]).resolve() + else: + conf_path = "" + jobs.append( + BenchmarkJob( + job_name=job_name, script_path=script_path, conf_path=conf_path + ) + ) + compare_setting = pair_configs.get("compare_setting") + if compare_setting and not isinstance(compare_setting, dict): + raise ValueError( + f"expected 'compare_setting' type is dict, received {type(compare_setting)} instead." 
+ ) + pairs.append( + BenchmarkPair( + pair_name=pair_name, jobs=jobs, compare_setting=compare_setting + ) + ) + suite = BenchmarkSuite(dataset=dataset, pairs=pairs, path=path) + return suite + + +def non_success_summary(): + status = {} + for job in _config.non_success_jobs: + if job.status not in status.keys(): + status[job.status] = prettytable.PrettyTable( + ["testsuite_name", "job_name", "job_id", "status", "exception_id", "rest_dependency"] + ) + + status[job.status].add_row( + [ + job.suite_file, + job.name, + job.job_id, + job.status, + job.exception_id, + ",".join(job.rest_dependency), + ] + ) + for k, v in status.items(): + echo.echo("\n" + "#" * 60) + echo.echo(v.get_string(title=f"{k} job record"), fg='red') + + +def _namespace_hook(namespace): + def _hook(d): + if d is None: + return d + if "namespace" in d and namespace: + d["namespace"] = f"{d['namespace']}_{namespace}" + return d + + return _hook + + +def _replace_hook(mapping: dict): + def _hook(d): + for k, v in mapping.items(): + if k in d: + d[k] = v + return d + + return _hook + + +class JsonParamType(click.ParamType): + name = "json_string" + + def convert(self, value, param, ctx): + try: + return json.loads(value) + except ValueError: + self.fail(f"{value} is not a valid json string", param, ctx) + + +JSON_STRING = JsonParamType() diff --git a/python/fate_test/fate_test/scripts/__init__.py b/python/fate_test/fate_test/scripts/__init__.py new file mode 100644 index 0000000000..878d3a9c5d --- /dev/null +++ b/python/fate_test/fate_test/scripts/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/fate_test/fate_test/scripts/_options.py b/python/fate_test/fate_test/scripts/_options.py new file mode 100644 index 0000000000..ae30f748a0 --- /dev/null +++ b/python/fate_test/fate_test/scripts/_options.py @@ -0,0 +1,67 @@ +import time + +import click +from fate_test._config import parse_config, default_config +from fate_test.scripts._utils import _set_namespace + + +class SharedOptions(object): + _options = { + "config": (('-c', '--config'), + dict(type=click.Path(exists=True), help=f"Manual specify config file", default=None), + default_config().__str__()), + "namespace": (('-n', '--namespace'), + dict(type=str, help=f"Manual specify fate_test namespace", default=None), + time.strftime('%Y%m%d%H%M%S')), + "namespace_mangling": (('-nm', '--namespace-mangling',), + dict(type=bool, is_flag=True, help="Mangling data namespace", default=None), + False), + "yes": (('-y', '--yes',), dict(type=bool, is_flag=True, help="Skip double check", default=None), + False), + # "extend_sid": (('--extend_sid',), + # dict(type=bool, is_flag=True, help="whether to append uuid as sid when uploading data", + # default=None), None), + # "auto_increasing_sid": (('--auto_increasing_sid',), + # dict(type=bool, is_flag=True, help="whether to generate sid value starting at 0", + # default=None), None), + # "mode": (('--mode',), dict(type=click.Choice(["cluster", "standalone"]), default="cluster", + # help="job mode, choose from 'cluster' or 'standalone'"), None) + } + + def __init__(self): + self._options_kwargs = {} + + def __getitem__(self, item): + return 
self._options_kwargs[item] + + def get(self, k, default=None): + v = self._options_kwargs.get(k, default) + if v is None and k in self._options: + v = self._options[k][2] + return v + + def update(self, **kwargs): + for k, v in kwargs.items(): + if v is not None: + self._options_kwargs[k] = v + + def post_process(self): + # add defaults here + for k, v in self._options.items(): + if self._options_kwargs.get(k, None) is None: + self._options_kwargs[k] = v[2] + + # update config + config = parse_config(self._options_kwargs['config']) + self._options_kwargs['config'] = config + + _set_namespace(self._options_kwargs['namespace_mangling'], self._options_kwargs['namespace']) + + @classmethod + def get_shared_options(cls, hidden=False): + def shared_options(f): + for name, option in cls._options.items(): + f = click.option(*option[0], **dict(option[1], hidden=hidden))(f) + return f + + return shared_options diff --git a/python/fate_test/fate_test/scripts/_utils.py b/python/fate_test/fate_test/scripts/_utils.py new file mode 100644 index 0000000000..c087300515 --- /dev/null +++ b/python/fate_test/fate_test/scripts/_utils.py @@ -0,0 +1,188 @@ +import glob as glob_ +import importlib +import os +import time +import uuid +from pathlib import Path + +import click +from fate_test._client import Clients +from fate_test._config import Config +from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse +from fate_test._io import echo, LOGGER, set_logger +from fate_test._parser import Testsuite, BenchmarkSuite, DATA_JSON_HOOK, CONF_JSON_HOOK, DSL_JSON_HOOK + +from fate_test import _config + + +def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, + config_inst, encryption_type, match_rate, sparsity, force, split_host, output_path, parallelize): + from fate_test.scripts import generate_mock_data + + def _find_testsuite_files(path): + suffix = ["testsuite.json", "benchmark.json"] + if 
isinstance(path, str): + path = Path(path) + if path.is_file(): + if path.name.endswith(suffix[0]) or path.name.endswith(suffix[1]): + paths = [path] + else: + LOGGER.warning(f"{path} is file, but not end with `{suffix}`, skip") + paths = [] + return [p.resolve() for p in paths] + else: + os.path.abspath(path) + paths = glob_.glob(f"{path}/*{suffix[0]}") + glob_.glob(f"{path}/*{suffix[1]}") + return [Path(p) for p in paths] + + for include in includes: + if isinstance(include, str): + include_paths = Path(include) + include_paths = _find_testsuite_files(include_paths) + for include_path in include_paths: + generate_mock_data.get_big_data(guest_data_size, host_data_size, guest_feature_num, host_feature_num, + include_path, host_data_type, config_inst, encryption_type, + match_rate, sparsity, force, split_host, output_path, parallelize) + + +def _load_testsuites(includes, excludes, glob, provider=None, suffix="testsuite.json", suite_type="testsuite"): + def _find_testsuite_files(path): + if isinstance(path, str): + path = Path(path) + if path.is_file(): + if path.name.endswith(suffix): + paths = [path] + else: + LOGGER.warning(f"{path} is file, but not end with `{suffix}`, skip") + paths = [] + else: + paths = path.glob(f"**/*{suffix}") + return [p.resolve() for p in paths] + + excludes_set = set() + for exclude in excludes: + excludes_set.update(_find_testsuite_files(exclude)) + + suite_paths = set() + for include in includes: + if isinstance(include, str): + include = Path(include) + + # glob + if glob is not None and include.is_dir(): + include_list = include.glob(glob) + else: + include_list = [include] + for include_path in include_list: + for suite_path in _find_testsuite_files(include_path): + if suite_path not in excludes_set: + suite_paths.add(suite_path) + suites = [] + for suite_path in suite_paths: + try: + if suite_type == "testsuite": + suite = Testsuite.load(suite_path.resolve(), provider) + elif suite_type == "benchmark": + suite = 
BenchmarkSuite.load(suite_path.resolve()) + else: + raise ValueError(f"Unsupported suite type: {suite_type}. Only accept type 'testsuite' or 'benchmark'.") + except Exception as e: + echo.stdout(f"load suite {suite_path} failed: {e}") + else: + suites.append(suite) + return suites + + +@LOGGER.catch +def _upload_data(clients: Clients, suite, config: Config, output_path=None): + with click.progressbar(length=len(suite.dataset), + label="dataset", + show_eta=False, + show_pos=True, + width=24) as bar: + for i, data in enumerate(suite.dataset): + data.update(config) + table_name = data.config['table_name'] if data.config.get( + 'table_name', None) is not None else data.config.get('name') + data_progress = DataProgress(f"{data.role_str}<-{data.config['namespace']}.{table_name}") + + def update_bar(n_step): + bar.item_show_func = lambda x: data_progress.show() + time.sleep(0.1) + bar.update(n_step) + + def _call_back(resp): + if isinstance(resp, UploadDataResponse): + data_progress.submitted(resp.job_id) + echo.file(f"[dataset]{resp.job_id}") + if isinstance(resp, QueryJobResponse): + data_progress.update() + update_bar(0) + + try: + echo.stdout_newline() + status, data_path = clients[data.role_str].upload_data(data, _call_back, output_path) + time.sleep(1) + data_progress.update() + if status != 'success': + raise RuntimeError(f"uploading {i + 1}th data for {suite.path} {status}") + bar.update(1) + if _config.data_switch: + from fate_test.scripts import generate_mock_data + + generate_mock_data.remove_file(data_path) + except Exception: + exception_id = str(uuid.uuid1()) + echo.file(f"exception({exception_id})") + LOGGER.exception(f"exception id: {exception_id}") + echo.echo(f"upload {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}") + # raise RuntimeError(f"exception uploading {i + 1}th data") from e + + +def _delete_data(clients: Clients, suite: Testsuite): + with click.progressbar(length=len(suite.dataset), + label="delete ", + 
show_eta=False, + show_pos=True, + width=24) as bar: + for data in suite.dataset: + # noinspection PyBroadException + try: + table_name = data.config['table_name'] if data.config.get( + 'table_name', None) is not None else data.config.get('name') + bar.item_show_func = \ + lambda x: f"delete table: name={table_name}, namespace={data.config['namespace']}" + clients[data.role_str].delete_data(data) + except Exception: + LOGGER.exception( + f"delete failed: name={table_name}, namespace={data.config['namespace']}") + + time.sleep(0.5) + bar.update(1) + echo.stdout_newline() + + +def _load_module_from_script(script_path): + module_name = str(script_path).split("/", -1)[-1].split(".")[0] + loader = importlib.machinery.SourceFileLoader(module_name, str(script_path)) + spec = importlib.util.spec_from_loader(loader.name, loader) + mod = importlib.util.module_from_spec(spec) + loader.exec_module(mod) + return mod + + +def _set_namespace(data_namespace_mangling, namespace): + Path(f"logs/{namespace}").mkdir(exist_ok=True, parents=True) + set_logger(f"logs/{namespace}/exception.log") + echo.set_file(click.open_file(f'logs/{namespace}/stdout', "a")) + + if data_namespace_mangling: + echo.echo(f"add data_namespace_mangling: _{namespace}") + DATA_JSON_HOOK.add_extend_namespace_hook(namespace) + CONF_JSON_HOOK.add_extend_namespace_hook(namespace) + + +def _add_replace_hook(replace): + DATA_JSON_HOOK.add_replace_hook(replace) + CONF_JSON_HOOK.add_replace_hook(replace) + DSL_JSON_HOOK.add_replace_hook(replace) diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py new file mode 100644 index 0000000000..9030ed9818 --- /dev/null +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -0,0 +1,151 @@ +import os +import re +import time +import uuid +from datetime import timedelta +from inspect import signature + +import click +from fate_test._client import Clients +from fate_test._config import Config +from 
fate_test._io import LOGGER, echo +from fate_test._parser import BenchmarkSuite +from fate_test.scripts._options import SharedOptions +from fate_test.scripts._utils import _upload_data, _delete_data, _load_testsuites, _load_module_from_script +from fate_test.utils import show_data, match_metrics + +DATA_DISPLAY_PATTERN = re.compile("^FATE") + + +@click.command(name="benchmark-quality") +@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", + help="include *benchmark.json under these paths") +@click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, + help="exclude *benchmark.json under these paths") +@click.option('-g', '--glob', type=str, + help="glob string to filter sub-directory of path specified by ") +@click.option('-t', '--tol', type=float, + help="tolerance (absolute error) for metrics to be considered almost equal. " + "Comparison is done by evaluating abs(a-b) <= max(relative_tol * max(abs(a), abs(b)), absolute_tol)") +@click.option('-s', '--storage-tag', type=str, + help="tag for storing metrics, for future metrics info comparison") +@click.option('-v', '--history-tag', type=str, multiple=True, + help="Extract metrics info from history tags for comparison") +@click.option('-d', '--match-details', type=click.Choice(['all', 'relative', 'absolute', 'none']), + default="all", help="Error value display in algorithm comparison") +@click.option('--skip-data', is_flag=True, default=False, + help="skip uploading data specified in benchmark conf") +@click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) +@click.option("--enable-clean-data", "clean_data", flag_value=True, default=None) +@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, storage_tag, history_tag, match_details, + **kwargs): + """ + process benchmark suite, alias: bq + """ + ctx.obj.update(**kwargs) + 
ctx.obj.post_process() + namespace = ctx.obj["namespace"] + config_inst = ctx.obj["config"] + if ctx.obj["extend_sid"] is not None: + config_inst.extend_sid = ctx.obj["extend_sid"] + if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"] + if clean_data is None: + clean_data = config_inst.clean_data + data_namespace_mangling = ctx.obj["namespace_mangling"] + yes = ctx.obj["yes"] + + echo.welcome("benchmark") + echo.echo(f"testsuite namespace: {namespace}", fg='red') + echo.echo("loading testsuites:") + suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, + suffix="benchmark.json", suite_type="benchmark") + for suite in suites: + echo.echo(f"\tdataset({len(suite.dataset)}) benchmark groups({len(suite.pairs)}) {suite.path}") + if not yes and not click.confirm("running?"): + return + with Clients(config_inst) as client: + fate_version = client["guest_0"].get_version() + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + if not skip_data: + try: + _upload_data(client, suite, config_inst) + except Exception as e: + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + try: + _run_benchmark_pairs(config_inst, suite, tol, namespace, data_namespace_mangling, storage_tag, + history_tag, fate_version, match_details) + except Exception as e: + raise RuntimeError(f"exception occur while running benchmark jobs for {suite.path}") from e + + if not skip_data and clean_data: + _delete_data(client, suite) + echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') + + except Exception: + exception_id = uuid.uuid1() + echo.echo(f"exception in {suite.path}, exception_id={exception_id}", err=True, fg='red') + LOGGER.exception(f"exception id: {exception_id}") + finally: + 
echo.stdout_newline() + echo.farewell() + echo.echo(f"testsuite namespace: {namespace}", fg='red') + + +@LOGGER.catch +def _run_benchmark_pairs(config: Config, suite: BenchmarkSuite, tol: float, namespace: str, + data_namespace_mangling: bool, storage_tag, history_tag, fate_version, match_details): + # pipeline demo goes here + pair_n = len(suite.pairs) + fate_base = config.fate_base + PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python") + os.environ['PYTHONPATH'] = PYTHONPATH + for i, pair in enumerate(suite.pairs): + echo.echo(f"Running [{i + 1}/{pair_n}] group: {pair.pair_name}") + results = {} + # data_summary = None + job_n = len(pair.jobs) + for j, job in enumerate(pair.jobs): + try: + echo.echo(f"Running [{j + 1}/{job_n}] job: {job.job_name}") + job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path + param = Config.load_from_file(conf_path) + mod = _load_module_from_script(script_path) + input_params = signature(mod.main).parameters + # local script + if len(input_params) == 1: + data, metric = mod.main(param=param) + elif len(input_params) == 2: + data, metric = mod.main(config=config, param=param) + # pipeline script + elif len(input_params) == 3: + if data_namespace_mangling: + data, metric = mod.main(config=config, param=param, namespace=f"_{namespace}") + else: + data, metric = mod.main(config=config, param=param) + else: + data, metric = mod.main() + results[job_name] = metric + echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} Success!\n") + if data and DATA_DISPLAY_PATTERN.match(job_name): + # data_summary = data + show_data(data) + # if data_summary is None: + # data_summary = data + except Exception as e: + exception_id = uuid.uuid1() + echo.echo(f"exception while running [{j + 1}/{job_n}] job, exception_id={exception_id}", err=True, + fg='red') + LOGGER.exception(f"exception id: {exception_id}, error message: \n{e}") + continue + rel_tol = pair.compare_setting.get("relative_tol") + # 
show_data(data_summary) + match_metrics(evaluate=True, group_name=pair.pair_name, abs_tol=tol, rel_tol=rel_tol, + storage_tag=storage_tag, history_tag=history_tag, fate_version=fate_version, + cache_directory=config.cache_directory, match_details=match_details, **results) diff --git a/python/fate_test/fate_test/scripts/cli.py b/python/fate_test/fate_test/scripts/cli.py new file mode 100644 index 0000000000..8dc444c7d8 --- /dev/null +++ b/python/fate_test/fate_test/scripts/cli.py @@ -0,0 +1,67 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import click + +from fate_test.scripts._options import SharedOptions +from fate_test.scripts.benchmark_cli import run_benchmark +from fate_test.scripts.config_cli import config_group +from fate_test.scripts.data_cli import data_group +# from fate_test.scripts.flow_test_cli import flow_group +from fate_test.scripts.performance_cli import run_task +from fate_test.scripts.quick_test_cli import unittest_group +# from fate_test.scripts.secure_protocol_cli import secure_protocol_group +from fate_test.scripts.testsuite_cli import run_suite + +commands = { + "config": config_group, + "suite": run_suite, + "performance": run_task, + "benchmark-quality": run_benchmark, + "data": data_group, + "unittest": unittest_group +} + +commands_alias = { + "bq": "benchmark-quality", + "bp": "performance" +} + + +class MultiCLI(click.MultiCommand): + + def list_commands(self, ctx): + return list(commands) + + def get_command(self, ctx, name): + if name not in commands and name in commands_alias: + name = commands_alias[name] + if name not in commands: + ctx.fail("No such command '{}'.".format(name)) + return commands[name] + + +@click.command(cls=MultiCLI, help="A collection of useful tools to running FATE's test.", + context_settings=dict(help_option_names=["-h", "--help"])) +@SharedOptions.get_shared_options() +@click.pass_context +def cli(ctx, **kwargs): + ctx.ensure_object(SharedOptions) + ctx.obj.update(**kwargs) + + +if __name__ == '__main__': + cli(obj=SharedOptions()) diff --git a/python/fate_test/fate_test/scripts/config_cli.py b/python/fate_test/fate_test/scripts/config_cli.py new file mode 100644 index 0000000000..55f0b4c61a --- /dev/null +++ b/python/fate_test/fate_test/scripts/config_cli.py @@ -0,0 +1,79 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pathlib import Path + +import click +from fate_test._client import Clients +from fate_test._config import create_config, default_config, parse_config +from fate_test.scripts._options import SharedOptions + + +@click.group("config", help="fate_test config") +def config_group(): + """ + config fate_test + """ + pass + + +@config_group.command(name="new") +def _new(): + """ + create new fate_test config temperate + """ + create_config(Path("fate_test_config.yaml")) + click.echo(f"create config file: fate_test_config.yaml") + + +@config_group.command(name="edit") +@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def _edit(ctx, **kwargs): + """ + edit fate_test config file + """ + ctx.obj.update(**kwargs) + config = ctx.obj.get("config") + click.edit(filename=config) + + +@config_group.command(name="show") +def _show(): + """ + show fate_test default config path + """ + click.echo(f"default config path is {default_config()}") + + +@config_group.command(name="check") +@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def _config(ctx, **kwargs): + """ + check connection + """ + ctx.obj.update(**kwargs) + config_inst = parse_config(ctx.obj.get("config")) + with Clients(config_inst) as clients: + roles = clients.all_roles() + for r in roles: + try: + version, address = clients[r].check_connection() + except Exception as e: + click.echo(f"[X]connection fail, role is {r}, exception is {e.args}") + else: + click.echo(f"[✓]connection {address} ok, fate version is {version}, role is {r}") diff --git 
import json
import os
import re
import sys
import time
import uuid
from datetime import timedelta
from pathlib import Path

import click
# Fix: this import was commented out while `delete` below still calls
# `with Clients(config_inst) as client:` — that raised NameError at runtime.
from fate_test._client import Clients
from fate_test._config import Config
from fate_test._io import LOGGER, echo
from fate_test.scripts._options import SharedOptions
from fate_test.scripts._utils import _load_testsuites, _delete_data, _big_data_task
from ruamel import yaml

from fate_test import _config


@click.group(name="data")
def data_group():
    """
    upload or delete data in suite config files
    """
    ...


@data_group.command("upload")
@click.option('-i', '--include', required=False, type=click.Path(exists=True), multiple=True, metavar="<include>",
              help="include *benchmark.json under these paths")
@click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True,
              help="exclude *benchmark.json under these paths")
@click.option("-t", "--config-type", type=click.Choice(["min_test", "all_examples"]), default="min_test",
              help="config file")
@click.option('-g', '--glob', type=str,
              help="glob string to filter sub-directory of path specified by <include>")
@click.option('-s', '--suite-type', required=False, type=click.Choice(["testsuite", "benchmark"]), default="testsuite",
              help="suite type")
@click.option('-r', '--role', type=str, default='all',
              help="role to process, default to `all`. "
                   "use option likes: `guest_0`, `host_0`, `host`")
@SharedOptions.get_shared_options(hidden=True)
@click.pass_context
def upload(ctx, include, exclude, glob, suite_type, role, config_type, **kwargs):
    """
    upload data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    # Explicit CLI flags override values loaded from the config file.
    if ctx.obj["extend_sid"] is not None:
        config_inst.extend_sid = ctx.obj["extend_sid"]
    if ctx.obj["auto_increasing_sid"] is not None:
        config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    if len(include) != 0:
        # Upload the datasets declared by the matched suite files.
        echo.echo("loading testsuites:")
        suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"
        suites = _load_testsuites(includes=include, excludes=exclude, glob=glob,
                                  suffix=suffix, suite_type=suite_type)
        for suite in suites:
            if role != "all":
                # Keep only datasets whose role pattern matches the requested role.
                suite.dataset = [d for d in suite.dataset if re.match(d.role_str, role)]
            echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
        if not yes and not click.confirm("running?"):
            return
        # client_upload(suites=suites, config_inst=config_inst, namespace=namespace)
        # todo: upload with pipeline
    else:
        # No suite paths given: fall back to a predefined data-config file.
        config = get_config(config_inst)
        if config_type == 'min_test':
            config_file = config.min_test_data_config
        else:
            config_file = config.all_examples_data_config

        with open(config_file, 'r', encoding='utf-8') as f:
            upload_data = json.loads(f.read())

        echo.echo(f"\tdataset({len(upload_data['data'])}) {config_file}")
        if not yes and not click.confirm("running?"):
            return
        """with Clients(config_inst) as client:
            data_upload(client, config_inst, upload_data)"""
        # @todo: upload data with pipeline
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')


@data_group.command("delete")
@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="<include>",
              help="include *benchmark.json under these paths")
@click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True,
              help="exclude *benchmark.json under these paths")
@click.option('-g', '--glob', type=str,
              help="glob string to filter sub-directory of path specified by <include>")
@click.option('-s', '--suite-type', required=True, type=click.Choice(["testsuite", "benchmark"]), help="suite type")
@SharedOptions.get_shared_options(hidden=True)
@click.pass_context
def delete(ctx, include, exclude, glob, yes, suite_type, **kwargs):
    """
    delete data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"

    suites = _load_testsuites(includes=include, excludes=exclude, glob=glob,
                              suffix=suffix, suite_type=suite_type)
    for suite in suites:
        echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
    # Fix: previously the user was asked "running?" twice (once before the
    # datasets were even listed); confirm exactly once, after listing.
    if not yes and not click.confirm("running?"):
        return
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            _delete_data(client, suite)
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')


@data_group.command("generate")
@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="<include>",
              help="include *testsuite.json / *benchmark.json under these paths")
@click.option('-ht', '--host-data-type', default='tag_value', type=click.Choice(['dense', 'tag', 'tag_value']),
              help="Select the format of the host data")
@click.option('-p', '--encryption-type', type=click.Choice(['sha256', 'md5']),
              help="Entry ID encryption method for, sha256 and md5")
@click.option('-m', '--match-rate', default=1.0, type=float,
              help="Intersection rate relative to guest, between [0, 1]")
@click.option('-s', '--sparsity', default=0.2, type=float,
              help="The sparsity of tag data, The value is between (0-1)")
@click.option('-ng', '--guest-data-size', type=int, default=10000,
              help="Set guest data set size, not less than 100")
@click.option('-nh', '--host-data-size', type=int,
              help="Set host data set size, not less than 100")
@click.option('-fg', '--guest-feature-num', type=int, default=20,
              help="Set guest feature dimensions")
@click.option('-fh', '--host-feature-num', type=int, default=200,
              help="Set host feature dimensions; the default is equal to the number of guest's size")
@click.option('-o', '--output-path', type=click.Path(exists=True),
              help="Customize the output path of generated data")
@click.option('--force', is_flag=True, default=False,
              help="Overwrite existing file")
@click.option('--split-host', is_flag=True, default=False,
              help="Divide the amount of host data equally among all the host tables in TestSuite")
@click.option('--upload-data', is_flag=True, default=False,
              help="Generated data will be uploaded")
@click.option('--remove-data', is_flag=True, default=False,
              help="The generated data will be deleted")
@click.option('--parallelize', is_flag=True, default=False,
              help="It is directly used to upload data, and will not generate data")
@click.option('--use-local-data', is_flag=True, default=False,
              help="The existing data of the server will be uploaded, This parameter is not recommended for "
                   "distributed applications")
@SharedOptions.get_shared_options(hidden=True)
@click.pass_context
def generate(ctx, include, host_data_type, encryption_type, match_rate, sparsity, guest_data_size,
             host_data_size, guest_feature_num, host_feature_num, output_path, force, split_host, upload_data,
             remove_data, use_local_data, parallelize, **kwargs):
    """
    create data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    if ctx.obj["extend_sid"] is not None:
        config_inst.extend_sid = ctx.obj["extend_sid"]
    if ctx.obj["auto_increasing_sid"] is not None:
        config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    # --parallelize writes tables directly through a computing session, so a
    # separate upload pass is redundant.
    if parallelize and upload_data:
        upload_data = False
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    if host_data_size is None:
        host_data_size = guest_data_size
    suites = _load_testsuites(includes=include, excludes=tuple(), glob=None)
    suites += _load_testsuites(includes=include, excludes=tuple(), glob=None,
                               suffix="benchmark.json", suite_type="benchmark")
    for suite in suites:
        if upload_data:
            echo.echo(f"\tdataget({len(suite.dataset)}) dataset({len(suite.dataset)}) {suite.path}")
        else:
            echo.echo(f"\tdataget({len(suite.dataset)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return

    _big_data_task(include, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type,
                   config_inst, encryption_type, match_rate, sparsity, force, split_host, output_path, parallelize)
    if upload_data:
        if use_local_data:
            _config.use_local_data = 0
        _config.data_switch = remove_data
        # client_upload(suites=suites, config_inst=config_inst, namespace=namespace, output_path=output_path)
        # todo: upload with pipeline


@data_group.command("download")
@click.option("-t", "--type", type=click.Choice(["mnist"]), default="mnist",
              help="config file")
@click.option('-o', '--output-path', type=click.Path(exists=True),
              help="output path of mnist data, the default path is examples/data")
@SharedOptions.get_shared_options(hidden=True)
@click.pass_context
def download_mnists(ctx, output_path, **kwargs):
    """
    download mnist data for flow
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if output_path is None:
        config = get_config(config_inst)
        output_path = str(config.data_base_dir) + "/examples/data/"
    if not yes and not click.confirm("running?"):
        return
    try:
        download_mnist(Path(output_path), "mnist_train")
        download_mnist(Path(output_path), "mnist_eval", is_train=False)
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()
        echo.farewell()
        echo.echo(f"testsuite namespace: {namespace}", fg='red')


@data_group.command("query_schema")
@click.option('-cpn', '--component-name', required=False, type=str, help="component name", default='dataio_0')
@click.option('-j', '--job-id', required=True, type=str, help="job id")
# Fix: the help text for --role previously said "job id" (copy-paste error).
@click.option('-r', '--role', required=True, type=click.Choice(["guest", "host", "arbiter"]), help="role")
@click.option('-p', '--party-id', required=True, type=str, help="party id")
@SharedOptions.get_shared_options(hidden=True)
@click.pass_context
def query_schema(ctx, component_name, job_id, role, party_id, **kwargs):
    """
    query the meta of the output data of a component
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    config_inst = ctx.obj["config"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return
    # todo: upload data with pipeline
    """with Clients(config_inst) as client:
        query_component_output_data(client, config_inst, component_name, job_id, role, party_id)"""
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')


def get_config(conf: Config):
    # Trivial accessor kept for symmetry with callers; returns the Config as-is.
    return conf


def query_component_output_data(clients, config: Config, component_name, job_id, role, party_id):
    """Query a component's output-table schema and echo the feature header."""
    roles = config.role
    clients_role = None
    # Map (role, party_id) back to the client key, e.g. "guest_0".
    for k, v in roles.items():
        if int(party_id) in v and k == role:
            clients_role = role + "_" + str(v.index(int(party_id)))
    try:
        if clients_role is None:
            raise ValueError(f"party id {party_id} does not exist")

        try:
            table_info = clients[clients_role].output_data_table(job_id=job_id, role=role, party_id=party_id,
                                                                component_name=component_name)
            table_info = clients[clients_role].table_info(table_name=table_info['name'],
                                                          namespace=table_info['namespace'])
        except Exception as e:
            raise RuntimeError(f"An exception occurred while getting data {clients_role}<-{component_name}") from e

        echo.echo("query_component_output_data result: {}".format(table_info))
        try:
            header = table_info['data']['schema']['header']
        except ValueError as e:
            raise ValueError(f"Obtain header from table error, error msg: {e}")

        # Skip the id column (header[0]) and report the feature names.
        result = []
        for idx, header_name in enumerate(header[1:]):
            result.append((idx, header_name))
        echo.echo("Queried header is {}".format(result))
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()


def download_mnist(base, name, is_train=True):
    """Download MNIST via torchvision and convert it to fate's vision layout."""
    import torchvision

    dataset = torchvision.datasets.MNIST(
        root=base.joinpath(".cache"), train=is_train, download=True
    )
    converted_path = base.joinpath(name)
    converted_path.mkdir(exist_ok=True)

    inputs_path = converted_path.joinpath("images")
    inputs_path.mkdir(exist_ok=True)
    targets_path = converted_path.joinpath("targets")
    config_path = converted_path.joinpath("config.yaml")
    filenames_path = converted_path.joinpath("filenames")

    with filenames_path.open("w") as filenames:
        with targets_path.open("w") as targets:
            for idx, (img, target) in enumerate(dataset):
                filename = f"{idx:05d}"
                # Fix: the three f-strings below had lost their {filename}
                # placeholder to extraction garbling ("(unknown)").
                # save img
                img.save(inputs_path.joinpath(f"{filename}.jpg"))
                # save target
                targets.write(f"{filename},{target}\n")
                # save filenames
                filenames.write(f"{filename}\n")

    config = {
        "type": "vision",
        "inputs": {"type": "images", "ext": "jpg", "PIL_mode": "L"},
        "targets": {"type": "integer"},
    }
    with config_path.open("w") as f:
        yaml.safe_dump(config, f, indent=2, default_flow_style=False)


"""def client_upload(suites, config_inst, namespace, output_path=None):
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                try:
                    _upload_data(client, suite, config_inst, output_path)
                except Exception as e:
                    raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
        echo.farewell()
        echo.echo(f"testsuite namespace: {namespace}", fg='red')
"""


def data_upload(clients, conf: Config, upload_config):
    """Upload every dataset in upload_config through the guest_0 flow client,
    waiting for each upload job and verifying the resulting row count."""

    def _await_finish(job_id, task_name=None):
        # Poll job status once a second until it leaves running/waiting.
        deadline = time.time() + sys.maxsize
        start = time.time()
        param = dict(
            job_id=job_id,
            role=None
        )
        while True:
            stdout = clients["guest_0"].flow_client("job/query", param)
            status = stdout["data"][0]["f_status"]
            elapse_seconds = int(time.time() - start)
            date = time.strftime('%Y-%m-%d %X')
            if task_name:
                log_msg = f"[{date}][{task_name}]{status}, elapse: {timedelta(seconds=elapse_seconds)}"
            else:
                log_msg = f"[{date}]{job_id} {status}, elapse: {timedelta(seconds=elapse_seconds)}"
            if (status == "running" or status == "waiting") and time.time() < deadline:
                print(log_msg, end="\r")
                time.sleep(1)
                continue
            else:
                print(" " * 60, end="\r")  # clean line
                echo.echo(log_msg)
                return status

    task_data = upload_config["data"]
    for i, data in enumerate(task_data):
        format_msg = f"@{data['file']} >> {data['namespace']}.{data['table_name']}"
        echo.echo(f"[{time.strftime('%Y-%m-%d %X')}]uploading {format_msg}")
        try:
            data["file"] = str(os.path.join(conf.data_base_dir, data["file"]))
            param = dict(
                file=data["file"],
                head=data["head"],
                partition=data["partition"],
                table_name=data["table_name"],
                namespace=data["namespace"]
            )
            stdout = clients["guest_0"].flow_client("data/upload", param, drop=1)
            job_id = stdout.get('jobId', None)
            echo.echo(f"[{time.strftime('%Y-%m-%d %X')}]upload done {format_msg}, job_id={job_id}\n")
            if job_id is None:
                # Flow returns no jobId when the table already exists and drop was refused.
                echo.echo("table already exist. To upload again, Please add '-f 1' in start cmd")
                continue
            _await_finish(job_id)
            param = dict(
                table_name=data["table_name"],
                namespace=data["namespace"]
            )
            stdout = clients["guest_0"].flow_client("table/info", param)

            count = stdout["data"]["count"]
            if count != data["count"]:
                raise AssertionError("Count of upload file is not as expect, count is: {},"
                                     "expect is: {}".format(count, data["count"]))
            echo.echo(f"[{time.strftime('%Y-%m-%d %X')}] check_data_out {stdout} \n")
        except Exception as e:
            exception_id = uuid.uuid1()
            echo.echo(f"exception in {data['file']}, exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
            echo.echo(f"upload {i + 1}th data {data['table_name']} fail, exception_id: {exception_id}")
            # raise RuntimeError(f"exception occur while uploading data for {data['file']}") from e
        finally:
            echo.stdout_newline()
# ==== python/fate_test/fate_test/scripts/generate_mock_data.py ====
import functools
import hashlib
import json
import os
import random
import sys
import threading
import time
import uuid

import numpy as np
import pandas as pd
from fate_test._config import Config
from fate_test._io import echo, LOGGER


def import_fate():
    # Deferred imports of the FATE runtime so that importing this module only
    # fails when fate_arch/fate_flow are actually unavailable.
    from fate_arch import storage
    from fate_flow.utils import data_utils
    from fate_arch import session
    from fate_arch.storage import StorageEngine
    from fate_arch.common.conf_utils import get_base_config
    from fate_arch.storage import EggRollStoreType
    return storage, data_utils, session, StorageEngine, get_base_config, EggRollStoreType


storage, data_utils, session, StorageEngine, get_base_config, EggRollStoreType = import_fate()

sys.setrecursionlimit(1000000)


class data_progress:
    # Shared progress state between the generator thread and the printer
    # thread started in data_save() below.
    def __init__(self, down_load, time_start):
        self.time_start = time_start
        self.down_load = down_load   # progress-bar label line
        self.time_percent = 0        # current percentage (0-100)
        self.switch = True           # printer thread keeps running while True

    def set_switch(self, switch):
        self.switch = switch

    def get_switch(self):
        return self.switch

    def set_time_percent(self, time_percent):
        self.time_percent = time_percent

    def get_time_percent(self):
        return self.time_percent

    def progress(self, percent):
        # Render the progress line in place (carriage return, no newline).
        if percent > 100:
            percent = 100
        end = time.time()
        if percent != 100:
            print(f"\r{self.down_load} %.f%s [%s] running" % (percent, '%', self.timer(end - self.time_start)),
                  flush=True, end='')
        else:
            print(f"\r{self.down_load} %.f%s [%s] success" % (percent, '%', self.timer(end - self.time_start)),
                  flush=True, end='')

    @staticmethod
    def timer(times):
        # Format a duration in seconds as HH:MM:SS.
        hours, rem = divmod(times, 3600)
        minutes, seconds = divmod(rem, 60)
        return "{:0>2}:{:0>2}:{:0>2}".format(int(hours), int(minutes), int(seconds))


def remove_file(path):
    os.remove(path)


def id_encryption(encryption_type, start_num, end_num):
    # Produce ids for [start_num, end_num): hashed with md5/sha256, or the
    # plain decimal string for any other encryption_type value.
    if encryption_type == 'md5':
        return [hashlib.md5(bytes(str(value), encoding='utf-8')).hexdigest() for value in range(start_num, end_num)]
    elif encryption_type == 'sha256':
        return [hashlib.sha256(bytes(str(value), encoding='utf-8')).hexdigest() for value in range(start_num, end_num)]
    else:
        return [str(value) for value in range(start_num, end_num)]


def get_big_data(guest_data_size, host_data_size, guest_feature_num, host_feature_num, include_path, host_data_type,
                 conf: Config, encryption_type, match_rate, sparsity, force, split_host, output_path, parallelize):
    # Generate mock guest/host datasets described by a testsuite config file,
    # either as CSV files on disk or (with parallelize) directly into FATE
    # storage tables. `big_data_dir` is a module-level global set below.
    global big_data_dir

    def list_tag_value(feature_nums, head):
        # data = ''
        # for f in range(feature_nums):
        #     data += head[f] + ':' + str(round(np.random.randn(), 4)) + ";"
        # return data[:-1]
        return ";".join([head[k] + ':' + str(round(v, 4)) for k, v in enumerate(np.random.randn(feature_nums))])

    def list_tag(feature_nums, data_list):
        # Random sample of tags joined with ';'.
        data = ''
        for f in range(feature_nums):
            data += random.choice(data_list) + ";"
        return data[:-1]

    def _generate_tag_value_data(data_path, start_num, end_num, feature_nums, progress):
        # Append "id,<x0:v;x1:v;...>" rows to data_path in ~1% batches,
        # updating the progress bar per batch.
        data_num = end_num - start_num
        section_data_size = round(data_num / 100)
        iteration = round(data_num / section_data_size)
        head = ['x' + str(i) for i in range(feature_nums)]
        for batch in range(iteration + 1):
            progress.set_time_percent(batch)
            output_data = pd.DataFrame(columns=["id"])
            if section_data_size * (batch + 1) <= data_num:
                output_data["id"] = id_encryption(encryption_type, section_data_size * batch + start_num,
                                                 section_data_size * (batch + 1) + start_num)
                slicing_data_size = section_data_size
            elif section_data_size * batch < data_num:
                # Final, possibly shorter batch.
                output_data['id'] = id_encryption(encryption_type, section_data_size * batch + start_num, end_num)
                slicing_data_size = data_num - section_data_size * batch
            else:
                break
            feature = [list_tag_value(feature_nums, head) for i in range(slicing_data_size)]
            output_data['feature'] = feature
            output_data.to_csv(data_path, mode='a+', index=False, header=False)

    def _generate_dens_data(data_path, start_num, end_num, feature_nums, label_flag, progress):
        # Dense CSV: header row first, then batches of id[,y],x0..xn rows.
        if label_flag:
            head_1 = ['id', 'y']
        else:
            head_1 = ['id']
        data_num = end_num - start_num
        head_2 = ['x' + str(i) for i in range(feature_nums)]
        df_data_1 = pd.DataFrame(columns=head_1)
        head_data = pd.DataFrame(columns=head_1 + head_2)
        head_data.to_csv(data_path, mode='a+', index=False)
        section_data_size = round(data_num / 100)
        iteration = round(data_num / section_data_size)
        for batch in range(iteration + 1):
            progress.set_time_percent(batch)
            if section_data_size * (batch + 1) <= data_num:
                df_data_1["id"] = id_encryption(encryption_type, section_data_size * batch + start_num,
                                                section_data_size * (batch + 1) + start_num)
                slicing_data_size = section_data_size
            elif section_data_size * batch < data_num:
                df_data_1 = pd.DataFrame(columns=head_1)
                df_data_1["id"] = id_encryption(encryption_type, section_data_size * batch + start_num, end_num)
                slicing_data_size = data_num - section_data_size * batch
            else:
                break
            if label_flag:
                # Binary label drawn uniformly from {0, 1}.
                df_data_1["y"] = [round(np.random.random()) for x in range(slicing_data_size)]
            feature = np.random.randint(-10000, 10000, size=[slicing_data_size, feature_nums]) / 10000
            df_data_2 = pd.DataFrame(feature, columns=head_2)
            output_data = pd.concat([df_data_1, df_data_2], axis=1)
            output_data.to_csv(data_path, mode='a+', index=False, header=False)

    def _generate_tag_data(data_path, start_num, end_num, feature_nums, sparsity, progress):
        # Tag CSV: each row is an id plus feature_nums tags sampled from a
        # pool of size feature_nums/sparsity.
        data_num = end_num - start_num
        section_data_size = round(data_num / 100)
        iteration = round(data_num / section_data_size)
        valid_set = [x for x in range(2019120799, 2019120799 + round(feature_nums / sparsity))]
        data = list(map(str, valid_set))
        for batch in range(iteration + 1):
            progress.set_time_percent(batch)
            output_data = pd.DataFrame(columns=["id"])
            if section_data_size * (batch + 1) <= data_num:
                output_data["id"] = id_encryption(encryption_type, section_data_size * batch + start_num,
                                                 section_data_size * (batch + 1) + start_num)
                slicing_data_size = section_data_size
            elif section_data_size * batch < data_num:
                output_data["id"] = id_encryption(encryption_type, section_data_size * batch + start_num, end_num)
                slicing_data_size = data_num - section_data_size * batch
            else:
                break
            feature = [list_tag(feature_nums, data_list=data) for i in range(slicing_data_size)]
            output_data['feature'] = feature
            output_data.to_csv(data_path, mode='a+', index=False, header=False)

    def _generate_parallelize_data(start_num, end_num, feature_nums, table_name, namespace, label_flag, data_type,
                                   partition, progress):
        # Build the table inside the computing session (`sess` is the
        # enclosing with-session from the caller) instead of writing CSVs.
        def expand_id_range(k, v):
            # flatMap kernel: expand one (offset, feature_count) seed pair
            # into up to `step` (id, row-string) records.
            if label_flag:
                return [(id_encryption(encryption_type, ids, ids + 1)[0],
                         ",".join([str(round(np.random.random()))] + [str(round(i, 4)) for i in np.random.randn(v)]))
                        for ids in range(int(k), min(step + int(k), end_num))]
            else:
                if data_type == 'tag':
                    valid_set = [x for x in range(2019120799, 2019120799 + round(feature_nums / sparsity))]
                    data = list(map(str, valid_set))
                    return [(id_encryption(encryption_type, ids, ids + 1)[0],
                             ";".join([random.choice(data) for i in range(int(v))]))
                            for ids in range(int(k), min(step + int(k), data_num))]

                elif data_type == 'tag_value':
                    # NOTE(review): `f"x{i}"` uses the random float `i` itself
                    # as the tag name — looks like it was meant to be an index;
                    # confirm against expected tag_value format.
                    return [(id_encryption(encryption_type, ids, ids + 1)[0],
                             ";".join([f"x{i}" + ':' + str(round(i, 4)) for i in np.random.randn(v)]))
                            for ids in range(int(k), min(step + int(k), data_num))]
                elif data_type == 'dense':
                    return [(id_encryption(encryption_type, ids, ids + 1)[0],
                             ",".join([str(round(i, 4)) for i in np.random.randn(v)]))
                            for ids in range(int(k), min(step + int(k), data_num))]

        data_num = end_num - start_num
        step = 10000 if data_num > 10000 else int(data_num / 10)
        table_list = [(f"{i * step}", f"{feature_nums}") for i in range(int(data_num / step) + start_num)]
        table = sess.computing.parallelize(table_list, partition=partition, include_key=True)
        table = table.flatMap(functools.partial(expand_id_range))
        if label_flag:
            schema = {"sid": "id", "header": ",".join(["y"] + [f"x{i}" for i in range(feature_nums)])}
        else:
            schema = {"sid": "id", "header": ",".join([f"x{i}" for i in range(feature_nums)])}
        if data_type != "dense":
            # Sparse formats carry no fixed header.
            schema = None

        # Replace any pre-existing table of the same name/namespace.
        h_table = sess.get_table(name=table_name, namespace=namespace)
        if h_table:
            h_table.destroy()

        table_meta = sess.persistent(computing_table=table, name=table_name, namespace=namespace, schema=schema)

        storage_session = sess.storage()
        s_table = storage_session.get_table(namespace=table_meta.get_namespace(), name=table_meta.get_name())
        if s_table.count() == data_num:
            progress.set_time_percent(100)
        from fate_flow.manager.data_manager import DataTableTracker
        DataTableTracker.create_table_tracker(
            table_name=table_name,
            table_namespace=namespace,
            entity_info={}
        )

    def data_save(data_info, table_names, namespaces, partition_list):
        # Drive generation for every dataset in data_info, one at a time, with
        # a background thread printing the progress bar.
        data_count = 0
        for idx, data_name in enumerate(data_info.keys()):
            label_flag = True if 'guest' in data_info[data_name] else False
            data_type = 'dense' if 'guest' in data_info[data_name] else host_data_type
            if split_host and ('host' in data_info[data_name]):
                # Split host rows evenly across the host tables in the suite.
                host_end_num = int(np.ceil(host_data_size / len(data_info))) * (data_count + 1) if np.ceil(
                    host_data_size / len(data_info)) * (data_count + 1) <= host_data_size else host_data_size
                host_start_num = int(np.ceil(host_data_size / len(data_info))) * data_count
                data_count += 1
            else:
                host_end_num = host_data_size
                host_start_num = 0
            out_path = os.path.join(str(big_data_dir), data_name)
            if os.path.exists(out_path) and os.path.isfile(out_path) and not parallelize:
                if force:
                    remove_file(out_path)
                else:
                    echo.echo('{} Already exists'.format(out_path))
                    continue
            data_i = (idx + 1) / len(data_info)
            downLoad = f'dataget [{"#" * int(24 * data_i)}{"-" * (24 - int(24 * data_i))}] {idx + 1}/{len(data_info)}'
            start = time.time()
            progress = data_progress(downLoad, start)
            thread = threading.Thread(target=run, args=[progress])
            thread.start()

            try:
                if 'guest' in data_info[data_name]:
                    if not parallelize:
                        _generate_dens_data(out_path, guest_start_num, guest_end_num,
                                            guest_feature_num, label_flag, progress)
                    else:
                        _generate_parallelize_data(
                            guest_start_num,
                            guest_end_num,
                            guest_feature_num,
                            table_names[idx],
                            namespaces[idx],
                            label_flag,
                            data_type,
                            partition_list[idx],
                            progress)
                else:
                    if data_type == 'tag' and not parallelize:
                        _generate_tag_data(out_path, host_start_num, host_end_num, host_feature_num, sparsity, progress)
                    elif data_type == 'tag_value' and not parallelize:
                        _generate_tag_value_data(out_path, host_start_num, host_end_num, host_feature_num, progress)
                    elif data_type == 'dense' and not parallelize:
                        _generate_dens_data(out_path, host_start_num, host_end_num,
                                            host_feature_num, label_flag, progress)
                    elif parallelize:
                        _generate_parallelize_data(
                            host_start_num,
                            host_end_num,
                            host_feature_num,
                            table_names[idx],
                            namespaces[idx],
                            label_flag,
                            data_type,
                            partition_list[idx],
                            progress)
                progress.set_switch(False)
                time.sleep(1)
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                # Always stop the printer thread, even on failure.
                progress.set_switch(False)
                echo.stdout_newline()

    def run(p):
        # Printer loop executed on a background thread.
        while p.get_switch():
            time.sleep(1)
            p.progress(p.get_time_percent())

    if not match_rate > 0 or not match_rate <= 1:
        raise Exception(f"The value is between (0-1), Please check match_rate:{match_rate}")
    # Offset guest ids so that exactly match_rate of them overlap host ids.
    guest_start_num = host_data_size - int(guest_data_size * match_rate)
    guest_end_num = guest_start_num + guest_data_size

    # NOTE(review): checks with os.path.isfile but reads via Path.open —
    # include_path is presumably a pathlib.Path; confirm against callers.
    if os.path.isfile(include_path):
        with include_path.open("r") as f:
            testsuite_config = json.load(f)
    else:
        raise Exception(f'Input file error, please check{include_path}.')
    try:
        if output_path is not None:
            big_data_dir = os.path.abspath(output_path)
        else:
            big_data_dir = os.path.abspath(conf.cache_directory)
    except Exception:
        raise Exception('{}path does not exist'.format(big_data_dir))
    date_set = {}
    table_name_list = []
    table_namespace_list = []
    partition_list = []
    for upload_dict in testsuite_config.get('data'):
        # Keyed by file basename; value is the owning role string.
        date_set[os.path.basename(upload_dict.get('file'))] = upload_dict.get('role')
        table_name_list.append(upload_dict.get('table_name'))
        table_namespace_list.append(upload_dict.get('namespace'))
        partition_list.append(upload_dict.get('partition', 8))

    if parallelize:
        # data_save's nested _generate_parallelize_data reads `sess` from
        # this enclosing scope.
        with session.Session() as sess:
            session_id = str(uuid.uuid1())
            sess.init_computing(session_id)
            data_save(
                data_info=date_set,
                table_names=table_name_list,
                namespaces=table_namespace_list,
                partition_list=partition_list)
    else:
        data_save(
            data_info=date_set,
            table_names=table_name_list,
            namespaces=table_namespace_list,
            partition_list=partition_list)
    echo.echo(f'Data storage address, please check{big_data_dir}')

# ==== python/fate_test/fate_test/scripts/performance_cli.py ====
#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
+# +import glob +import json +import os +import time +import uuid +from datetime import timedelta + +import click +from fate_test._client import Clients +from fate_test._config import Config +from fate_test._flow_client import JobProgress, SubmitJobResponse, QueryJobResponse +from fate_test._io import LOGGER, echo +from fate_test._parser import JSON_STRING, Testsuite +from fate_test.scripts._options import SharedOptions +from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script, \ + _add_replace_hook +from fate_test.utils import TxtStyle +from prettytable import PrettyTable, ORGMODE + +from fate_test import _config + + +@click.command("performance") +@click.option('-t', '--job-type', type=click.Choice(['intersect', 'intersect_multi', 'hetero_lr', 'hetero_sbt']), + help="Select the job type, you can also set through include") +@click.option('-i', '--include', type=click.Path(exists=True), multiple=True, metavar="", + help="include *testsuite.json under these paths") +@click.option('-r', '--replace', default="{}", type=JSON_STRING, + help="a json string represents mapping for replacing fields in data/conf/dsl") +@click.option('-m', '--timeout', type=int, default=3600, + help="maximun running time of job") +@click.option('-e', '--max-iter', type=int, help="When the algorithm model is LR, the number of iterations is set") +@click.option('-d', '--max-depth', type=int, + help="When the algorithm model is SecureBoost, set the number of model layers") +@click.option('-nt', '--num-trees', type=int, help="When the algorithm model is SecureBoost, set the number of trees") +@click.option('-p', '--task-cores', type=int, help="processors per node") +@click.option('-uj', '--update-job-parameters', default="{}", type=JSON_STRING, + help="a json string represents mapping for replacing fields in conf.job_parameters") +@click.option('-uc', '--update-component-parameters', default="{}", type=JSON_STRING, + help="a json string 
represents mapping for replacing fields in conf.component_parameters") +@click.option('-s', '--storage-tag', type=str, + help="tag for storing performance time consuming, for future comparison") +@click.option('-v', '--history-tag', type=str, multiple=True, + help="Extract performance time consuming from history tags for comparison") +@click.option("--skip-data", is_flag=True, default=False, + help="skip uploading data specified in testsuite") +@click.option("--provider", type=str, + help="Select the fate version, for example: fate@1.7") +@click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) +@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def run_task(ctx, job_type, include, replace, timeout, update_job_parameters, update_component_parameters, max_iter, + max_depth, num_trees, task_cores, storage_tag, history_tag, skip_data, clean_data, provider, **kwargs): + """ + Test the performance of big data tasks, alias: bp + """ + ctx.obj.update(**kwargs) + ctx.obj.post_process() + config_inst = ctx.obj["config"] + if ctx.obj["extend_sid"] is not None: + config_inst.extend_sid = ctx.obj["extend_sid"] + if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"] + namespace = ctx.obj["namespace"] + yes = ctx.obj["yes"] + data_namespace_mangling = ctx.obj["namespace_mangling"] + if clean_data is None: + clean_data = config_inst.clean_data + + def get_perf_template(conf: Config, job_type): + perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir) + '/' + job_type + '/' + "*testsuite.json") + return glob.glob(perf_dir) + + if not include: + include = get_perf_template(config_inst, job_type) + # prepare output dir and json hooks + _add_replace_hook(replace) + + echo.welcome() + echo.echo(f"testsuite namespace: {namespace}", fg='red') + echo.echo("loading testsuites:") + suites = _load_testsuites(includes=include, excludes=tuple(), glob=None, provider=provider) + for 
def _submit_job(clients: Clients, suite: Testsuite, namespace: str, config: Config, timeout, update_job_parameters,
                storage_tag, history_tag, update_component_parameters, max_iter, max_depth, num_trees, task_cores):
    """Submit every DSL job of *suite* in order, resolve inter-job dependencies,
    record per-job wall-clock time, and return the times as "<seconds>s" strings.

    NOTE(review): reconstructed from a line-mangled patch; indentation of the
    dependency-resolution section follows upstream fate_test — confirm against
    the original file.
    """
    # submit jobs
    with click.progressbar(length=len(suite.jobs),
                           label="jobs",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        time_list = []
        for job in suite.jobs_iter():
            start = time.time()
            job_progress = JobProgress(job.job_name)

            def _raise():
                # tag the failure with a unique id so logs can be correlated
                exception_id = str(uuid.uuid1())
                job_progress.exception(exception_id)
                suite.update_status(job_name=job.job_name, exception_id=exception_id)
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")

            # apply CLI overrides (max_iter/max_depth/num_trees/task_cores) to the job conf
            # noinspection PyBroadException
            try:
                if max_iter is not None:
                    job.job_conf.update_component_parameters('max_iter', max_iter)
                if max_depth is not None:
                    job.job_conf.update_component_parameters('max_depth', max_depth)
                if num_trees is not None:
                    job.job_conf.update_component_parameters('num_trees', num_trees)
                if task_cores is not None:
                    job.job_conf.update_job_common_parameters(task_cores=task_cores)
                job.job_conf.update(config.parties, timeout, update_job_parameters, update_component_parameters)
            except Exception:
                _raise()
                continue

            def update_bar(n_step):
                # refresh the progress line with the latest job status text
                bar.item_show_func = lambda x: job_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            update_bar(1)

            def _call_back(resp: SubmitJobResponse):
                # invoked by the client while the job runs; updates progress state
                if isinstance(resp, SubmitJobResponse):
                    job_progress.submitted(resp.job_id)
                    echo.file(f"[jobs] {resp.job_id} ", nl=False)
                    suite.update_status(job_name=job.job_name, job_id=resp.job_id)

                if isinstance(resp, QueryJobResponse):
                    job_progress.running(resp.status, resp.progress)

                update_bar(0)

            # noinspection PyBroadException
            try:
                response = clients["guest_0"].submit_job(job=job, callback=_call_back)

                # noinspection PyBroadException
                try:
                    # add notes (best-effort; failures here are deliberately ignored)
                    notes = f"{job.job_name}@{suite.path}@{namespace}"
                    for role, party_id_list in job.job_conf.role.items():
                        for i, party_id in enumerate(party_id_list):
                            clients[f"{role}_{i}"].add_notes(job_id=response.job_id, role=role, party_id=party_id,
                                                             notes=notes)
                except Exception:
                    pass
            except Exception:
                _raise()
            else:
                job_progress.final(response.status)
                suite.update_status(job_name=job.job_name, status=response.status.status)
                if response.status.is_success():
                    # feed this job's outputs (data/model/cache/model-loader) into jobs that depend on it
                    if suite.model_in_dep(job.job_name):
                        dependent_jobs = suite.get_dependent_jobs(job.job_name)
                        for predict_job in dependent_jobs:
                            model_info, table_info, cache_info, model_loader_info = None, None, None, None
                            for i in _config.deps_alter[predict_job.job_name]:
                                if isinstance(i, dict):
                                    name = i.get('name')
                                    data_pre = i.get('data')

                            if 'data_deps' in _config.deps_alter[predict_job.job_name]:
                                # map upstream component output tables into the predict job's inputs
                                roles = list(data_pre.keys())
                                table_info, hierarchy = [], []
                                for role_ in roles:
                                    role, index = role_.split("_")
                                    input_ = data_pre[role_]
                                    for data_input, cpn in input_.items():
                                        try:
                                            table_name = clients["guest_0"].output_data_table(
                                                job_id=response.job_id,
                                                role=role,
                                                party_id=config.role[role][int(index)],
                                                component_name=cpn)
                                        except Exception:
                                            _raise()
                                        if predict_job.job_conf.dsl_version == 2:
                                            hierarchy.append([role, index, data_input])
                                            table_info.append({'table': table_name})
                                        else:
                                            hierarchy.append([role, 'args', 'data'])
                                            table_info.append({data_input: [table_name]})
                                table_info = {'hierarchy': hierarchy, 'table_info': table_info}
                            if 'model_deps' in _config.deps_alter[predict_job.job_name]:
                                if predict_job.job_conf.dsl_version == 2:
                                    # noinspection PyBroadException
                                    try:
                                        model_info = clients["guest_0"].deploy_model(
                                            model_id=response.model_info["model_id"],
                                            model_version=response.model_info["model_version"],
                                            dsl=predict_job.job_dsl.as_dict())
                                    except Exception:
                                        _raise()
                                else:
                                    model_info = response.model_info
                            if 'cache_deps' in _config.deps_alter[predict_job.job_name]:
                                # reference this job's id for every CacheLoader component
                                cache_dsl = predict_job.job_dsl.as_dict()
                                cache_info = []
                                for cpn in cache_dsl.get("components").keys():
                                    if "CacheLoader" in cache_dsl.get("components").get(cpn).get("module"):
                                        cache_info.append({cpn: {'job_id': response.job_id}})
                                cache_info = {'hierarchy': [""], 'cache_info': cache_info}
                            if 'model_loader_deps' in _config.deps_alter[predict_job.job_name]:
                                # pass this job's model info to every ModelLoader component
                                model_loader_dsl = predict_job.job_dsl.as_dict()
                                model_loader_info = []
                                for cpn in model_loader_dsl.get("components").keys():
                                    if "ModelLoader" in model_loader_dsl.get("components").get(cpn).get("module"):
                                        model_loader_info.append({cpn: response.model_info})
                                model_loader_info = {'hierarchy': [""], 'model_loader_info': model_loader_info}

                            suite.feed_dep_info(predict_job, name, model_info=model_info, table_info=table_info,
                                                cache_info=cache_info, model_loader_info=model_loader_info)
                        suite.remove_dependency(job.job_name)
            update_bar(0)
            time_consuming = time.time() - start
            performance_dir = "/".join(
                [os.path.join(os.path.abspath(config.cache_directory), 'benchmark_history', "performance.json")])
            fate_version = clients["guest_0"].get_version()
            if history_tag:
                # compare against historical timings recorded under these tags
                history_tag = ["_".join([i, job.job_name]) for i in history_tag]
                comparison_quality(job.job_name, history_tag, performance_dir, time_consuming)
            if storage_tag:
                # persist this run's timing for future comparisons
                storage_tag = "_".join(['FATE', fate_version, storage_tag, job.job_name])
                save_quality(storage_tag, performance_dir, time_consuming)
            echo.stdout_newline()
            time_list.append(time_consuming)
    return [str(int(i)) + "s" for i in time_list]
def save_quality(storage_tag, save_dir, time_consuming):
    """Record *time_consuming* under *storage_tag* in the JSON history file at *save_dir*.

    Creates the parent directory on demand, merges with any existing history,
    and reports success or failure on stdout.
    """
    os.makedirs(os.path.dirname(save_dir), exist_ok=True)
    # load the existing history, if any, so earlier tags are preserved
    history = {}
    if os.path.exists(save_dir):
        with open(save_dir, 'r') as history_file:
            history = json.load(history_file, object_hook=dict)
    history[storage_tag] = time_consuming
    try:
        with open(save_dir, 'w') as history_file:
            json.dump(history, history_file, indent=2)
        print("\n" + "Storage successful, please check: ", save_dir)
    except Exception:
        print("\n" + "Storage failed, please check: ", save_dir)
def run_test(includes, conf: Config, error_log_file):
    """Discover and run federatedml unit-test scripts, appending failures to *error_log_file*.

    Parameters
    ----------
    includes: paths to search for test files; when empty, the whole
        ``python/federatedml`` tree under the configured FATE base is scanned
    conf: Config, supplies ``fate_base`` used to locate the code tree
    error_log_file: path of the log file where failing tests' output is appended
    """

    def error_log(stdout):
        # called with None, acts as a getter returning the log's absolute path;
        # otherwise appends the given text to the log file
        if stdout is None:
            return os.path.abspath(error_log_file)
        with open(error_log_file, "a") as f:
            f.write(stdout)

    def run_test(file):
        # NOTE: intentionally shadows the enclosing function's name; only this
        # inner helper is called below
        global failed_count
        echo.echo("start to run test {}".format(file))
        try:
            subp = subprocess.Popen(["python", file],
                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            stdout, stderr = subp.communicate()
            stdout = stdout.decode("utf-8")
            echo.echo(stdout)
            # unittest prints "FAILED" when any case fails
            if "FAILED" in stdout:
                failed_count += 1
                error_log(stdout=f"error sequence {failed_count}: {file}")
                error_log(stdout=stdout)
        except Exception:
            return

    def traverse_folder(file_fullname):
        # depth-first walk: run any *_test.py found under a test/tests directory,
        # skipping ftl tests
        if os.path.isfile(file_fullname):
            if "_test.py" in file_fullname and "ftl" not in file_fullname:
                run_test(file_fullname)
        else:
            for file in os.listdir(file_fullname):
                file_fullname_new = os.path.join(file_fullname, file)
                if os.path.isdir(file_fullname_new):
                    traverse_folder(file_fullname_new)
                if "_test.py" in file and ("/test" in file_fullname or "tests" in file_fullname):
                    if "ftl" in file_fullname_new:
                        continue
                    else:
                        run_test(file_fullname_new)

    global failed_count
    failed_count = 0
    fate_base = conf.fate_base
    ml_dir = os.path.join(fate_base, "python/federatedml")
    # make the FATE python packages importable by the spawned test processes
    PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python")
    os.environ['PYTHONPATH'] = PYTHONPATH
    if len(includes) == 0:
        traverse_folder(ml_dir)
    else:
        ml_dir = includes
        for v in ml_dir:
            traverse_folder(os.path.abspath(v))

    echo.echo(f"there are {failed_count} failed test")
    if failed_count > 0:
        print('Please check the error content: {}'.format(error_log(None)))
+# +import time +import uuid +from datetime import timedelta + +import click +from fate_test._client import Clients +from fate_test._config import Config +from fate_test._io import LOGGER, echo +from fate_test._parser import Testsuite, non_success_summary +from fate_test.scripts._options import SharedOptions +from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script + +from fate_test import _config + +""" +@click.option('-uj', '--update-job-parameters', default="{}", type=JSON_STRING, + help="a json string represents mapping for replacing fields in conf.job_parameters") +@click.option('-uc', '--update-component-parameters', default="{}", type=JSON_STRING, + help="a json string represents mapping for replacing fields in conf.component_parameters") +@click.option('-m', '--timeout', type=int, default=3600, help="maximun running time of job") +@click.option('-p', '--task-cores', type=int, help="processors per node") +""" + + +@click.command("suite") +@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", + help="include *testsuite.json under these paths") +@click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, + help="exclude *testsuite.json under these paths") +@click.option("-g", '--glob', type=str, + help="glob string to filter sub-directory of path specified by ") +@click.option("--skip-jobs", is_flag=True, default=False, + help="skip pipeline jobs defined in testsuite") +@click.option("--skip-data", is_flag=True, default=False, + help="skip uploading data specified in testsuite") +@click.option("--data-only", is_flag=True, default=False, + help="upload data only") +@click.option("--provider", type=str, + help="Select the fate version, for example: fate@2.0-beta") +@click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) +@click.option("--enable-clean-data", "clean_data", flag_value=True, default=None) 
+@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def run_suite(ctx, include, exclude, glob, + skip_jobs, skip_data, data_only, clean_data, provider, **kwargs): + """ + process testsuite + """ + ctx.obj.update(**kwargs) + ctx.obj.post_process() + config_inst = ctx.obj["config"] + """if ctx.obj["extend_sid"] is not None: + config_inst.extend_sid = ctx.obj["extend_sid"] + if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" + if clean_data is None: + clean_data = config_inst.clean_data + namespace = ctx.obj["namespace"] + yes = ctx.obj["yes"] + data_namespace_mangling = ctx.obj["namespace_mangling"] + # prepare output dir and json hooks + # _add_replace_hook(replace) + echo.welcome() + echo.echo(f"testsuite namespace: {namespace}", fg='red') + echo.echo("loading testsuites:") + suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, provider=provider) + for suite in suites: + _config.jobs_num += len(suite.pipeline_jobs) + echo.echo(f"\tdataset({len(suite.dataset)}) " + f"pipeline jobs ({len(suite.pipeline_jobs)}) {suite.path}") + if not yes and not click.confirm("running?"): + return + + echo.stdout_newline() + with Clients(config_inst) as client: + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + if not skip_data and config_inst.work_mode: + try: + _upload_data(client, suite, config_inst) + except Exception as e: + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + if data_only: + continue + + if not skip_jobs: + try: + time_consuming = _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) + except Exception as e: + raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e + + if not skip_data and clean_data and 
def _run_pipeline_jobs(config: Config, suite: Testsuite, namespace: str, data_namespace_mangling: bool):
    """Execute each pipeline job in *suite* and return per-job elapsed times as "<seconds>s" strings.

    Jobs that raise are recorded via the suite's status tracking and skipped;
    only successful jobs contribute an entry to the returned list.
    """
    total = len(suite.pipeline_jobs)
    elapsed = []
    for idx, pipeline_job in enumerate(suite.pipeline_jobs):
        echo.echo(f"Running [{idx + 1}/{total}] job: {pipeline_job.job_name}")

        job_name = pipeline_job.job_name
        script_path = pipeline_job.script_path

        def _raise(err_msg, status="failed"):
            # record the failure under a unique id so it can be located in logs
            exception_id = str(uuid.uuid1())
            suite.update_status(job_name=job_name, exception_id=exception_id, status=status)
            echo.file(f"exception({exception_id}), error message:\n{err_msg}")

        mod = _load_module_from_script(script_path)
        start = time.time()
        try:
            # the namespace suffix is only passed when namespace mangling is enabled
            main_kwargs = {"config": config}
            if data_namespace_mangling:
                main_kwargs["namespace"] = f"_{namespace}"
            try:
                mod.main(**main_kwargs)
                suite.update_status(job_name=job_name, status="success")
                elapsed.append(time.time() - start)
            except Exception as e:
                _raise(e)
                continue
        except Exception as e:
            _raise(e, status="not submitted")
            continue

    return [str(int(t)) + "s" for t in elapsed]
def _get_common_metrics(**results):
    """Return the metric names present in every result dict, excluding the script-metrics key.

    Parameters
    ----------
    results: mapping of model name -> dict of metric name -> value

    Returns
    -------
    list of metric names shared by all result dicts ([] when *results* is empty)
    """
    common_metrics = None
    for result in results.values():
        if common_metrics is None:
            common_metrics = set(result.keys())
        else:
            common_metrics &= result.keys()
    # no results at all: nothing in common (the original raised
    # "TypeError: argument of type 'NoneType' is not iterable" here)
    if common_metrics is None:
        return []
    # script-level metrics are compared separately, so drop the aggregate key
    common_metrics.discard(SCRIPT_METRICS)
    return list(common_metrics)
def evaluate_almost_equal(metrics, results, abs_tol=None, rel_tol=None):
    """
    Evaluate for each given metric whether values across models are almost equal.

    Parameters
    ----------
    metrics: List[str], metric names; values in *results* are aligned positionally
    results: dict, model name -> list of metric values
    abs_tol: float, absolute error tolerance (forwarded to math.isclose)
    rel_tol: float, relative difference tolerance (forwarded to math.isclose)

    Returns
    -------
    (dict, bool): per-metric match flags, and True iff every metric matched
        (the original docstring claimed a bare bool, but callers tuple-unpack)
    """
    # empty metric list: return an unpackable (summary, all_match) pair —
    # the original returned bare False here, which broke tuple-unpacking callers
    if len(metrics) == 0:
        return {}, False
    eval_summary = {}
    for i, metric in enumerate(metrics):
        v_eval = [res[i] for res in results.values()]
        first_v = v_eval[0]
        if metric == SCRIPT_METRICS:
            # script-level metrics are compared elsewhere
            continue
        if abs_tol is not None and rel_tol is not None:
            eval_summary[metric] = all(math.isclose(v, first_v, abs_tol=abs_tol, rel_tol=rel_tol) for v in v_eval)
        elif abs_tol is not None:
            eval_summary[metric] = all(math.isclose(v, first_v, abs_tol=abs_tol) for v in v_eval)
        elif rel_tol is not None:
            eval_summary[metric] = all(math.isclose(v, first_v, rel_tol=rel_tol) for v in v_eval)
        else:
            eval_summary[metric] = all(math.isclose(v, first_v) for v in v_eval)
    all_match = all(eval_summary.values())
    return eval_summary, all_match
def match_script_metrics(abs_tol, rel_tol, match_details, **results):
    """Print a per-script summary table of script-level metrics and run the match evaluation."""
    script_groups = _filter_results([SCRIPT_METRICS], **results)
    for script, results_pair in script_groups.items():
        metric_results = results_pair[0]
        shared_metrics = _get_common_metrics(**metric_results)
        comparable = _filter_results(shared_metrics, **metric_results)
        summary = PrettyTable()
        summary.set_style(ORGMODE)
        summary.field_names = ["Script Model Name"] + shared_metrics
        for model_name, values in comparable.items():
            styled_values = [f"{TxtStyle.FIELD_VAL}{v}{TxtStyle.END}" for v in values]
            summary.add_row([f"{model_name}-{script}"] + styled_values)
        echo.echo(summary.get_string(title=f"{TxtStyle.TITLE}{script} Script Metrics Summary{TxtStyle.END}"))
        _all_match(shared_metrics, comparable, abs_tol, rel_tol, script, match_details=match_details)
def _match_error(metrics, results):
    """Compute styled max absolute and max relative error strings, one per metric.

    Parameters
    ----------
    metrics: list of metric names (used for positional alignment only)
    results: dict, model name -> list of metric values aligned with *metrics*

    Returns
    -------
    (list, list): max-relative-error strings and max-absolute-error strings
    """
    relative_error_list = []
    absolute_error_list = []
    # empty metric list: return an unpackable pair of empty lists — the original
    # returned bare False here, which broke callers doing `rel, abs_ = _match_error(...)`
    if len(metrics) == 0:
        return relative_error_list, absolute_error_list
    for i, _metric in enumerate(metrics):
        v_eval = [res[i] for res in results.values()]
        spread = abs(max(v_eval) - min(v_eval))
        absolute_error_list.append(f"{TxtStyle.FIELD_VAL}{spread}{TxtStyle.END}")
        # guard the original ZeroDivisionError when the largest value is exactly 0;
        # the relative error is reported as 0.0 in that case — TODO confirm policy
        denominator = max(v_eval)
        relative = abs(spread / denominator) if denominator else 0.0
        relative_error_list.append(f"{TxtStyle.FIELD_VAL}{relative}{TxtStyle.END}")
    return relative_error_list, absolute_error_list
os.path.exists(history_info_dir), f"Please check the {history_info_dir} Is it deleted" + with open(history_info_dir, 'r') as f: + benchmark_quality = json.load(f, object_hook=dict) + regression_metric = {} + regression_quality = {} + class_quality = {} + for history_tag in history_tags: + for tag in benchmark_quality: + if '_'.join(tag.split("_")[2:]) == history_tag and SCRIPT_METRICS in results["FATE"]: + regression_metric[tag] = regression_group(benchmark_quality[tag]['FATE']) + for key, value in _filter_results([SCRIPT_METRICS], **benchmark_quality[tag])['FATE'][0].items(): + regression_quality["_".join([tag, key])] = value + elif '_'.join(tag.split("_")[2:]) == history_tag and DISTRIBUTION_METRICS in results["FATE"]: + class_quality[tag] = class_group(benchmark_quality[tag]['FATE']) + + if SCRIPT_METRICS in results["FATE"] and regression_metric: + regression_metric[group_name] = regression_group(results['FATE']) + metric_compare(abs_tol, rel_tol, match_details, **regression_metric) + for key, value in _filter_results([SCRIPT_METRICS], **results)['FATE'][0].items(): + regression_quality["_".join([group_name, key])] = value + metric_compare(abs_tol, rel_tol, match_details, **regression_quality) + echo.echo("\n" + "#" * 60) + elif DISTRIBUTION_METRICS in results["FATE"] and class_quality: + + class_quality[group_name] = class_group(results['FATE']) + metric_compare(abs_tol, rel_tol, match_details, **class_quality) + echo.echo("\n" + "#" * 60) + + +def metric_compare(abs_tol, rel_tol, match_details, **metric_results): + common_metrics = _get_common_metrics(**metric_results) + filtered_results = _filter_results(common_metrics, **metric_results) + table = PrettyTable() + table.set_style(ORGMODE) + script_model_names = list(filtered_results.keys()) + table.field_names = ["Script Model Name"] + common_metrics + for script_model_name in script_model_names: + table.add_row([f"{script_model_name}"] + + [f"{TxtStyle.FIELD_VAL}{v}{TxtStyle.END}" for v in 
filtered_results[script_model_name]]) + print( + table.get_string(title=f"{TxtStyle.TITLE}Comparison results of all metrics of Script Model FATE{TxtStyle.END}")) + _all_match(common_metrics, filtered_results, abs_tol, rel_tol, match_details=match_details) + + +def _save_quality(storage_tag, cache_directory, **results): + save_dir = "/".join([os.path.join(os.path.abspath(cache_directory), 'benchmark_history', "benchmark_quality.json")]) + os.makedirs(os.path.dirname(save_dir), exist_ok=True) + if os.path.exists(save_dir): + with open(save_dir, 'r') as f: + benchmark_quality = json.load(f, object_hook=dict) + else: + benchmark_quality = {} + if storage_tag in benchmark_quality: + print("This tag already exists in the history and will be updated to the record information.") + benchmark_quality.update({storage_tag: results}) + try: + with open(save_dir, 'w') as fp: + json.dump(benchmark_quality, fp, indent=2) + print("Storage success, please check: ", save_dir) + except Exception: + print("Storage failed, please check: ", save_dir) + + +def parse_summary_result(rs_dict): + for model_key in rs_dict: + rs_content = rs_dict[model_key] + if 'validate' in rs_content: + return rs_content['validate'] + else: + return rs_content['train'] + + +def extract_data(df, col_name, convert_float=True, keep_id=False): + """ + component output data to numpy array + Parameters + ---------- + df: dataframe + col_name: column to extract + convert_float: whether to convert extracted value to float value + keep_id: whether to keep id + Returns + ------- + array of extracted data, optionally with id + """ + if keep_id: + if convert_float: + df[col_name] = df[col_name].to_numpy().astype(np.float64) + + return df[[df.columns[0], col_name]].to_numpy() + else: + return df[col_name].to_numpy().astype(np.float64) diff --git a/python/fate_test/pyproject.toml b/python/fate_test/pyproject.toml new file mode 100644 index 0000000000..2f4dbe9f7f --- /dev/null +++ b/python/fate_test/pyproject.toml @@ -0,0 
+1,44 @@ +[tool.poetry] +name = "fate_test" +version = "2.0.0-beta" +description = "test tools for FATE" +authors = ["FederatedAI "] +license = "Apache-2.0" + +homepage = "https://fate.fedai.org/" +repository = "https://github.com/FederatedAI/FATE" +documentation = "https://fate.readthedocs.io/en/latest/?badge=latest" +keywords = ["FATE", "Federated Learning", "Testsuite"] + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Topic :: Software Development :: Testing", + "Intended Audience :: Developers", + "Intended Audience :: Education" +] + +packages = [ + { include = "fate_test" } +] + +[tool.poetry.dependencies] +python = "^3.8" +requests_toolbelt = "^0.9.1" +requests = "^2.24.0" +click = "^7.1.2" +"ruamel.yaml" = "^0.16.10" +loguru = ">=0.6.0" +prettytable = "^1.0.0" +sshtunnel = "^0.1.5" +pandas = ">=1.1.5" +colorama = "^0.4.4" + +[tool.poetry.dev-dependencies] + +[tool.poetry.scripts] +fate_test = "fate_test.scripts.cli:cli" + +[build-system] +requires = ["poetry>=0.12", "setuptools>=50.0,<51.0"] +build-backend = "poetry.masonry.api" \ No newline at end of file diff --git a/python/fate_test/setup.py b/python/fate_test/setup.py new file mode 100644 index 0000000000..98898dbb41 --- /dev/null +++ b/python/fate_test/setup.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +from setuptools import setup + +packages = ["fate_test", "fate_test.scripts"] + +package_data = {"": ["*"]} + +install_requires = [ + "click>=7.1.2,<8.0.0", + "loguru>=0.6.0", + "pandas>=1.1.5", + "poetry>=0.12", + "prettytable>=1.0.0,<2.0.0", + # "requests>=2.24.0,<3.0.0", + # "requests_toolbelt>=0.9.1,<0.10.0", + "ruamel.yaml>=0.16.10,<0.17.0", + # "sshtunnel>=0.1.5,<0.2.0", + 'colorama>=0.4.4' +] + +entry_points = {"console_scripts": ["fate_test = fate_test.scripts.cli:cli"]} + +setup_kwargs = { + "name": "fate-test", + "version": "2.0.0-beta", + "description": "test tools for FATE", + "long_description": 'FATE Test\n=========\n\nA collection of 
useful tools to running FATE\'s test.\n\n.. image:: images/tutorial.gif\n :align: center\n :alt: tutorial\n\nquick start\n-----------\n\n1. (optional) create virtual env\n\n .. code-block:: bash\n\n python -m venv venv\n source venv/bin/activate\n pip install -U pip\n\n\n2. install fate_test\n\n .. code-block:: bash\n\n pip install fate_test\n fate_test --help\n\n\n3. edit default fate_test_config.yaml\n\n .. code-block:: bash\n\n # edit priority config file with system default editor\n # filling some field according to comments\n fate_test config edit\n\n4. configure FATE-Pipeline and FATE-Flow Commandline server setting\n\n.. code-block:: bash\n\n # configure FATE-Pipeline server setting\n pipeline init --port 9380 --ip 127.0.0.1\n # configure FATE-Flow Commandline server setting\n flow init --port 9380 --ip 127.0.0.1\n\n5. run some fate_test suite\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n6. run some fate_test benchmark\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n7. useful logs or exception will be saved to logs dir with namespace shown in last step\n\ndevelop install\n---------------\nIt is more convenient to use the editable mode during development: replace step 2 with flowing steps\n\n.. code-block:: bash\n\n pip install -e ${FATE}/python/fate_client && pip install -e ${FATE}/python/fate_test\n\n\n\ncommand types\n-------------\n\n- suite: used for running testsuites, collection of FATE jobs\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n- benchmark-quality used for comparing modeling quality between FATE and other machine learning systems\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n\n\nconfiguration by examples\n--------------------------\n\n1. no need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n and both service_a, service_b can be requested directly:\n\n .. 
code-block:: yaml\n\n work_mode: 1 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - {address: service_b, parties: [10000]}\n\n2. need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n service_a, can be requested directly while service_b don\'t,\n but you can request service_b in other node, say B:\n\n .. code-block:: yaml\n\n work_mode: 0 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - flow_services:\n - {address: service_b, parties: [10000]}\n ssh_tunnel: # optional\n enable: true\n ssh_address: :\n ssh_username: \n ssh_password: # optional\n ssh_priv_key: "~/.ssh/id_rsa"\n\n\nTestsuite\n---------\n\nTestsuite is used for running a collection of jobs in sequence. Data used for jobs could be uploaded before jobs are\nsubmitted, and are cleaned when jobs finished. This tool is useful for FATE\'s release test.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test suite --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test suite -i \n\n will run testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test suite -i -e -e ...\n\n will run testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test suite -i -g "hetero*"\n\n will run testsuites in sub directory start with *hetero* of *path1*\n\n4. replace:\n\n .. code-block:: bash\n\n fate_test suite -i -r \'{"maxIter": 5}\'\n\n will find all key-value pair with key "maxIter" in `data conf` or `conf` or `dsl` and replace the value with 5\n\n\n5. skip-data:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-data\n\n will run testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. 
yes:\n\n .. code-block:: bash\n\n fate_test suite -i --yes\n\n will run testsuites in *path1* directly, skipping double check\n\n7. skip-dsl-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-dsl-jobs\n\n will run testsuites in *path1* but skip all *tasks* in testsuites. It\'s would be useful when only pipeline tasks needed.\n\n8. skip-pipeline-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-pipeline-jobs\n\n will run testsuites in *path1* but skip all *pipeline tasks* in testsuites. It\'s would be useful when only dsl tasks needed.\n\n\nBenchmark Quality\n------------------\n\nBenchmark-quality is used for comparing modeling quality between FATE\nand other machine learning systems. Benchmark produces a metrics comparison\nsummary for each benchmark job group.\n\n.. code-block:: bash\n\n fate_test benchmark-quality -i examples/benchmark_quality/hetero_linear_regression\n\n.. code-block:: bash\n\n +-------+--------------------------------------------------------------+\n | Data | Name |\n +-------+--------------------------------------------------------------+\n | train | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n | test | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n +-------+--------------------------------------------------------------+\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | Model Name | explained_variance | r2_score | root_mean_squared_error | mean_squared_error |\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | local-linear_regression-regression | 0.9035168452250094 | 0.9035070863155368 | 0.31340413289880553 | 0.09822215051805216 |\n | FATE-linear_regression-regression | 0.903146386539082 | 0.9031411831961411 | 0.3139977881119483 | 0.09859461093919596 |\n 
+------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n +-------------------------+-----------+\n | Metric | All Match |\n +-------------------------+-----------+\n | explained_variance | True |\n | r2_score | True |\n | root_mean_squared_error | True |\n | mean_squared_error | True |\n +-------------------------+-----------+\n\ncommand options\n~~~~~~~~~~~~~~~\n\nuse the following command to show help message\n\n.. code-block:: bash\n\n fate_test benchmark-quality --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n will run benchmark testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -e -e ...\n\n will run benchmark testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -g "hetero*"\n\n will run benchmark testsuites in sub directory start with *hetero* of *path1*\n\n4. tol:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -t 1e-3\n\n will run benchmark testsuites in *path1* with absolute tolerance of difference between metrics set to 0.001.\n If absolute difference between metrics is smaller than *tol*, then metrics are considered\n almost equal. Check benchmark testsuite `writing guide <#benchmark-testsuite>`_ on setting alternative tolerance.\n\n5. skip-data:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --skip-data\n\n will run benchmark testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. yes:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --yes\n\n will run benchmark testsuites in *path1* directly, skipping double check\n\n\nbenchmark testsuite\n~~~~~~~~~~~~~~~~~~~\n\nConfiguration of jobs should be specified in a benchmark testsuite whose file name ends\nwith "\\*benchmark.json". 
For benchmark testsuite example,\nplease refer `here <../../examples/benchmark_quality>`_.\n\nA benchmark testsuite includes the following elements:\n\n- data: list of local data to be uploaded before running FATE jobs\n\n - file: path to original data file to be uploaded, should be relative to testsuite or FATE installation path\n - head: whether file includes header\n - partition: number of partition for data storage\n - table_name: table name in storage\n - namespace: table namespace in storage\n - role: which role to upload the data, as specified in fate_test.config;\n naming format is: "{role_type}_{role_index}", index starts at 0\n\n .. code-block:: json\n\n "data": [\n {\n "file": "examples/data/motor_hetero_host.csv",\n "head": 1,\n "partition": 8,\n "table_name": "motor_hetero_host",\n "namespace": "experiment",\n "role": "host_0"\n }\n ]\n\n- job group: each group includes arbitrary number of jobs with paths to corresponding script and configuration\n\n - job: name of job to be run, must be unique within each group list\n\n - script: path to `testing script <#testing-script>`_, should be relative to testsuite\n - conf: path to job configuration file for script, should be relative to testsuite\n\n .. code-block:: json\n\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n }\n\n - compare_setting: additional setting for quality metrics comparison, currently only takes ``relative_tol``\n\n If metrics *a* and *b* satisfy *abs(a-b) <= max(relative_tol \\* max(abs(a), abs(b)), absolute_tol)*\n (from `math module `_),\n they are considered almost equal. In the below example, metrics from "local" and "FATE" jobs are\n considered almost equal if their relative difference is smaller than\n *0.05 \\* max(abs(local_metric), abs(pipeline_metric)*.\n\n .. 
code-block:: json\n\n "linear_regression-regression": {\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n },\n "FATE": {\n "script": "./fate-linr.py",\n "conf": "./linr_config.yaml"\n },\n "compare_setting": {\n "relative_tol": 0.01\n }\n }\n\n\ntesting script\n~~~~~~~~~~~~~~\n\nAll job scripts need to have ``Main`` function as an entry point for executing jobs; scripts should\nreturn two dictionaries: first with data information key-value pairs: {data_type}: {data_name_dictionary};\nthe second contains {metric_name}: {metric_value} key-value pairs for metric comparison.\n\nBy default, the final data summary shows the output from the job named "FATE"; if no such job exists,\ndata information returned by the first job is shown. For clear presentation, we suggest that user follow\nthis general `guideline <../../examples/data/README.md#data-set-naming-rule>`_ for data set naming. In the case of multi-host\ntask, consider numbering host as such:\n\n::\n\n {\'guest\': \'default_credit_homo_guest\',\n \'host_1\': \'default_credit_homo_host_1\',\n \'host_2\': \'default_credit_homo_host_2\'}\n\nReturned quality metrics of the same key are to be compared.\nNote that only **real-value** metrics can be compared.\n\n- FATE script: ``Main`` always has three inputs:\n\n - config: job configuration, `JobConfig <../fate_client/pipeline/utils/tools.py#L64>`_ object loaded from "fate_test_config.yaml"\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n - namespace: namespace suffix, user-given *namespace* or generated timestamp string when using *namespace-mangling*\n\n- non-FATE script: ``Main`` always has one input:\n\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n\n\ndata\n----\n\n`Data` sub-command is used for upload or delete dataset in suite\'s.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test data --help\n\n1. 
include:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i \n\n will upload/delete dataset in testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -e -e ...\n\n will upload/delete dataset in testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -g "hetero*"\n\n will upload/delete dataset in testsuites in sub directory start with *hetero* of *path1*\n\n\nfull command options\n---------------------\n\n.. click:: fate_test.scripts.cli:cli\n :prog: fate_test\n :show-nested:\n', + "author": "FederatedAI", + "author_email": "contact@FedAI.org", + "maintainer": None, + "maintainer_email": None, + "url": "https://fate.fedai.org/", + "packages": packages, + "package_data": package_data, + "install_requires": install_requires, + "entry_points": entry_points, + "python_requires": ">=3.6,<4.0", +} + +setup(**setup_kwargs) From bf5d579f13883622e5cd6f4d3f5eaefa9471439f Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 27 Jul 2023 10:21:16 +0800 Subject: [PATCH 02/30] edit fate_test(#5008) Signed-off-by: Yu Wu --- python/fate_test/fate_test/_client.py | 48 ++++++------------- python/fate_test/fate_test/_config.py | 22 ++++----- python/fate_test/fate_test/_flow_client.py | 34 ++++++++----- python/fate_test/fate_test/_parser.py | 36 +++++++------- .../fate_test/scripts/performance_cli.py | 16 ++----- 5 files changed, 70 insertions(+), 86 deletions(-) diff --git a/python/fate_test/fate_test/_client.py b/python/fate_test/fate_test/_client.py index 84d623c4c3..d0abbb318e 100644 --- a/python/fate_test/fate_test/_client.py +++ b/python/fate_test/fate_test/_client.py @@ -14,61 +14,41 @@ # limitations under the License. 
# -import sshtunnel - from fate_test._flow_client import FLOWClient -from fate_test._io import LOGGER from fate_test._parser import Config class Clients(object): def __init__(self, config: Config): self._flow_clients = {} - self._tunnel_id_to_flow_clients = {} + # self._tunnel_id_to_flow_clients = {} self._role_str_to_service_id = {} - self._tunnel_id_to_tunnel = config.tunnel_id_to_tunnel + self._service_id_to_role_str = {} + self._service_id_to_party = {} + # self._tunnel_id_to_tunnel = config.tunnel_id_to_tunnel + for party, service_id in config.party_to_service_id.items(): + for role_str in config.parties.party_to_role_string(party): + self._role_str_to_service_id[role_str] = service_id + self._service_id_to_role_str[service_id] = role_str + self._service_id_to_party[service_id] = party for service_id, service in config.service_id_to_service.items(): if isinstance(service, Config.service): + role = self._service_id_to_role_str[service_id].split("_")[0] + party = self._service_id_to_party[service_id] self._flow_clients[service_id] = FLOWClient( - service.address, config.data_base_dir, config.cache_directory) + service.address, config.data_base_dir, config.cache_directory, role, party) - elif isinstance(service, Config.tunnel_service): + """elif isinstance(service, Config.tunnel_service): self._flow_clients[service_id] = FLOWClient(None, config.data_base_dir, config.cache_directory) self._tunnel_id_to_flow_clients.setdefault(service.tunnel_id, []).append( - (service.index, self._flow_clients[service_id])) - - for party, service_id in config.party_to_service_id.items(): - for role_str in config.parties.party_to_role_string(party): - self._role_str_to_service_id[role_str] = service_id + (service.index, self._flow_clients[service_id]))""" def __getitem__(self, role_str: str) -> 'FLOWClient': if role_str not in self._role_str_to_service_id: raise RuntimeError(f"no flow client found binding to {role_str}") return 
self._flow_clients[self._role_str_to_service_id[role_str]] - def __enter__(self): - # open ssh tunnels and create flow clients for remote - self._tunnels = [] - for tunnel_id, tunnel_conf in self._tunnel_id_to_tunnel.items(): - tunnel = sshtunnel.SSHTunnelForwarder(ssh_address_or_host=tunnel_conf.ssh_address, - ssh_username=tunnel_conf.ssh_username, - ssh_password=tunnel_conf.ssh_password, - ssh_pkey=tunnel_conf.ssh_priv_key, - remote_bind_addresses=tunnel_conf.services_address) - tunnel.start() - self._tunnels.append(tunnel) - for index, flow_client in self._tunnel_id_to_flow_clients[tunnel_id]: - flow_client.set_address(f"127.0.0.1:{tunnel.local_bind_ports[index]}") - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - for tunnel in self._tunnels: - try: - tunnel.stop() - except Exception as e: - LOGGER.exception(e) - def contains(self, role_str): return role_str in self._role_str_to_service_id diff --git a/python/fate_test/fate_test/_config.py b/python/fate_test/fate_test/_config.py index 7b26b69c3c..1d8cf8db56 100644 --- a/python/fate_test/fate_test/_config.py +++ b/python/fate_test/fate_test/_config.py @@ -49,9 +49,6 @@ # whether to delete data in suites after all jobs done clean_data: true -# work mode: 0 for standalone, 1 for cluster -work_mode: 0 - # participating parties' id and correponding flow service ip & port information parties: guest: [9999] @@ -184,14 +181,15 @@ def __init__(self, config): self.tunnel_id_to_tunnel = {} self.extend_sid = None self.auto_increasing_sid = None - self.work_mode = config.get("work_mode", 0) + # self.work_mode = config.get("work_mode", 0) tunnel_id = 0 service_id = 0 os.makedirs(os.path.dirname(self.cache_directory), exist_ok=True) for service_config in config["services"]: flow_services = service_config["flow_services"] - if service_config.get("ssh_tunnel", {}).get("enable", False): + # @todo: rm ssh tunnel; add host flow services + """if service_config.get("ssh_tunnel", {}).get("enable", False): tunnel_id += 
1 services_address = [] for index, flow_service in enumerate(flow_services): @@ -209,13 +207,13 @@ def __init__(self, config): tunnel_config["ssh_password"], tunnel_config["ssh_priv_key"], services_address) - else: - for flow_service in flow_services: - service_id += 1 - address = flow_service["address"] - self.service_id_to_service[service_id] = self.service(address) - for party in flow_service["parties"]: - self.party_to_service_id[party] = service_id + else:""" + for flow_service in flow_services: + service_id += 1 + address = flow_service["address"] + self.service_id_to_service[service_id] = self.service(address) + for party in flow_service["parties"]: + self.party_to_service_id[party] = service_id @staticmethod def load(path: typing.Union[str, Path], **kwargs): diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py index 0cfafb9d8e..280aac5323 100644 --- a/python/fate_test/fate_test/_flow_client.py +++ b/python/fate_test/fate_test/_flow_client.py @@ -71,12 +71,12 @@ def delete_data(self, data: Data): except Exception as e: raise RuntimeError(f"delete data failed") from e - def output_data_table(self, job_id, role, party_id, component_name): + """def output_data_table(self, job_id, role, party_id, component_name): result = self._output_data_table(job_id=job_id, role=role, party_id=party_id, component_name=component_name) - return result + return result""" - def table_info(self, table_name, namespace): - result = self._table_info(table_name=table_name, namespace=namespace) + def table_query(self, table_name, namespace): + result = self._table_query(table_name=table_name, namespace=namespace) return result def add_notes(self, job_id, role, party_id, notes): @@ -126,7 +126,7 @@ def _upload_data(self, data, output_path=None, verbose=0, destroy=1): partitions=data.partitions) return response - def _table_info(self, table_name, namespace): + """def _table_info(self, table_name, namespace): param = { 'table_name': 
table_name, 'namespace': namespace @@ -140,17 +140,25 @@ def _delete_data(self, table_name, namespace): 'namespace': namespace } response = self.flow_client(request='table/delete', param=param) + return response""" + + def _table_query(self, table_name, namespace): + response = self._client.table.query(namespace=namespace, table_name=table_name) + return response + + def _delete_data(self, table_name, namespace): + response = self._client.table.delete(namespace=namespace, table_name=table_name) return response - def _submit_job(self, conf, dsl): + """def _submit_job(self, conf, dsl): param = { 'job_dsl': self._save_json(dsl, 'submit_dsl.json'), 'job_runtime_conf': self._save_json(conf, 'submit_conf.json') } response = SubmitJobResponse(self.flow_client(request='job/submit', param=param)) - return response + return response""" - def _deploy_model(self, model_id, model_version, dsl=None): + """def _deploy_model(self, model_id, model_version, dsl=None): post_data = {'model_id': model_id, 'model_version': model_version, 'predict_dsl': dsl} @@ -166,9 +174,9 @@ def _deploy_model(self, model_id, model_version, dsl=None): except Exception as e: raise RuntimeError(f"deploy model error: {response}") from e - return result + return result""" - def _output_data_table(self, job_id, role, party_id, component_name): + """def _output_data_table(self, job_id, role, party_id, component_name): post_data = {'job_id': job_id, 'role': role, 'party_id': party_id, @@ -201,7 +209,7 @@ def _get_summary(self, job_id, role, party_id, component_name): result["summary_dir"] = retmsg # 获取summary文件位置 except Exception as e: raise RuntimeError(f"output data table error: {response}") from e - return result + return result""" """def _query_job(self, job_id, role): param = { @@ -269,7 +277,7 @@ def __repr__(self): return self.__str__() -"""class QueryJobResponse(object): +class QueryJobResponse(object): def __init__(self, response: dict): try: status = Status(response.get('data')[0]["f_status"]) @@ 
-277,7 +285,7 @@ def __init__(self, response: dict): except Exception as e: raise RuntimeError(f"query job error, response: {response}") from e self.status = status - self.progress = progress""" + self.progress = progress class UploadDataResponse(object): diff --git a/python/fate_test/fate_test/_parser.py b/python/fate_test/fate_test/_parser.py index 3ab001da29..fc1d832778 100644 --- a/python/fate_test/fate_test/_parser.py +++ b/python/fate_test/fate_test/_parser.py @@ -14,16 +14,16 @@ # limitations under the License. # -import json import typing from collections import deque from pathlib import Path -import click import prettytable from fate_test._config import Parties, Config from fate_test._io import echo from fate_test.utils import TxtStyle +# import json +from ruamel import yaml from fate_test import _config @@ -113,7 +113,8 @@ def as_dict(self): @staticmethod def load(path: Path): with path.open("r") as f: - kwargs = json.load(f, object_hook=CONF_JSON_HOOK.hook) + # kwargs = json.load(f, object_hook=CONF_JSON_HOOK.hook) + kwargs = yaml.safe_load(f) return JobConf(**kwargs) @property @@ -215,7 +216,8 @@ def __init__(self, components: dict, provider=None): @staticmethod def load(path: Path, provider): with path.open("r") as f: - kwargs = json.load(f, object_hook=DSL_JSON_HOOK.hook) + # kwargs = json.load(f, object_hook=DSL_JSON_HOOK.hook) + kwargs = yaml.safe_load(f) if provider is not None: kwargs["provider"] = provider return JobDSL(**kwargs) @@ -275,16 +277,16 @@ def load(cls, job_name, job_configs, base: Path, provider): job_name=job_name, job_conf=job_conf, job_dsl=job_dsl, pre_works=pre_works ) - @property + """@property def submit_params(self): return dict( conf=self.job_conf.as_dict(), dsl=self.job_dsl.as_dict() if self.job_dsl else None, - ) + )""" - def set_pre_work(self, name, **kwargs): + """def set_pre_work(self, name, **kwargs): self.job_conf.update_job_common_parameters(**kwargs) - self.job_conf.update_job_type("predict") + 
self.job_conf.update_job_type("predict")""" def set_input_data(self, hierarchys, table_info): for table_name, hierarchy in zip(table_info, hierarchys): @@ -337,7 +339,8 @@ def __init__( @staticmethod def load(path: Path, provider): with path.open("r") as f: - testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + testsuite_config = yaml.safe_load(f) dataset = [] for d in testsuite_config.get("data"): @@ -403,13 +406,13 @@ def pretty_final_summary(self, time_consuming, suite_file=None): def model_in_dep(self, name): return name in self._dependency - def get_dependent_jobs(self, name): - return self._dependency[name] + """def get_dependent_jobs(self, name): + return self._dependency[name]""" def remove_dependency(self, name): del self._dependency[name] - def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info=None, model_loader_info=None): + """def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info=None, model_loader_info=None): if model_info is not None: job.set_pre_work(name, **model_info) if table_info is not None: @@ -421,7 +424,7 @@ def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info= if name in job.pre_works: job.pre_works.remove(name) if job.is_submit_ready(): - self._ready_jobs.appendleft(job) + self._ready_jobs.appendleft(job)""" def reflash_configs(self, config: Config): failed = [] @@ -492,7 +495,8 @@ def __init__( @staticmethod def load(path: Path): with path.open("r") as f: - testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + testsuite_config = yaml.safe_load(f) dataset = [] for d in testsuite_config.get("data"): @@ -574,7 +578,7 @@ def _hook(d): return _hook -class JsonParamType(click.ParamType): +"""class JsonParamType(click.ParamType): name = "json_string" def convert(self, value, param, ctx): @@ -584,4 +588,4 @@ 
def convert(self, value, param, ctx): self.fail(f"{value} is not a valid json string", param, ctx) -JSON_STRING = JsonParamType() +JSON_STRING = JsonParamType()""" diff --git a/python/fate_test/fate_test/scripts/performance_cli.py b/python/fate_test/fate_test/scripts/performance_cli.py index 7fe0ca5627..338f66c868 100644 --- a/python/fate_test/fate_test/scripts/performance_cli.py +++ b/python/fate_test/fate_test/scripts/performance_cli.py @@ -23,9 +23,9 @@ import click from fate_test._client import Clients from fate_test._config import Config -from fate_test._flow_client import JobProgress, SubmitJobResponse, QueryJobResponse +from fate_test._flow_client import JobProgress, QueryJobResponse from fate_test._io import LOGGER, echo -from fate_test._parser import JSON_STRING, Testsuite +from fate_test._parser import Testsuite from fate_test.scripts._options import SharedOptions from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script, \ _add_replace_hook @@ -40,8 +40,6 @@ help="Select the job type, you can also set through include") @click.option('-i', '--include', type=click.Path(exists=True), multiple=True, metavar="", help="include *testsuite.json under these paths") -@click.option('-r', '--replace', default="{}", type=JSON_STRING, - help="a json string represents mapping for replacing fields in data/conf/dsl") @click.option('-m', '--timeout', type=int, default=3600, help="maximun running time of job") @click.option('-e', '--max-iter', type=int, help="When the algorithm model is LR, the number of iterations is set") @@ -49,10 +47,6 @@ help="When the algorithm model is SecureBoost, set the number of model layers") @click.option('-nt', '--num-trees', type=int, help="When the algorithm model is SecureBoost, set the number of trees") @click.option('-p', '--task-cores', type=int, help="processors per node") -@click.option('-uj', '--update-job-parameters', default="{}", type=JSON_STRING, - help="a json string represents 
mapping for replacing fields in conf.job_parameters") -@click.option('-uc', '--update-component-parameters', default="{}", type=JSON_STRING, - help="a json string represents mapping for replacing fields in conf.component_parameters") @click.option('-s', '--storage-tag', type=str, help="tag for storing performance time consuming, for future comparison") @click.option('-v', '--history-tag', type=str, multiple=True, @@ -187,11 +181,11 @@ def update_bar(n_step): update_bar(1) - def _call_back(resp: SubmitJobResponse): - if isinstance(resp, SubmitJobResponse): + def _call_back(resp): + """if isinstance(resp, SubmitJobResponse): job_progress.submitted(resp.job_id) echo.file(f"[jobs] {resp.job_id} ", nl=False) - suite.update_status(job_name=job.job_name, job_id=resp.job_id) + suite.update_status(job_name=job.job_name, job_id=resp.job_id)""" if isinstance(resp, QueryJobResponse): job_progress.running(resp.status, resp.progress) From 76f3b86e7c093769db57fbfd48b2705077ba031a Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 27 Jul 2023 19:17:06 +0800 Subject: [PATCH 03/30] fix fate-test testsuite(#5008) Signed-off-by: Yu Wu --- .../coordinated_lr_testsuite.yaml | 2 +- .../pipeline/coordinated_lr/test_lr_sid.py | 16 +- .../pipeline/coordinated_lr/test_lr_sid_cv.py | 8 +- .../coordinated_lr/test_lr_sid_warm_start.py | 8 +- python/fate_test/fate_test/_client.py | 12 +- python/fate_test/fate_test/_config.py | 21 -- python/fate_test/fate_test/_flow_client.py | 76 ++++-- python/fate_test/fate_test/_parser.py | 241 ++---------------- python/fate_test/fate_test/scripts/_utils.py | 19 +- .../fate_test/scripts/benchmark_cli.py | 48 ++-- .../fate_test/scripts/performance_cli.py | 64 ++--- .../fate_test/scripts/testsuite_cli.py | 66 ++--- 12 files changed, 189 insertions(+), 392 deletions(-) diff --git a/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml index 2de8a25b4f..029d8c6dfc 100644 --- 
a/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml +++ b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml @@ -30,7 +30,7 @@ data: partitions: 4 head: true extend_sid: true - table_name: breast_hetero_host_sid + table_name: breast_hetero_host namespace: experiment role: host_0 tasks: diff --git a/examples/pipeline/coordinated_lr/test_lr_sid.py b/examples/pipeline/coordinated_lr/test_lr_sid.py index 9c7b31fb62..5fb0905ff1 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid.py @@ -33,10 +33,10 @@ def main(config="./config.yaml", namespace=""): pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace=f"{namespace}experiment")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace=f"{namespace}experiment")) + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=4, batch_size=None, @@ -65,11 +65,11 @@ def main(config="./config.yaml", namespace=""): deployed_pipeline = pipeline.get_deployed_pipeline() deployed_pipeline.intersect_0.guest.component_setting( - input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace=f"{namespace}experiment")) + input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) deployed_pipeline.intersect_0.hosts[0].component_setting( - input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace=f"{namespace}experiment")) + input_data=DataWarehouseChannel(name="breast_hetero_host", + 
namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py index badfed7a39..16ce51d4a7 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py @@ -31,10 +31,10 @@ def main(config="./config.yaml", namespace=""): pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace=f"{namespace}experiment")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace=f"{namespace}experiment")) + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=2, batch_size=100, diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py index b9bf8401ef..fb8090064d 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py @@ -32,10 +32,10 @@ def main(config="./config.yaml", namespace=""): pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace=f"{namespace}experiment")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace=f"{namespace}experiment")) + 
intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=4, batch_size=None, diff --git a/python/fate_test/fate_test/_client.py b/python/fate_test/fate_test/_client.py index d0abbb318e..b10b2cf363 100644 --- a/python/fate_test/fate_test/_client.py +++ b/python/fate_test/fate_test/_client.py @@ -23,21 +23,19 @@ def __init__(self, config: Config): self._flow_clients = {} # self._tunnel_id_to_flow_clients = {} self._role_str_to_service_id = {} - self._service_id_to_role_str = {} - self._service_id_to_party = {} + # self._service_id_to_role_str = {} + # self._service_id_to_party = {} # self._tunnel_id_to_tunnel = config.tunnel_id_to_tunnel for party, service_id in config.party_to_service_id.items(): for role_str in config.parties.party_to_role_string(party): self._role_str_to_service_id[role_str] = service_id - self._service_id_to_role_str[service_id] = role_str - self._service_id_to_party[service_id] = party + # self._service_id_to_role_str[service_id] = role_str + # self._service_id_to_party[service_id] = party for service_id, service in config.service_id_to_service.items(): if isinstance(service, Config.service): - role = self._service_id_to_role_str[service_id].split("_")[0] - party = self._service_id_to_party[service_id] self._flow_clients[service_id] = FLOWClient( - service.address, config.data_base_dir, config.cache_directory, role, party) + service.address, config.data_base_dir, config.cache_directory) """elif isinstance(service, Config.tunnel_service): self._flow_clients[service_id] = FLOWClient(None, config.data_base_dir, config.cache_directory) diff --git a/python/fate_test/fate_test/_config.py b/python/fate_test/fate_test/_config.py index 1d8cf8db56..b81b25e59e 100644 --- 
a/python/fate_test/fate_test/_config.py +++ b/python/fate_test/fate_test/_config.py @@ -183,31 +183,10 @@ def __init__(self, config): self.auto_increasing_sid = None # self.work_mode = config.get("work_mode", 0) - tunnel_id = 0 service_id = 0 os.makedirs(os.path.dirname(self.cache_directory), exist_ok=True) for service_config in config["services"]: flow_services = service_config["flow_services"] - # @todo: rm ssh tunnel; add host flow services - """if service_config.get("ssh_tunnel", {}).get("enable", False): - tunnel_id += 1 - services_address = [] - for index, flow_service in enumerate(flow_services): - service_id += 1 - address_host, address_port = flow_service["address"].split(":") - address_port = int(address_port) - services_address.append((address_host, address_port)) - self.service_id_to_service[service_id] = self.tunnel_service(tunnel_id, index) - for party in flow_service["parties"]: - self.party_to_service_id[party] = service_id - tunnel_config = service_config["ssh_tunnel"] - ssh_address_host, ssh_address_port = tunnel_config["ssh_address"].split(":") - self.tunnel_id_to_tunnel[tunnel_id] = self.tunnel((ssh_address_host, int(ssh_address_port)), - tunnel_config["ssh_username"], - tunnel_config["ssh_password"], - tunnel_config["ssh_priv_key"], - services_address) - else:""" for flow_service in flow_services: service_id += 1 address = flow_service["address"] diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py index 280aac5323..2d0d3f8d98 100644 --- a/python/fate_test/fate_test/_flow_client.py +++ b/python/fate_test/fate_test/_flow_client.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import json import os import time import typing @@ -30,24 +31,27 @@ class FLOWClient(object): def __init__(self, address: typing.Optional[str], data_base_dir: typing.Optional[Path], - cache_directory: typing.Optional[Path], - role: str, - party_id: int): + cache_directory: typing.Optional[Path]): self.address = address - self.version = "2.0.0-beta" + self.version = "v2" self._client = FlowClient(self.address.split(':')[0], self.address.split(':')[1], self.version) self._data_base_dir = data_base_dir self._cache_directory = cache_directory self.data_size = 0 - self.role = role - self.party_id = party_id def set_address(self, address): self.address = address + def transform_local_file_to_dataframe(self, data: Data, callback=None, output_path=None): + data_warehouse = self.upload_data(data, callback, output_path) + status = self.transform_to_dataframe(data.namespace, data.table_name, data_warehouse, callback) + return status + def upload_data(self, data: Data, callback=None, output_path=None): - response = self._upload_data(data, output_path=output_path) + response, file_path = self._upload_data(data, output_path=output_path) try: + if callback is not None: + callback(response) code = response["code"] if code != 0: raise ValueError(f"Return code {code}!=0") @@ -57,24 +61,45 @@ def upload_data(self, data: Data, callback=None, output_path=None): job_id = response["job_id"] except BaseException: raise ValueError(f"Upload data fails, response={response}") - # self.monitor_status(job_id, role=self.role, party_id=self.party_id) - self._awaiting(job_id, self.role, self.party_id, ) + self._awaiting(job_id, "local", 0) + return dict(namespace=namespace, name=name) + def transform_to_dataframe(self, namespace, table_name, data_warehouse, callback=None): + response = self._client.data.dataframe_transformer(namespace=namespace, + name=table_name, + data_warehouse=data_warehouse) + + """try: + code = response["code"] + if code != 0: + raise ValueError(f"Return code {code}!=0") 
+ job_id = response["job_id"] + except BaseException: + raise ValueError(f"Transform data fails, response={response}")""" + try: + if callback is not None: + callback(response) + status = self._awaiting(response["job_id"], "local", 0) + status = str(status).lower() + else: + status = response["retmsg"] + + except Exception as e: + raise RuntimeError(f"upload data failed") from e + job_id = response["job_id"] + self._awaiting(job_id, "local", 0) + return status + def delete_data(self, data: Data): - # @todo: use client.table.delete(table=, namespace=) try: table_name = data.config['table_name'] if data.config.get( 'table_name', None) is not None else data.config.get('name') - self._delete_data(table_name=table_name, namespace=data.config['namespace']) + self._client.table.delete(table_name=table_name, namespace=data.config['namespace']) except Exception as e: raise RuntimeError(f"delete data failed") from e - """def output_data_table(self, job_id, role, party_id, component_name): - result = self._output_data_table(job_id=job_id, role=role, party_id=party_id, component_name=component_name) - return result""" - def table_query(self, table_name, namespace): result = self._table_query(table_name=table_name, namespace=namespace) return result @@ -106,7 +131,7 @@ def _awaiting(self, job_id, role, party_id, callback=None): time.sleep(1) def _upload_data(self, data, output_path=None, verbose=0, destroy=1): - conf = data.conf + conf = data.config # if conf.get("engine", {}) != "PATH": if output_path is not None: conf['file'] = os.path.join(os.path.abspath(output_path), os.path.basename(conf.get('file'))) @@ -119,12 +144,12 @@ def _upload_data(self, data, output_path=None, verbose=0, destroy=1): if not path.exists(): raise Exception('The file is obtained from the fate flow client machine, but it does not exist, ' f'please check the path: {path}') - response = self._client.data.upload(file=data.file, + response = self._client.data.upload(file=str(path), head=data.head, 
meta=data.meta, extend_sid=data.extend_sid, partitions=data.partitions) - return response + return response, conf["file"] """def _table_info(self, table_name, namespace): param = { @@ -221,7 +246,7 @@ def _get_summary(self, job_id, role, party_id, component_name): def _query_job(self, job_id, role, party_id): response = self._client.job.query(job_id, role, party_id) - try: + """try: code = response["code"] if code != 0: raise ValueError(f"Return code {code}!=0") @@ -229,9 +254,10 @@ def _query_job(self, job_id, role, party_id): data = response["data"][0] return data except BaseException: - raise ValueError(f"query job is failed, response={response}") + raise ValueError(f"query job is failed, response={response}")""" + return QueryJobResponse(response) - def get_version(self): + """def get_version(self): response = self._post(url='version/get', json={"module": "FATE"}) try: retcode = response['retcode'] @@ -241,7 +267,7 @@ def get_version(self): fate_version = response["data"]["FATE"] except Exception as e: raise RuntimeError(f"get version error: {response}") from e - return fate_version + return fate_version""" def _add_notes(self, job_id, role, party_id, notes): data = dict(job_id=job_id, role=role, party_id=party_id, notes=notes) @@ -280,10 +306,10 @@ def __repr__(self): class QueryJobResponse(object): def __init__(self, response: dict): try: - status = Status(response.get('data')[0]["f_status"]) - progress = response.get('data')[0]['f_progress'] + status = Status(response.get('data')[0]["status"]) + progress = response.get('data')[0]['progress'] except Exception as e: - raise RuntimeError(f"query job error, response: {response}") from e + raise RuntimeError(f"query job error, response: {json.dumps(response, indent=4)}") from e self.status = status self.progress = progress diff --git a/python/fate_test/fate_test/_parser.py b/python/fate_test/fate_test/_parser.py index fc1d832778..9f411c948e 100644 --- a/python/fate_test/fate_test/_parser.py +++ 
b/python/fate_test/fate_test/_parser.py @@ -15,11 +15,10 @@ # import typing -from collections import deque from pathlib import Path import prettytable -from fate_test._config import Parties, Config +from fate_test._config import Config from fate_test._io import echo from fate_test.utils import TxtStyle # import json @@ -91,215 +90,8 @@ def load(config, path: Path): def update(self, config: Config): if config.extend_sid is not None: self.extend_sid = config.extend_sid - if config.meta is not None: - self.meta.update(config.meta) - - -class JobConf(object): - def __init__(self, initiator: dict, role: dict, job_parameters=None, **kwargs): - self.initiator = initiator - self.role = role - self.job_parameters = job_parameters if job_parameters else {} - self.others_kwargs = kwargs - - def as_dict(self): - return dict( - initiator=self.initiator, - role=self.role, - job_parameters=self.job_parameters, - **self.others_kwargs, - ) - - @staticmethod - def load(path: Path): - with path.open("r") as f: - # kwargs = json.load(f, object_hook=CONF_JSON_HOOK.hook) - kwargs = yaml.safe_load(f) - return JobConf(**kwargs) - - @property - def dsl_version(self): - return self.others_kwargs.get("dsl_version", 1) - - def update( - self, - parties: Parties, - timeout, - job_parameters, - component_parameters, - ): - self.initiator = parties.extract_initiator_role(self.initiator["role"]) - self.role = parties.extract_role( - {role: len(parties) for role, parties in self.role.items()} - ) - if timeout > 0: - self.update_job_common_parameters(timeout=timeout) - - if timeout > 0: - self.update_job_common_parameters(timeout=timeout) - - for key, value in job_parameters.items(): - self.update_parameters(parameters=self.job_parameters, key=key, value=value) - for key, value in component_parameters.items(): - if self.dsl_version == 1: - self.update_parameters( - parameters=self.others_kwargs.get("algorithm_parameters"), - key=key, - value=value, - ) - else: - self.update_parameters( - 
parameters=self.others_kwargs.get("component_parameters"), - key=key, - value=value, - ) - - def update_parameters(self, parameters, key, value): - if isinstance(parameters, dict): - for keys in parameters: - if keys == key: - parameters.get(key).update(value), - elif isinstance(parameters[keys], dict): - self.update_parameters(parameters[keys], key, value) - - def update_job_common_parameters(self, **kwargs): - if self.dsl_version == 1: - self.job_parameters.update(**kwargs) - else: - self.job_parameters.setdefault("common", {}).update(**kwargs) - - def update_job_type(self, job_type="predict"): - if self.dsl_version == 1: - if self.job_parameters.get("job_type", None) is None: - self.job_parameters.update({"job_type": job_type}) - else: - if self.job_parameters.setdefault("common", {}).get("job_type", None) is None: - self.job_parameters.setdefault("common", {}).update({"job_type": job_type}) - - def update_component_parameters(self, key, value, parameters=None): - if parameters is None: - if self.dsl_version == 1: - parameters = self.others_kwargs.get("algorithm_parameters") - else: - parameters = self.others_kwargs.get("component_parameters") - if isinstance(parameters, dict): - for keys in parameters: - if keys == key: - if isinstance(value, dict): - parameters[keys].update(value) - else: - parameters.update({key: value}) - elif ( - isinstance(parameters[keys], dict) and parameters[keys] is not None - ): - self.update_component_parameters(key, value, parameters[keys]) - - def get_component_parameters(self, keys): - if len(keys) == 0: - return self.others_kwargs.get("component_parameters") if self.dsl_version == 2 else self.others_kwargs.get( - "role_parameters") - if self.dsl_version == 1: - parameters = self.others_kwargs.get("role_parameters") - else: - parameters = self.others_kwargs.get("component_parameters").get("role") - - for key in keys: - parameters = parameters[key] - return parameters - - -class JobDSL(object): - def __init__(self, components: 
dict, provider=None): - self.components = components - self.provider = provider - - @staticmethod - def load(path: Path, provider): - with path.open("r") as f: - # kwargs = json.load(f, object_hook=DSL_JSON_HOOK.hook) - kwargs = yaml.safe_load(f) - if provider is not None: - kwargs["provider"] = provider - return JobDSL(**kwargs) - - def as_dict(self): - if self.provider is None: - return dict(components=self.components) - else: - return dict(components=self.components, provider=self.provider) - - -class Job(object): - def __init__( - self, - job_name: str, - job_conf: JobConf, - job_dsl: typing.Optional[JobDSL], - pre_works: list, - ): - self.job_name = job_name - self.job_conf = job_conf - self.job_dsl = job_dsl - self.pre_works = pre_works - - @classmethod - def load(cls, job_name, job_configs, base: Path, provider): - job_conf = JobConf.load(base.joinpath(job_configs.get("conf")).resolve()) - job_dsl = job_configs.get("dsl", None) - if job_dsl is not None: - job_dsl = JobDSL.load(base.joinpath(job_dsl).resolve(), provider) - - pre_works = [] - pre_works_value = {} - deps_dict = {} - - if job_configs.get("model_deps", None): - pre_works.append(job_configs["model_deps"]) - deps_dict["model_deps"] = {'name': job_configs["model_deps"]} - elif job_configs.get("deps", None): - pre_works.append(job_configs["deps"]) - deps_dict["model_deps"] = {'name': job_configs["deps"]} - if job_configs.get("data_deps", None): - deps_dict["data_deps"] = {'data': job_configs["data_deps"]} - pre_works.append(list(job_configs["data_deps"].keys())[0]) - deps_dict["data_deps"].update({'name': list(job_configs["data_deps"].keys())}) - if job_configs.get("cache_deps", None): - pre_works.append(job_configs["cache_deps"]) - deps_dict["cache_deps"] = {'name': job_configs["cache_deps"]} - if job_configs.get("model_loader_deps", None): - pre_works.append(job_configs["model_loader_deps"]) - deps_dict["model_loader_deps"] = {'name': job_configs["model_loader_deps"]} - - 
pre_works_value.update(deps_dict) - _config.deps_alter[job_name] = pre_works_value - - return Job( - job_name=job_name, job_conf=job_conf, job_dsl=job_dsl, pre_works=pre_works - ) - - """@property - def submit_params(self): - return dict( - conf=self.job_conf.as_dict(), - dsl=self.job_dsl.as_dict() if self.job_dsl else None, - )""" - - """def set_pre_work(self, name, **kwargs): - self.job_conf.update_job_common_parameters(**kwargs) - self.job_conf.update_job_type("predict")""" - - def set_input_data(self, hierarchys, table_info): - for table_name, hierarchy in zip(table_info, hierarchys): - key = list(table_name.keys())[0] - value = table_name[key] - self.job_conf.update_component_parameters( - key=key, - value=value, - parameters=self.job_conf.get_component_parameters(hierarchy), - ) - - def is_submit_ready(self): - return len(self.pre_works) == 0 + """if config.meta is not None: + self.meta.update(config.meta)""" class PipelineJob(object): @@ -321,11 +113,11 @@ def __init__( self.pipeline_jobs = pipeline_jobs self.path = path self.suite_name = Path(self.path).stem - - self._dependency: typing.MutableMapping[str, typing.List[Job]] = {} self._final_status: typing.MutableMapping[str, FinalStatus] = {} + """ + self._dependency: typing.MutableMapping[str, typing.List[Job]] = {} self._ready_jobs = deque() - """for job in self.jobs: + for job in self.jobs: for name in job.pre_works: self._dependency.setdefault(name, []).append(job) @@ -341,17 +133,14 @@ def load(path: Path, provider): with path.open("r") as f: # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) testsuite_config = yaml.safe_load(f) + # testsuite_config = DATA_JSON_HOOK.hook(testsuite_config) dataset = [] for d in testsuite_config.get("data"): - if "use_local_data" not in d: - d.update({"use_local_data": _config.use_local_data}) + d = DATA_JSON_HOOK.hook(d) + """if "use_local_data" not in d: + d.update({"use_local_data": _config.use_local_data})""" dataset.append(Data.load(d, path)) - 
"""jobs = [] - for job_name, job_configs in testsuite_config.get("tasks", {}).items(): - jobs.append( - Job.load(job_name=job_name, job_configs=job_configs, base=path.parent, provider=provider) - )""" pipeline_jobs = [] if testsuite_config.get("tasks", None) is not None and provider is not None: @@ -363,9 +152,9 @@ def load(path: Path, provider): testsuite = Testsuite(dataset, pipeline_jobs, path) return testsuite - def jobs_iter(self) -> typing.Generator[Job, None, None]: + """def jobs_iter(self) -> typing.Generator[Job, None, None]: while self._ready_jobs: - yield self._ready_jobs.pop() + yield self._ready_jobs.pop()""" @staticmethod def style_table(txt): @@ -445,9 +234,9 @@ def update_status( setattr(self._final_status[job_name], k, v) def get_final_status(self): - for name, jobs in self._dependency.items(): + """for name, jobs in self._dependency.items(): for job in jobs: - self._final_status[job.job_name].rest_dependency.append(name) + self._final_status[job.job_name].rest_dependency.append(name)""" return self._final_status @@ -497,9 +286,11 @@ def load(path: Path): with path.open("r") as f: # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) testsuite_config = yaml.safe_load(f) + # testsuite_config = DATA_JSON_HOOK.hook(testsuite_config) dataset = [] for d in testsuite_config.get("data"): + d = DATA_JSON_HOOK.hook(d) dataset.append(Data.load(d, path)) pairs = [] diff --git a/python/fate_test/fate_test/scripts/_utils.py b/python/fate_test/fate_test/scripts/_utils.py index c087300515..cd3a04e5a0 100644 --- a/python/fate_test/fate_test/scripts/_utils.py +++ b/python/fate_test/fate_test/scripts/_utils.py @@ -12,8 +12,6 @@ from fate_test._io import echo, LOGGER, set_logger from fate_test._parser import Testsuite, BenchmarkSuite, DATA_JSON_HOOK, CONF_JSON_HOOK, DSL_JSON_HOOK -from fate_test import _config - def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, config_inst, encryption_type, 
match_rate, sparsity, force, split_host, output_path, parallelize): @@ -45,7 +43,7 @@ def _find_testsuite_files(path): match_rate, sparsity, force, split_host, output_path, parallelize) -def _load_testsuites(includes, excludes, glob, provider=None, suffix="testsuite.json", suite_type="testsuite"): +def _load_testsuites(includes, excludes, glob, provider=None, suffix="testsuite.yaml", suite_type="testsuite"): def _find_testsuite_files(path): if isinstance(path, str): path = Path(path) @@ -102,9 +100,7 @@ def _upload_data(clients: Clients, suite, config: Config, output_path=None): width=24) as bar: for i, data in enumerate(suite.dataset): data.update(config) - table_name = data.config['table_name'] if data.config.get( - 'table_name', None) is not None else data.config.get('name') - data_progress = DataProgress(f"{data.role_str}<-{data.config['namespace']}.{table_name}") + data_progress = DataProgress(f"{data.role_str}<-{data.namespace}.{data.table_name}") def update_bar(n_step): bar.item_show_func = lambda x: data_progress.show() @@ -121,16 +117,21 @@ def _call_back(resp): try: echo.stdout_newline() - status, data_path = clients[data.role_str].upload_data(data, _call_back, output_path) + # role, idx = data.role_str.lower().split("_") + # party_id = config.role[role][int(idx)] + status = clients[data.role_str].transform_local_file_to_dataframe(data, + _call_back, + output_path) time.sleep(1) data_progress.update() if status != 'success': raise RuntimeError(f"uploading {i + 1}th data for {suite.path} {status}") bar.update(1) - if _config.data_switch: + + """if _config.data_switch: from fate_test.scripts import generate_mock_data - generate_mock_data.remove_file(data_path) + generate_mock_data.remove_file(data_path)""" except Exception: exception_id = str(uuid.uuid1()) echo.file(f"exception({exception_id})") diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index 9030ed9818..d9f82d4139 100644 --- 
a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -66,34 +66,34 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora echo.echo(f"\tdataset({len(suite.dataset)}) benchmark groups({len(suite.pairs)}) {suite.path}") if not yes and not click.confirm("running?"): return - with Clients(config_inst) as client: - fate_version = client["guest_0"].get_version() - for i, suite in enumerate(suites): - # noinspection PyBroadException - try: - start = time.time() - echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') - if not skip_data: - try: - _upload_data(client, suite, config_inst) - except Exception as e: - raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + client = Clients(config_inst) + fate_version = client["guest_0"].get_version() + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + if not skip_data: try: - _run_benchmark_pairs(config_inst, suite, tol, namespace, data_namespace_mangling, storage_tag, - history_tag, fate_version, match_details) + _upload_data(client, suite, config_inst) except Exception as e: - raise RuntimeError(f"exception occur while running benchmark jobs for {suite.path}") from e + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + try: + _run_benchmark_pairs(config_inst, suite, tol, namespace, data_namespace_mangling, storage_tag, + history_tag, fate_version, match_details) + except Exception as e: + raise RuntimeError(f"exception occur while running benchmark jobs for {suite.path}") from e - if not skip_data and clean_data: - _delete_data(client, suite) - echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') + if not skip_data and clean_data: + 
_delete_data(client, suite) + echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') - except Exception: - exception_id = uuid.uuid1() - echo.echo(f"exception in {suite.path}, exception_id={exception_id}", err=True, fg='red') - LOGGER.exception(f"exception id: {exception_id}") - finally: - echo.stdout_newline() + except Exception: + exception_id = uuid.uuid1() + echo.echo(f"exception in {suite.path}, exception_id={exception_id}", err=True, fg='red') + LOGGER.exception(f"exception id: {exception_id}") + finally: + echo.stdout_newline() echo.farewell() echo.echo(f"testsuite namespace: {namespace}", fg='red') diff --git a/python/fate_test/fate_test/scripts/performance_cli.py b/python/fate_test/fate_test/scripts/performance_cli.py index 338f66c868..e07791cc9a 100644 --- a/python/fate_test/fate_test/scripts/performance_cli.py +++ b/python/fate_test/fate_test/scripts/performance_cli.py @@ -96,44 +96,44 @@ def get_perf_template(conf: Config, job_type): return echo.stdout_newline() - with Clients(config_inst) as client: + client = Clients(config_inst) - for i, suite in enumerate(suites): - # noinspection PyBroadException - try: - start = time.time() - echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') - - if not skip_data: - try: - _upload_data(client, suite, config_inst) - except Exception as e: - raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e - - echo.stdout_newline() - try: - time_consuming = _submit_job(client, suite, namespace, config_inst, timeout, update_job_parameters, - storage_tag, history_tag, update_component_parameters, max_iter, - max_depth, num_trees, task_cores) - except Exception as e: - raise RuntimeError(f"exception occur while submit job for {suite.path}") from e + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at 
{time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + if not skip_data: try: - _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) + _upload_data(client, suite, config_inst) except Exception as e: - raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e - echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') - if not skip_data and clean_data: - _delete_data(client, suite) - echo.echo(suite.pretty_final_summary(time_consuming), fg='red') + echo.stdout_newline() + try: + time_consuming = _submit_job(client, suite, namespace, config_inst, timeout, update_job_parameters, + storage_tag, history_tag, update_component_parameters, max_iter, + max_depth, num_trees, task_cores) + except Exception as e: + raise RuntimeError(f"exception occur while submit job for {suite.path}") from e - except Exception: - exception_id = uuid.uuid1() - echo.echo(f"exception in {suite.path}, exception_id={exception_id}") - LOGGER.exception(f"exception id: {exception_id}") - finally: - echo.stdout_newline() + try: + _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) + except Exception as e: + raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e + + echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') + if not skip_data and clean_data: + _delete_data(client, suite) + echo.echo(suite.pretty_final_summary(time_consuming), fg='red') + + except Exception: + exception_id = uuid.uuid1() + echo.echo(f"exception in {suite.path}, exception_id={exception_id}") + LOGGER.exception(f"exception id: {exception_id}") + finally: + echo.stdout_newline() echo.farewell() echo.echo(f"testsuite namespace: {namespace}", fg='red') diff --git a/python/fate_test/fate_test/scripts/testsuite_cli.py 
b/python/fate_test/fate_test/scripts/testsuite_cli.py index 864ac17e53..f308ad1674 100644 --- a/python/fate_test/fate_test/scripts/testsuite_cli.py +++ b/python/fate_test/fate_test/scripts/testsuite_cli.py @@ -87,39 +87,41 @@ def run_suite(ctx, include, exclude, glob, return echo.stdout_newline() - with Clients(config_inst) as client: - for i, suite in enumerate(suites): - # noinspection PyBroadException - try: - start = time.time() - echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') - if not skip_data and config_inst.work_mode: - try: - _upload_data(client, suite, config_inst) - except Exception as e: - raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e - if data_only: - continue + # with Clients(config_inst) as client: + client = Clients(config_inst) + + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + if not skip_data: + try: + _upload_data(client, suite, config_inst) + except Exception as e: + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + if data_only: + continue + + if not skip_jobs: + try: + time_consuming = _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) + except Exception as e: + raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e + + if not skip_data and clean_data: + _delete_data(client, suite) + echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') + if not skip_jobs: + suite_file = str(suite.path).split("/")[-1] + echo.echo(suite.pretty_final_summary(time_consuming, suite_file)) - if not skip_jobs: - try: - time_consuming = _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) - except Exception as e: - raise RuntimeError(f"exception occur while 
running pipeline jobs for {suite.path}") from e - - if not skip_data and clean_data and config_inst.work_mode: - _delete_data(client, suite) - echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') - if not skip_jobs: - suite_file = str(suite.path).split("/")[-1] - echo.echo(suite.pretty_final_summary(time_consuming, suite_file)) - - except Exception: - exception_id = uuid.uuid1() - echo.echo(f"exception in {suite.path}, exception_id={exception_id}") - LOGGER.exception(f"exception id: {exception_id}") - finally: - echo.stdout_newline() + except Exception: + exception_id = uuid.uuid1() + echo.echo(f"exception in {suite.path}, exception_id={exception_id}") + LOGGER.exception(f"exception id: {exception_id}") + finally: + echo.stdout_newline() non_success_summary() echo.farewell() echo.echo(f"testsuite namespace: {namespace}", fg='red') From 0f48049f5b82c581df178db22e68d258d0c01d3e Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 28 Jul 2023 15:12:06 +0800 Subject: [PATCH 04/30] add benchmark example(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/__init__.py | 0 examples/benchmark_quality/breast_config.yaml | 16 ++ .../default_credit_config.yaml | 16 ++ .../benchmark_quality/epsilon_5k_config.yaml | 16 ++ .../benchmark_quality/give_credit_config.yaml | 16 ++ examples/benchmark_quality/lr_benchmark.yaml | 162 ++++++++++++++++++ .../benchmark_quality/pipeline-lr-binary.py | 134 +++++++++++++++ .../benchmark_quality/pipeline-lr-multi.py | 116 +++++++++++++ .../benchmark_quality/sklearn-lr-binary.py | 90 ++++++++++ .../benchmark_quality/sklearn-lr-multi.py | 79 +++++++++ .../benchmark_quality/vehicle_config.yaml | 12 ++ .../vehicle_lr_sklearn_config.yaml | 12 ++ python/fate_test/fate_test/_parser.py | 27 +-- python/fate_test/fate_test/scripts/_utils.py | 12 +- 14 files changed, 682 insertions(+), 26 deletions(-) create mode 100644 examples/benchmark_quality/__init__.py create mode 100644 
examples/benchmark_quality/breast_config.yaml create mode 100644 examples/benchmark_quality/default_credit_config.yaml create mode 100644 examples/benchmark_quality/epsilon_5k_config.yaml create mode 100644 examples/benchmark_quality/give_credit_config.yaml create mode 100644 examples/benchmark_quality/lr_benchmark.yaml create mode 100644 examples/benchmark_quality/pipeline-lr-binary.py create mode 100644 examples/benchmark_quality/pipeline-lr-multi.py create mode 100644 examples/benchmark_quality/sklearn-lr-binary.py create mode 100644 examples/benchmark_quality/sklearn-lr-multi.py create mode 100644 examples/benchmark_quality/vehicle_config.yaml create mode 100644 examples/benchmark_quality/vehicle_lr_sklearn_config.yaml diff --git a/examples/benchmark_quality/__init__.py b/examples/benchmark_quality/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/benchmark_quality/breast_config.yaml b/examples/benchmark_quality/breast_config.yaml new file mode 100644 index 0000000000..00090b4c16 --- /dev/null +++ b/examples/benchmark_quality/breast_config.yaml @@ -0,0 +1,16 @@ +data_guest: "examples/data/breast_hetero_guest.csv" +data_host: "examples/data/breast_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +epochs: 30 +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 0.15 + factor: 1.0 + total_iters: 100 +optimizer: + method: "rmsprop" +batch_size: 5000 +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/default_credit_config.yaml b/examples/benchmark_quality/default_credit_config.yaml new file mode 100644 index 0000000000..f86a48e834 --- /dev/null +++ b/examples/benchmark_quality/default_credit_config.yaml @@ -0,0 +1,16 @@ +data_guest: "examples/data/default_credit_hetero_guest.csv" +data_host: "examples/data/default_credit_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +epochs: 30 +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 
0.15 + factor: 1.0 + total_iters: 100 +optimizer: + method: "zeros" +batch_size: 500 +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/epsilon_5k_config.yaml b/examples/benchmark_quality/epsilon_5k_config.yaml new file mode 100644 index 0000000000..be63b9b414 --- /dev/null +++ b/examples/benchmark_quality/epsilon_5k_config.yaml @@ -0,0 +1,16 @@ +data_guest: "examples/data/epsilon_5k_hetero_guest.csv" +data_host: "examples/data/epsilon_5k_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +epochs: 30 +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 0.15 + factor: 1.0 + total_iters: 800 +optimizer: + method: "rmsprop" +batch_size: 5000 +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/give_credit_config.yaml b/examples/benchmark_quality/give_credit_config.yaml new file mode 100644 index 0000000000..bc2b6a683f --- /dev/null +++ b/examples/benchmark_quality/give_credit_config.yaml @@ -0,0 +1,16 @@ +data_guest: "examples/data/give_credit_hetero_guest.csv" +data_host: "examples/data/give_credit_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +epochs: 6 +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 0.15 + factor: 1.0 + total_iters: 100 +optimizer: + method: "adam" +batch_size: 550 +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr_benchmark.yaml b/examples/benchmark_quality/lr_benchmark.yaml new file mode 100644 index 0000000000..dad81264ed --- /dev/null +++ b/examples/benchmark_quality/lr_benchmark.yaml @@ -0,0 +1,162 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: 
experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 + - file: "../../data/default_credit_hetero_guest.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + label_type: int64 + label_name: y + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: default_credit_hetero_guest + namespace: experiment + role: guest_0 + - file: "../../data/default_credit_hetero_host.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: default_credit_hetero_host + namespace: experiment + role: host_0 + - file: "../../data/give_credit_hetero_guest.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + label_type: int64 + label_name: y + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: give_credit_hetero_guest + namespace: experiment + role: guest_0 + - file: "../../data/give_credit_hetero_host.csv" + head: 1 + partition: 16 + table_name: give_credit_hetero_host + namespace: experiment + role: host_0 + - file: "../../data/epsilon_5k_hetero_guest.csv" + head: 1 + partition: 16 + table_name: epsilon_5k_hetero_guest + namespace: experiment + role: guest_0 + - file: "../../data/epsilon_5k_hetero_host.csv" + head: 1 + partition: 16 + table_name: epsilon_5k_hetero_host + namespace: experiment + role: host_0 + - file: 
"../../data/vehicle_scale_hetero_guest.csv" + head: 1 + partition: 16 + table_name: vehicle_scale_hetero_guest + namespace: experiment + role: guest_0 + - file: "../../data/vehicle_scale_hetero_host.csv" + head: 1 + partition: 16 + table_name: vehicle_scale_hetero_host + namespace: experiment + role: host_0 +hetero_lr-binary-0: + local: + script: "./sklearn-lr-binary.py" + conf: "./breast_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./breast_config.yaml" + compare_setting: + relative_tol: 0.01 +hetero_lr-binary-1: + local: + script: "./sklearn-lr-binary.py" + conf: "./default_credit_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./default_credit_config.yaml" + compare_setting: + relative_tol: 0.01 +hetero_lr-binary-2: + local: + script: "./sklearn-lr-binary.py" + conf: "./epsilon_5k_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./epsilon_5k_config.yaml" + compare_setting: + relative_tol: 0.01 +hetero_lr-binary-3: + local: + script: "./sklearn-lr-binary.py" + conf: "./give_credit_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./give_credit_config.yaml" + compare_setting: + relative_tol: 0.01 +multi: + local: + script: "./sklearn-lr-multi.py" + conf: "./vehicle_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-multi.py" + conf: "./vehicle_config.yaml" + compare_setting: + relative_tol: 0.01 diff --git a/examples/benchmark_quality/pipeline-lr-binary.py b/examples/benchmark_quality/pipeline-lr-binary.py new file mode 100644 index 0000000000..7fa1f786ba --- /dev/null +++ b/examples/benchmark_quality/pipeline-lr-binary.py @@ -0,0 +1,134 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils +from fate_test.utils import extract_data, parse_summary_result +from federatedml.evaluation.metrics import classification_metric + + +def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): + # obtain config + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + if isinstance(param, str): + param = test_utils.JobConfig.load_from_file(param) + + assert isinstance(param, dict) + + data_set = param.get("data_guest").split('/')[-1] + if data_set == "default_credit_hetero_guest.csv": + guest_data_table = 'default_credit_hetero_guest' + host_data_table = 'default_credit_hetero_host' + elif data_set == 'breast_hetero_guest.csv': + guest_data_table = 'breast_hetero_guest' + host_data_table = 'breast_hetero_host' + elif data_set == 'give_credit_hetero_guest.csv': + guest_data_table = 'give_credit_hetero_guest' + host_data_table = 'give_credit_hetero_host' + elif data_set == 'epsilon_5k_hetero_guest.csv': + guest_data_table = 'epsilon_5k_hetero_guest' + host_data_table = 'epsilon_5k_hetero_host' + else: + raise ValueError(f"Cannot recognized data_set: {data_set}") + + guest_train_data = 
{"name": guest_data_table, "namespace": f"experiment{namespace}"} + host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) + + lr_param = { + } + + config_param = { + "penalty": param["penalty"], + "epochs": param["epochs"], + "learning_rate_scheduler": param["learning_rate_scheduler"], + "optimizer": param["optimizer"], + "batch_size": param["batch_size"], + "early_stop": "diff", + "tol": 1e-5, + "init_param": param.get("init_method", {"method": "zeros"}) + } + lr_param.update(config_param) + lr_0 = CoordinatedLR("lr_0", + train_data=intersect_0.outputs["output_data"], + **config_param) + lr_1 = CoordinatedLR("lr_1", + test_data=intersect_0.outputs["output_data"], + input_model=lr_0.outputs["train_output_model"]) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="binary", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.add_task(lr_1) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + lr_0_data = pipeline.get_component("lr_0").get_output_data() + lr_1_data = pipeline.get_component("lr_1").get_output_data() + lr_0_score = extract_data(lr_0_data, "predict_result") + lr_0_label = extract_data(lr_0_data, "label") + lr_1_score = extract_data(lr_1_data, "predict_result") + lr_1_label = extract_data(lr_1_data, "label") + lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) + lr_1_score_label = 
extract_data(lr_1_data, "predict_result", keep_id=True) + result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) + metric_lr = { + "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label), + "ks_2samp": classification_metric.KSTest.compute(lr_0_score, lr_1_score), + "mAP_D_value": classification_metric.AveragePrecisionScore().compute(lr_0_score, lr_1_score, lr_0_label, + lr_1_label)} + result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} + + data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, + "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} + } + + return data_summary, result_summary + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./breast_config.yaml") + args = parser.parse_args() + main(args.config, args.param) diff --git a/examples/benchmark_quality/pipeline-lr-multi.py b/examples/benchmark_quality/pipeline-lr-multi.py new file mode 100644 index 0000000000..f774515dc1 --- /dev/null +++ b/examples/benchmark_quality/pipeline-lr-multi.py @@ -0,0 +1,116 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils +from fate_test.utils import extract_data, parse_summary_result +from federatedml.evaluation.metrics import classification_metric + + +def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): + # obtain config + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + if isinstance(param, str): + param = test_utils.JobConfig.load_from_file(param) + + assert isinstance(param, dict) + data_set = param.get("data_guest").split('/')[-1] + if data_set == "vehicle_scale_hetero_guest.csv": + guest_data_table = 'vehicle_scale_hetero_guest' + host_data_table = 'vehicle_scale_hetero_host' + else: + raise ValueError(f"Cannot recognized data_set: {data_set}") + + guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} + host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) + + lr_param = { + } + + config_param = { + "penalty": param["penalty"], + "epochs": param["epochs"], + "learning_rate_scheduler": param["learning_rate_scheduler"], + "optimizer": param["optimizer"], + "batch_size": param["batch_size"], + "early_stop": 
"diff", + "tol": 1e-5, + "init_param": param.get("init_method", {"method": "zeros"}) + } + lr_param.update(config_param) + lr_0 = CoordinatedLR("lr_0", + train_data=intersect_0.outputs["output_data"], + **config_param) + lr_1 = CoordinatedLR("lr_1", + test_data=intersect_0.outputs["output_data"], + input_model=lr_0.outputs["train_output_model"]) + + evaluation_0 = Evaluation('evaluation_0', default_eval_setting="multi") + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.add_task(lr_1) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + lr_0_data = pipeline.get_component("lr_0").get_output_data() + lr_1_data = pipeline.get_component("lr_1").get_output_data() + + result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) + lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) + lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) + metric_lr = { + "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label)} + result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} + + data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, + "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} + } + return data_summary, result_summary + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./vehicle_config.yaml") + + args = parser.parse_args() + if args.config is not None: + main(args.config, args.param) + else: + main() diff --git a/examples/benchmark_quality/sklearn-lr-binary.py b/examples/benchmark_quality/sklearn-lr-binary.py new file mode 100644 index 0000000000..5b17692621 --- 
/dev/null +++ b/examples/benchmark_quality/sklearn-lr-binary.py @@ -0,0 +1,90 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os + +import pandas +from pipeline.utils.tools import JobConfig +from sklearn.linear_model import SGDClassifier +from sklearn.metrics import roc_auc_score, precision_score, accuracy_score, recall_score, roc_curve + + +def main(config="../../config.yaml", param="./vechile_config.yaml"): + # obtain config + if isinstance(param, str): + param = JobConfig.load_from_file(param) + assert isinstance(param, dict) + data_guest = param["data_guest"] + data_host = param["data_host"] + idx = param["idx"] + label_name = param["label_name"] + + if isinstance(config, str): + config = JobConfig.load_from_file(config) + print(f"config: {config}") + data_base_dir = config["data_base_dir"] + else: + data_base_dir = config.data_base_dir + + config_param = { + "penalty": param["penalty"], + "max_iter": 100, + "alpha": param["alpha"], + "learning_rate": "optimal", + "eta0": param["learning_rate"], + "random_state": 105 + } + + # prepare data + df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx) + df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx) + df = df_guest.join(df_host, rsuffix="host") + y = df[label_name] + X = df.drop(label_name, axis=1) + + # x_train, x_test, y_train, y_test = train_test_split(X, y, 
test_size=0.25, random_state=0) + x_train, x_test, y_train, y_test = X, X, y, y + + # lm = LogisticRegression(max_iter=20) + lm = SGDClassifier(loss="log", **config_param) + lm_fit = lm.fit(x_train, y_train) + y_pred = lm_fit.predict(x_test) + y_prob = lm_fit.predict_proba(x_test)[:, 1] + try: + auc_score = roc_auc_score(y_test, y_prob) + except BaseException: + print(f"no auc score available") + return + recall = recall_score(y_test, y_pred, average="macro") + pr = precision_score(y_test, y_pred, average="macro") + acc = accuracy_score(y_test, y_pred) + # y_predict_proba = est.predict_proba(X_test)[:, 1] + fpr, tpr, thresholds = roc_curve(y_test, y_prob) + + ks = max(tpr - fpr) + result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc} + print(result) + print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") + return {}, result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB") + parser.add_argument("-p", "--param", type=str, default="./breast_config.yaml", + help="config file for params") + args = parser.parse_args() + main(param=args.param) diff --git a/examples/benchmark_quality/sklearn-lr-multi.py b/examples/benchmark_quality/sklearn-lr-multi.py new file mode 100644 index 0000000000..0b33e57c8f --- /dev/null +++ b/examples/benchmark_quality/sklearn-lr-multi.py @@ -0,0 +1,79 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os + +import pandas +from pipeline.utils.tools import JobConfig +from sklearn.linear_model import SGDClassifier +from sklearn.metrics import precision_score, accuracy_score, recall_score + + +def main(config="../../config.yaml", param="./vehicle_config.yaml"): + # obtain config + if isinstance(param, str): + param = JobConfig.load_from_file(param) + assert isinstance(param, dict) + data_guest = param["data_guest"] + data_host = param["data_host"] + + idx = param["idx"] + label_name = param["label_name"] + + if isinstance(config, str): + config = JobConfig.load_from_file(config) + data_base_dir = config["data_base_dir"] + else: + data_base_dir = config.data_base_dir + + config_param = { + "penalty": param["penalty"], + "max_iter": param["max_iter"], + "alpha": param["alpha"], + "learning_rate": "optimal", + "eta0": param["learning_rate"], + "random_state": 105 + } + + # prepare data + df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx) + df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx) + + df = df_guest.join(df_host, rsuffix="host") + y = df[label_name] + X = df.drop(label_name, axis=1) + # lm = LogisticRegression(max_iter=20) + lm = SGDClassifier(loss="log", **config_param, shuffle=False) + lm_fit = lm.fit(X, y) + y_pred = lm_fit.predict(X) + + recall = recall_score(y, y_pred, average="macro") + pr = precision_score(y, y_pred, average="macro") + acc = accuracy_score(y, y_pred) + + result = {"accuracy": acc} + print(result) + return {}, result + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB") + parser.add_argument("-param", type=str, + help="config file for params") + args = parser.parse_args() + if args.param is not None: + main(param=args.param) diff --git a/examples/benchmark_quality/vehicle_config.yaml 
b/examples/benchmark_quality/vehicle_config.yaml new file mode 100644 index 0000000000..29dceb4345 --- /dev/null +++ b/examples/benchmark_quality/vehicle_config.yaml @@ -0,0 +1,12 @@ +data_guest: "examples/data/vehicle_scale_hetero_guest.csv" +data_host: "examples/data/vehicle_scale_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +max_iter: 20 +alpha: 0.00001 +learning_rate: 0.3 +optimizer: "adam" +batch_size: 16 +early_stop: "diff" +init_method: "random_uniform" \ No newline at end of file diff --git a/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml b/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml new file mode 100644 index 0000000000..f70fdb409a --- /dev/null +++ b/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml @@ -0,0 +1,12 @@ +data_guest: "examples/data/vehicle_scale_hetero_guest.csv" +data_host: "examples/data/vehicle_scale_hetero_host.csv" +idx: "id" +label_name: "y" +penalty: "L2" +max_iter: 30 +alpha: 0.001 +learning_rate: 0.15 +optimizer: "rmsprop" +batch_size: -1 +early_stop: "diff" +init_method: "zeros" \ No newline at end of file diff --git a/python/fate_test/fate_test/_parser.py b/python/fate_test/fate_test/_parser.py index 9f411c948e..7d2e898382 100644 --- a/python/fate_test/fate_test/_parser.py +++ b/python/fate_test/fate_test/_parser.py @@ -55,9 +55,9 @@ def _chain_hooks(hook_funcs, d): return d -DATA_JSON_HOOK = chain_hook() -CONF_JSON_HOOK = chain_hook() -DSL_JSON_HOOK = chain_hook() +DATA_LOAD_HOOK = chain_hook() +CONF_LOAD_HOOK = chain_hook() +DSL_LOAD_HOOK = chain_hook() class Data(object): @@ -131,13 +131,13 @@ def __init__( @staticmethod def load(path: Path, provider): with path.open("r") as f: - # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + # testsuite_config = json.load(f, object_hook=DATA_LOAD_HOOK.hook) testsuite_config = yaml.safe_load(f) - # testsuite_config = DATA_JSON_HOOK.hook(testsuite_config) + # testsuite_config = DATA_LOAD_HOOK.hook(testsuite_config) dataset = [] 
for d in testsuite_config.get("data"): - d = DATA_JSON_HOOK.hook(d) + d = DATA_LOAD_HOOK.hook(d) """if "use_local_data" not in d: d.update({"use_local_data": _config.use_local_data})""" dataset.append(Data.load(d, path)) @@ -290,7 +290,7 @@ def load(path: Path): dataset = [] for d in testsuite_config.get("data"): - d = DATA_JSON_HOOK.hook(d) + d = DATA_LOAD_HOOK.hook(d) dataset.append(Data.load(d, path)) pairs = [] @@ -367,16 +367,3 @@ def _hook(d): return d return _hook - - -"""class JsonParamType(click.ParamType): - name = "json_string" - - def convert(self, value, param, ctx): - try: - return json.loads(value) - except ValueError: - self.fail(f"{value} is not a valid json string", param, ctx) - - -JSON_STRING = JsonParamType()""" diff --git a/python/fate_test/fate_test/scripts/_utils.py b/python/fate_test/fate_test/scripts/_utils.py index cd3a04e5a0..8445e55bc1 100644 --- a/python/fate_test/fate_test/scripts/_utils.py +++ b/python/fate_test/fate_test/scripts/_utils.py @@ -10,7 +10,7 @@ from fate_test._config import Config from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse from fate_test._io import echo, LOGGER, set_logger -from fate_test._parser import Testsuite, BenchmarkSuite, DATA_JSON_HOOK, CONF_JSON_HOOK, DSL_JSON_HOOK +from fate_test._parser import Testsuite, BenchmarkSuite, DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, @@ -179,11 +179,11 @@ def _set_namespace(data_namespace_mangling, namespace): if data_namespace_mangling: echo.echo(f"add data_namespace_mangling: _{namespace}") - DATA_JSON_HOOK.add_extend_namespace_hook(namespace) - CONF_JSON_HOOK.add_extend_namespace_hook(namespace) + DATA_LOAD_HOOK.add_extend_namespace_hook(namespace) + CONF_LOAD_HOOK.add_extend_namespace_hook(namespace) def _add_replace_hook(replace): - DATA_JSON_HOOK.add_replace_hook(replace) - CONF_JSON_HOOK.add_replace_hook(replace) - 
DSL_JSON_HOOK.add_replace_hook(replace) + DATA_LOAD_HOOK.add_replace_hook(replace) + CONF_LOAD_HOOK.add_replace_hook(replace) + DSL_LOAD_HOOK.add_replace_hook(replace) From 36ff38861a35a00d2fff7bcd2eb181a386704385 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 28 Jul 2023 15:14:01 +0800 Subject: [PATCH 05/30] replace json with yaml(#5008) Signed-off-by: Yu Wu --- python/fate_test/fate_test/utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/fate_test/fate_test/utils.py b/python/fate_test/fate_test/utils.py index f33d7af74c..443644b223 100644 --- a/python/fate_test/fate_test/utils.py +++ b/python/fate_test/fate_test/utils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import json + import math import os @@ -21,6 +21,7 @@ from colorama import init, deinit, Fore, Style from fate_test._io import echo from prettytable import PrettyTable, ORGMODE +from ruamel import yaml SCRIPT_METRICS = "script_metrics" DISTRIBUTION_METRICS = "distribution_metrics" @@ -253,10 +254,10 @@ def class_group(class_dict): return metric history_info_dir = "/".join([os.path.join(os.path.abspath(cache_directory), 'benchmark_history', - "benchmark_quality.json")]) + "benchmark_quality.yaml")]) assert os.path.exists(history_info_dir), f"Please check the {history_info_dir} Is it deleted" with open(history_info_dir, 'r') as f: - benchmark_quality = json.load(f, object_hook=dict) + benchmark_quality = yaml.safe_load(f) regression_metric = {} regression_quality = {} class_quality = {} @@ -299,11 +300,11 @@ def metric_compare(abs_tol, rel_tol, match_details, **metric_results): def _save_quality(storage_tag, cache_directory, **results): - save_dir = "/".join([os.path.join(os.path.abspath(cache_directory), 'benchmark_history', "benchmark_quality.json")]) + save_dir = "/".join([os.path.join(os.path.abspath(cache_directory), 'benchmark_history', "benchmark_quality.yaml")]) 
os.makedirs(os.path.dirname(save_dir), exist_ok=True) if os.path.exists(save_dir): with open(save_dir, 'r') as f: - benchmark_quality = json.load(f, object_hook=dict) + benchmark_quality = yaml.safe_load(f, object_hook=dict) else: benchmark_quality = {} if storage_tag in benchmark_quality: @@ -311,7 +312,7 @@ def _save_quality(storage_tag, cache_directory, **results): benchmark_quality.update({storage_tag: results}) try: with open(save_dir, 'w') as fp: - json.dump(benchmark_quality, fp, indent=2) + yaml.dump(benchmark_quality, fp) print("Storage success, please check: ", save_dir) except Exception: print("Storage failed, please check: ", save_dir) From 0f61d905c6bd9cf206e53b7776e5088c084db32b Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 31 Jul 2023 15:42:47 +0800 Subject: [PATCH 06/30] edit examples Signed-off-by: Yu Wu --- examples/benchmark_quality/pipeline-lr-binary.py | 9 +-------- examples/benchmark_quality/pipeline-lr-multi.py | 6 +----- python/fate_test/fate_test/utils.py | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/examples/benchmark_quality/pipeline-lr-binary.py b/examples/benchmark_quality/pipeline-lr-binary.py index 7fa1f786ba..8f8e9c897a 100644 --- a/examples/benchmark_quality/pipeline-lr-binary.py +++ b/examples/benchmark_quality/pipeline-lr-binary.py @@ -22,7 +22,6 @@ from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils from fate_test.utils import extract_data, parse_summary_result -from federatedml.evaluation.metrics import classification_metric def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): @@ -109,13 +108,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): lr_1_label = extract_data(lr_1_data, "label") lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) - result_summary = 
parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) - metric_lr = { - "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label), - "ks_2samp": classification_metric.KSTest.compute(lr_0_score, lr_1_score), - "mAP_D_value": classification_metric.AveragePrecisionScore().compute(lr_0_score, lr_1_score, lr_0_label, - lr_1_label)} - result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_metric()) data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} diff --git a/examples/benchmark_quality/pipeline-lr-multi.py b/examples/benchmark_quality/pipeline-lr-multi.py index f774515dc1..384e03bd7c 100644 --- a/examples/benchmark_quality/pipeline-lr-multi.py +++ b/examples/benchmark_quality/pipeline-lr-multi.py @@ -22,7 +22,6 @@ from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils from fate_test.utils import extract_data, parse_summary_result -from federatedml.evaluation.metrics import classification_metric def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): @@ -89,12 +88,9 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): lr_0_data = pipeline.get_component("lr_0").get_output_data() lr_1_data = pipeline.get_component("lr_1").get_output_data() - result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_metric()) lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) - metric_lr = { - "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, 
lr_1_score_label)} - result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} diff --git a/python/fate_test/fate_test/utils.py b/python/fate_test/fate_test/utils.py index 443644b223..d31b07bfb3 100644 --- a/python/fate_test/fate_test/utils.py +++ b/python/fate_test/fate_test/utils.py @@ -304,7 +304,7 @@ def _save_quality(storage_tag, cache_directory, **results): os.makedirs(os.path.dirname(save_dir), exist_ok=True) if os.path.exists(save_dir): with open(save_dir, 'r') as f: - benchmark_quality = yaml.safe_load(f, object_hook=dict) + benchmark_quality = yaml.safe_load(f) else: benchmark_quality = {} if storage_tag in benchmark_quality: From a685faaa9975ae4929af4b8025286f0df49461e3 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 31 Jul 2023 17:55:03 +0800 Subject: [PATCH 07/30] edit fate-test bq & examples(#5008) Signed-off-by: Yu Wu --- .../{ => lr}/breast_config.yaml | 5 ++++- .../{ => lr}/default_credit_config.yaml | 5 ++++- .../{ => lr}/epsilon_5k_config.yaml | 5 ++++- .../{ => lr}/give_credit_config.yaml | 5 ++++- .../{ => lr}/lr_benchmark.yaml | 0 .../{ => lr}/pipeline-lr-binary.py | 10 +++++----- .../{ => lr}/pipeline-lr-multi.py | 7 +++---- .../{ => lr}/sklearn-lr-binary.py | 0 .../{ => lr}/sklearn-lr-multi.py | 0 .../benchmark_quality/lr/vehicle_config.yaml | 19 +++++++++++++++++++ .../lr/vehicle_lr_sklearn_config.yaml | 19 +++++++++++++++++++ .../benchmark_quality/vehicle_config.yaml | 12 ------------ .../vehicle_lr_sklearn_config.yaml | 12 ------------ examples/config.yaml | 2 -- examples/pipeline/test_upload_sid.py | 16 ++++++++-------- .../fate_test/fate_test/scripts/_options.py | 6 +++--- .../fate_test/scripts/benchmark_cli.py | 11 +++++++---- .../fate_test/scripts/testsuite_cli.py | 4 ++-- 18 files changed, 82 insertions(+), 56 deletions(-) rename 
examples/benchmark_quality/{ => lr}/breast_config.yaml (82%) rename examples/benchmark_quality/{ => lr}/default_credit_config.yaml (83%) rename examples/benchmark_quality/{ => lr}/epsilon_5k_config.yaml (82%) rename examples/benchmark_quality/{ => lr}/give_credit_config.yaml (82%) rename examples/benchmark_quality/{ => lr}/lr_benchmark.yaml (100%) rename examples/benchmark_quality/{ => lr}/pipeline-lr-binary.py (95%) rename examples/benchmark_quality/{ => lr}/pipeline-lr-multi.py (95%) rename examples/benchmark_quality/{ => lr}/sklearn-lr-binary.py (100%) rename examples/benchmark_quality/{ => lr}/sklearn-lr-multi.py (100%) create mode 100644 examples/benchmark_quality/lr/vehicle_config.yaml create mode 100644 examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml delete mode 100644 examples/benchmark_quality/vehicle_config.yaml delete mode 100644 examples/benchmark_quality/vehicle_lr_sklearn_config.yaml diff --git a/examples/benchmark_quality/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml similarity index 82% rename from examples/benchmark_quality/breast_config.yaml rename to examples/benchmark_quality/lr/breast_config.yaml index 00090b4c16..46a52cd575 100644 --- a/examples/benchmark_quality/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -2,8 +2,10 @@ data_guest: "examples/data/breast_hetero_guest.csv" data_host: "examples/data/breast_hetero_host.csv" idx: "id" label_name: "y" -penalty: "L2" epochs: 30 +init_param: + fit_intercept: True + method: "zeros" learning_rate_scheduler: method: "constant" scheduler_params: @@ -12,5 +14,6 @@ learning_rate_scheduler: total_iters: 100 optimizer: method: "rmsprop" + penalty: "L2" batch_size: 5000 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml similarity index 83% rename from examples/benchmark_quality/default_credit_config.yaml rename to 
examples/benchmark_quality/lr/default_credit_config.yaml index f86a48e834..a6c833bc1d 100644 --- a/examples/benchmark_quality/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,8 +2,10 @@ data_guest: "examples/data/default_credit_hetero_guest.csv" data_host: "examples/data/default_credit_hetero_host.csv" idx: "id" label_name: "y" -penalty: "L2" epochs: 30 +init_param: + fit_intercept: True + method: "zeros" learning_rate_scheduler: method: "constant" scheduler_params: @@ -12,5 +14,6 @@ learning_rate_scheduler: total_iters: 100 optimizer: method: "zeros" + penalty: "L2" batch_size: 500 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/epsilon_5k_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_config.yaml similarity index 82% rename from examples/benchmark_quality/epsilon_5k_config.yaml rename to examples/benchmark_quality/lr/epsilon_5k_config.yaml index be63b9b414..fdc50ec717 100644 --- a/examples/benchmark_quality/epsilon_5k_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_config.yaml @@ -2,8 +2,10 @@ data_guest: "examples/data/epsilon_5k_hetero_guest.csv" data_host: "examples/data/epsilon_5k_hetero_host.csv" idx: "id" label_name: "y" -penalty: "L2" epochs: 30 +init_param: + fit_intercept: True + method: "zeros" learning_rate_scheduler: method: "constant" scheduler_params: @@ -12,5 +14,6 @@ learning_rate_scheduler: total_iters: 800 optimizer: method: "rmsprop" + penalty: "L2" batch_size: 5000 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml similarity index 82% rename from examples/benchmark_quality/give_credit_config.yaml rename to examples/benchmark_quality/lr/give_credit_config.yaml index bc2b6a683f..d1ba9f48f7 100644 --- a/examples/benchmark_quality/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -2,8 +2,10 @@ 
data_guest: "examples/data/give_credit_hetero_guest.csv" data_host: "examples/data/give_credit_hetero_host.csv" idx: "id" label_name: "y" -penalty: "L2" epochs: 6 +init_param: + fit_intercept: True + method: "zeros" learning_rate_scheduler: method: "constant" scheduler_params: @@ -12,5 +14,6 @@ learning_rate_scheduler: total_iters: 100 optimizer: method: "adam" + penalty: "L2" batch_size: 550 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml similarity index 100% rename from examples/benchmark_quality/lr_benchmark.yaml rename to examples/benchmark_quality/lr/lr_benchmark.yaml diff --git a/examples/benchmark_quality/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py similarity index 95% rename from examples/benchmark_quality/pipeline-lr-binary.py rename to examples/benchmark_quality/lr/pipeline-lr-binary.py index 8f8e9c897a..4b6c003060 100644 --- a/examples/benchmark_quality/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -68,14 +68,13 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): } config_param = { - "penalty": param["penalty"], "epochs": param["epochs"], "learning_rate_scheduler": param["learning_rate_scheduler"], "optimizer": param["optimizer"], "batch_size": param["batch_size"], - "early_stop": "diff", - "tol": 1e-5, - "init_param": param.get("init_method", {"method": "zeros"}) + "early_stop": param["early_stop"], + "init_param": param["init_param"], + "tol": 1e-5 } lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", @@ -83,7 +82,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): **config_param) lr_1 = CoordinatedLR("lr_1", test_data=intersect_0.outputs["output_data"], - input_model=lr_0.outputs["train_output_model"]) + input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation("evaluation_0", label_column_name="y", @@ 
-109,6 +108,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_metric()) + print(f"result_summary") data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} diff --git a/examples/benchmark_quality/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py similarity index 95% rename from examples/benchmark_quality/pipeline-lr-multi.py rename to examples/benchmark_quality/lr/pipeline-lr-multi.py index 384e03bd7c..5d504c19c3 100644 --- a/examples/benchmark_quality/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -58,14 +58,13 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): } config_param = { - "penalty": param["penalty"], "epochs": param["epochs"], "learning_rate_scheduler": param["learning_rate_scheduler"], "optimizer": param["optimizer"], "batch_size": param["batch_size"], - "early_stop": "diff", + "early_stop": param["early_stop"], + "init_param": param["init_param"], "tol": 1e-5, - "init_param": param.get("init_method", {"method": "zeros"}) } lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", @@ -73,7 +72,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): **config_param) lr_1 = CoordinatedLR("lr_1", test_data=intersect_0.outputs["output_data"], - input_model=lr_0.outputs["train_output_model"]) + input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation('evaluation_0', default_eval_setting="multi") pipeline.add_task(intersect_0) diff --git a/examples/benchmark_quality/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py similarity index 100% 
rename from examples/benchmark_quality/sklearn-lr-binary.py rename to examples/benchmark_quality/lr/sklearn-lr-binary.py diff --git a/examples/benchmark_quality/sklearn-lr-multi.py b/examples/benchmark_quality/lr/sklearn-lr-multi.py similarity index 100% rename from examples/benchmark_quality/sklearn-lr-multi.py rename to examples/benchmark_quality/lr/sklearn-lr-multi.py diff --git a/examples/benchmark_quality/lr/vehicle_config.yaml b/examples/benchmark_quality/lr/vehicle_config.yaml new file mode 100644 index 0000000000..9312bf7a07 --- /dev/null +++ b/examples/benchmark_quality/lr/vehicle_config.yaml @@ -0,0 +1,19 @@ +data_guest: "examples/data/vehicle_scale_hetero_guest.csv" +data_host: "examples/data/vehicle_scale_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 20 +init_param: + fit_intercept: True + method: "zeros" +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 0.3 + factor: 1.0 + total_iters: 800 +optimizer: + method: "adam" + penalty: "L2" +batch_size: 16 +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml new file mode 100644 index 0000000000..0d9bda1717 --- /dev/null +++ b/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml @@ -0,0 +1,19 @@ +data_guest: "examples/data/vehicle_scale_hetero_guest.csv" +data_host: "examples/data/vehicle_scale_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 30 +init_param: + fit_intercept: True + method: "zeros" +learning_rate_scheduler: + method: "constant" + scheduler_params: + lr: 0.15 + factor: 1.0 + total_iters: 800' +optimizer: + method: "rmsprop" + penalty: "L2" +batch_size: None +early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/vehicle_config.yaml b/examples/benchmark_quality/vehicle_config.yaml deleted file mode 100644 index 29dceb4345..0000000000 --- a/examples/benchmark_quality/vehicle_config.yaml +++ 
/dev/null @@ -1,12 +0,0 @@ -data_guest: "examples/data/vehicle_scale_hetero_guest.csv" -data_host: "examples/data/vehicle_scale_hetero_host.csv" -idx: "id" -label_name: "y" -penalty: "L2" -max_iter: 20 -alpha: 0.00001 -learning_rate: 0.3 -optimizer: "adam" -batch_size: 16 -early_stop: "diff" -init_method: "random_uniform" \ No newline at end of file diff --git a/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml b/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml deleted file mode 100644 index f70fdb409a..0000000000 --- a/examples/benchmark_quality/vehicle_lr_sklearn_config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -data_guest: "examples/data/vehicle_scale_hetero_guest.csv" -data_host: "examples/data/vehicle_scale_hetero_host.csv" -idx: "id" -label_name: "y" -penalty: "L2" -max_iter: 30 -alpha: 0.001 -learning_rate: 0.15 -optimizer: "rmsprop" -batch_size: -1 -early_stop: "diff" -init_method: "zeros" \ No newline at end of file diff --git a/examples/config.yaml b/examples/config.yaml index 08d3da7420..2905ff3ec8 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -7,6 +7,4 @@ parties: # parties default id arbiter: - 10000 -work_mode: 0 # 0 for standalone, or 1 for cluster - data_base_dir: "/data/projects/fate" # path to project base where data is located \ No newline at end of file diff --git a/examples/pipeline/test_upload_sid.py b/examples/pipeline/test_upload_sid.py index dfc82d1b18..d65e696fc3 100644 --- a/examples/pipeline/test_upload_sid.py +++ b/examples/pipeline/test_upload_sid.py @@ -31,10 +31,10 @@ 'tag_with_value': False, 'weight_type': 'float64'} -pipeline.transform_local_file_to_dataframe("/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_guest_sid.csv", - meta=meta, head=True, extend_sid=False, - namespace="experiment_sid", - name="breast_hetero_guest") +pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_guest}", + meta=meta, head=True, extend_sid=False, + namespace="experiment_sid", + 
name="breast_hetero_guest") meta = {'delimiter': ',', 'dtype': 'float64', @@ -47,7 +47,7 @@ 'tag_with_value': False, 'weight_type': 'float64'} -pipeline.transform_local_file_to_dataframe("/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_host_sid.csv", - meta=meta, head=True, extend_sid=False, - namespace="experiment_sid", - name="breast_hetero_host") +pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_host}", + meta=meta, head=True, extend_sid=False, + namespace="experiment_sid", + name="breast_hetero_host") diff --git a/python/fate_test/fate_test/scripts/_options.py b/python/fate_test/fate_test/scripts/_options.py index ae30f748a0..364c43a264 100644 --- a/python/fate_test/fate_test/scripts/_options.py +++ b/python/fate_test/fate_test/scripts/_options.py @@ -18,9 +18,9 @@ class SharedOptions(object): False), "yes": (('-y', '--yes',), dict(type=bool, is_flag=True, help="Skip double check", default=None), False), - # "extend_sid": (('--extend_sid',), - # dict(type=bool, is_flag=True, help="whether to append uuid as sid when uploading data", - # default=None), None), + "extend_sid": (('--extend_sid',), + dict(type=bool, is_flag=True, help="whether to append uuid as sid when uploading data", + default=None), None), # "auto_increasing_sid": (('--auto_increasing_sid',), # dict(type=bool, is_flag=True, help="whether to generate sid value starting at 0", # default=None), None), diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index d9f82d4139..d66a61058d 100644 --- a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -50,8 +50,9 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] - if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = 
ctx.obj["auto_increasing_sid"] + + """if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" if clean_data is None: clean_data = config_inst.clean_data data_namespace_mangling = ctx.obj["namespace_mangling"] @@ -61,13 +62,15 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora echo.echo(f"testsuite namespace: {namespace}", fg='red') echo.echo("loading testsuites:") suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, - suffix="benchmark.json", suite_type="benchmark") + suffix="benchmark.yaml", suite_type="benchmark") for suite in suites: echo.echo(f"\tdataset({len(suite.dataset)}) benchmark groups({len(suite.pairs)}) {suite.path}") if not yes and not click.confirm("running?"): return client = Clients(config_inst) - fate_version = client["guest_0"].get_version() + # @todo: get version + # fate_version = client["guest_0"].get_version() + fate_version = "2.0.0-beta" for i, suite in enumerate(suites): # noinspection PyBroadException try: diff --git a/python/fate_test/fate_test/scripts/testsuite_cli.py b/python/fate_test/fate_test/scripts/testsuite_cli.py index f308ad1674..3bc295bfe1 100644 --- a/python/fate_test/fate_test/scripts/testsuite_cli.py +++ b/python/fate_test/fate_test/scripts/testsuite_cli.py @@ -64,9 +64,9 @@ def run_suite(ctx, include, exclude, glob, ctx.obj.update(**kwargs) ctx.obj.post_process() config_inst = ctx.obj["config"] - """if ctx.obj["extend_sid"] is not None: + if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] - if ctx.obj["auto_increasing_sid"] is not None: + """if ctx.obj["auto_increasing_sid"] is not None: config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" if clean_data is None: clean_data = config_inst.clean_data From e9a2a7468da3692c248024b720e4fa9432902414 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 1 Aug 2023 16:21:52 +0800 Subject: [PATCH 08/30] edit fate-test bq & 
examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/breast_config.yaml | 4 +++- .../lr/breast_lr_sklearn_config.yaml | 12 ++++++++++++ .../benchmark_quality/lr/default_credit_config.yaml | 3 ++- .../benchmark_quality/lr/epsilon_5k_config.yaml | 3 ++- .../benchmark_quality/lr/give_credit_config.yaml | 3 ++- examples/benchmark_quality/lr/lr_benchmark.yaml | 2 +- examples/benchmark_quality/lr/pipeline-lr-binary.py | 13 ++++++++----- examples/benchmark_quality/lr/pipeline-lr-multi.py | 6 +++--- examples/benchmark_quality/lr/sklearn-lr-binary.py | 8 ++++---- examples/benchmark_quality/lr/sklearn-lr-multi.py | 4 ++-- examples/config.yaml | 2 +- python/fate_test/fate_test/_flow_client.py | 12 ++++++++++++ 12 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml diff --git a/examples/benchmark_quality/lr/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml index 46a52cd575..80bc467254 100644 --- a/examples/benchmark_quality/lr/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -9,11 +9,13 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - lr: 0.15 factor: 1.0 total_iters: 100 optimizer: method: "rmsprop" penalty: "L2" + optimizer_params: + lr: 0.1 + alpha: 0.5 batch_size: 5000 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml new file mode 100644 index 0000000000..02483f1f61 --- /dev/null +++ b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml @@ -0,0 +1,12 @@ +data_guest: "examples/data/breast_hetero_guest.csv" +data_host: "examples/data/breast_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 30 +fit_intercept: True +method: "rmsprop" +penalty: "L2" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 +early_stop: "diff" \ No newline at end of file diff --git 
a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index a6c833bc1d..b143418832 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -9,11 +9,12 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - lr: 0.15 factor: 1.0 total_iters: 100 optimizer: method: "zeros" penalty: "L2" + optimizer_params: + lr: 0.15 batch_size: 500 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/epsilon_5k_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_config.yaml index fdc50ec717..232b830d6c 100644 --- a/examples/benchmark_quality/lr/epsilon_5k_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_config.yaml @@ -9,11 +9,12 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - lr: 0.15 factor: 1.0 total_iters: 800 optimizer: method: "rmsprop" penalty: "L2" + optimizer_params: + lr: 0.15 batch_size: 5000 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index d1ba9f48f7..f6971ec107 100644 --- a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -9,11 +9,12 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - lr: 0.15 factor: 1.0 total_iters: 100 optimizer: method: "adam" penalty: "L2" + optimizer_params: + lr: 0.15 batch_size: 550 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index dad81264ed..692e74db2d 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -118,7 +118,7 @@ data: hetero_lr-binary-0: local: script: "./sklearn-lr-binary.py" - conf: 
"./breast_config.yaml" + conf: "./breast_lr_sklearn_config.yaml" FATE-hetero-lr: script: "./pipeline-lr-binary.py" conf: "./breast_config.yaml" diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index 4b6c003060..ee397d1e14 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -99,15 +99,18 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): print(pipeline.get_dag()) pipeline.fit() - lr_0_data = pipeline.get_component("lr_0").get_output_data() - lr_1_data = pipeline.get_component("lr_1").get_output_data() + lr_0_data = pipeline.get_task_info("lr_0").get_output_data()["train_output_data"] + lr_1_data = pipeline.get_task_info("lr_1").get_output_data()["test_output_data"] lr_0_score = extract_data(lr_0_data, "predict_result") - lr_0_label = extract_data(lr_0_data, "label") + lr_0_label = extract_data(lr_0_data, "y") lr_1_score = extract_data(lr_1_data, "predict_result") - lr_1_label = extract_data(lr_1_data, "label") + lr_1_label = extract_data(lr_1_data, "y") lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) - result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_metric()) + """print(f"evaluation result: {pipeline.get_task_info('evaluation_0').get_output_metric()};" + f"result type: {type(pipeline.get_task_info('evaluation_0').get_output_metric())}") + """ + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()) print(f"result_summary") data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index 5d504c19c3..cc5e9602de 100644 --- 
a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -84,10 +84,10 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): print(pipeline.get_dag()) pipeline.fit() - lr_0_data = pipeline.get_component("lr_0").get_output_data() - lr_1_data = pipeline.get_component("lr_1").get_output_data() + lr_0_data = pipeline.get_component("lr_0").get_output_data()["train_output_data"] + lr_1_data = pipeline.get_component("lr_1").get_output_data()["test_output_data"] - result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_metric()) + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()) lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index 5b17692621..ffdaf2b945 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -18,12 +18,12 @@ import os import pandas -from pipeline.utils.tools import JobConfig +from fate_client.pipeline.utils.test_utils import JobConfig from sklearn.linear_model import SGDClassifier from sklearn.metrics import roc_auc_score, precision_score, accuracy_score, recall_score, roc_curve -def main(config="../../config.yaml", param="./vechile_config.yaml"): +def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): # obtain config if isinstance(param, str): param = JobConfig.load_from_file(param) @@ -45,7 +45,7 @@ def main(config="../../config.yaml", param="./vechile_config.yaml"): "max_iter": 100, "alpha": param["alpha"], "learning_rate": "optimal", - "eta0": param["learning_rate"], + "eta0": param["eta0"], "random_state": 105 } @@ -84,7 +84,7 @@ def main(config="../../config.yaml", 
param="./vechile_config.yaml"): if __name__ == "__main__": parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB") - parser.add_argument("-p", "--param", type=str, default="./breast_config.yaml", + parser.add_argument("-p", "--param", type=str, default="./breast_lr_sklearn_config.yaml", help="config file for params") args = parser.parse_args() main(param=args.param) diff --git a/examples/benchmark_quality/lr/sklearn-lr-multi.py b/examples/benchmark_quality/lr/sklearn-lr-multi.py index 0b33e57c8f..fb8da4827f 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-multi.py +++ b/examples/benchmark_quality/lr/sklearn-lr-multi.py @@ -18,12 +18,12 @@ import os import pandas -from pipeline.utils.tools import JobConfig +from fate_client.pipeline.utils.test_utils import JobConfig from sklearn.linear_model import SGDClassifier from sklearn.metrics import precision_score, accuracy_score, recall_score -def main(config="../../config.yaml", param="./vechile_config.yaml"): +def main(config="../../config.yaml", param="./vehicle_config.yaml"): # obtain config if isinstance(param, str): param = JobConfig.load_from_file(param) diff --git a/examples/config.yaml b/examples/config.yaml index 2905ff3ec8..2eec4548d5 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -7,4 +7,4 @@ parties: # parties default id arbiter: - 10000 -data_base_dir: "/data/projects/fate" # path to project base where data is located \ No newline at end of file +data_base_dir: "/Users/yuwu/PycharmProjects/FATE/" # path to project base where data is located \ No newline at end of file diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py index 2d0d3f8d98..e27c2098e0 100644 --- a/python/fate_test/fate_test/_flow_client.py +++ b/python/fate_test/fate_test/_flow_client.py @@ -269,6 +269,18 @@ def _query_job(self, job_id, role, party_id): raise RuntimeError(f"get version error: {response}") from e return fate_version""" + def get_version(self): + 
response = self._client.provider.query(name="fate") + try: + retcode = response['code'] + retmsg = response['message'] + if retcode != 0 or retmsg != 'success': + raise RuntimeError(f"get version error: {response}") + fate_version = response["data"]["provider_name"] + except Exception as e: + raise RuntimeError(f"get version error: {response}") from e + return fate_version + def _add_notes(self, job_id, role, party_id, notes): data = dict(job_id=job_id, role=role, party_id=party_id, notes=notes) response = AddNotesResponse(self._post(url='job/update', json=data)) From 444a3f56be23edbb645ad9a52d0e022925e96d84 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 1 Aug 2023 16:23:01 +0800 Subject: [PATCH 09/30] edit fate-test bq & examples(#5008) Signed-off-by: Yu Wu --- python/fate_test/fate_test/scripts/benchmark_cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index d66a61058d..79899a8ff8 100644 --- a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -68,9 +68,7 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora if not yes and not click.confirm("running?"): return client = Clients(config_inst) - # @todo: get version - # fate_version = client["guest_0"].get_version() - fate_version = "2.0.0-beta" + fate_version = client["guest_0"].get_version() for i, suite in enumerate(suites): # noinspection PyBroadException try: From 26da6fbe83ce55877f6037b0946c4aed7bba5f85 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 1 Aug 2023 16:24:01 +0800 Subject: [PATCH 10/30] edit fate-test bq examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/vehicle_config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/benchmark_quality/lr/vehicle_config.yaml b/examples/benchmark_quality/lr/vehicle_config.yaml index 
9312bf7a07..2cff7f33b5 100644 --- a/examples/benchmark_quality/lr/vehicle_config.yaml +++ b/examples/benchmark_quality/lr/vehicle_config.yaml @@ -9,11 +9,12 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - lr: 0.3 factor: 1.0 total_iters: 800 optimizer: method: "adam" penalty: "L2" + optimizer_params: + lr: 0.3 batch_size: 16 early_stop: "diff" \ No newline at end of file From 1f40c89655be0e3a39f286f175d85c91a7e3559f Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 1 Aug 2023 19:51:30 +0800 Subject: [PATCH 11/30] edit fate-test cli & examples(#5008) Signed-off-by: Yu Wu --- .../lr/breast_lr_sklearn_config.yaml | 3 +- .../lr/default_credit_lr_config.yaml | 11 ++++++++ .../benchmark_quality/lr/lr_benchmark.yaml | 2 +- .../lr/pipeline-lr-binary.py | 2 +- .../fate_test/scripts/benchmark_cli.py | 4 ++- python/fate_test/fate_test/utils.py | 28 +++++++++++++++++-- 6 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 examples/benchmark_quality/lr/default_credit_lr_config.yaml diff --git a/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml index 02483f1f61..2993795c78 100644 --- a/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml +++ b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml @@ -8,5 +8,4 @@ method: "rmsprop" penalty: "L2" eta0: 0.1 alpha: 0.5 -batch_size: 5000 -early_stop: "diff" \ No newline at end of file +batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/default_credit_lr_config.yaml b/examples/benchmark_quality/lr/default_credit_lr_config.yaml new file mode 100644 index 0000000000..e1dd4f6932 --- /dev/null +++ b/examples/benchmark_quality/lr/default_credit_lr_config.yaml @@ -0,0 +1,11 @@ +data_guest: "examples/data/default_credit_hetero_guest.csv" +data_host: "examples/data/default_credit_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 30 +fit_intercept: True +method: "rmsprop" 
+penalty: "L2" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index 692e74db2d..18cf77b34f 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -130,7 +130,7 @@ hetero_lr-binary-1: conf: "./default_credit_config.yaml" FATE-hetero-lr: script: "./pipeline-lr-binary.py" - conf: "./default_credit_config.yaml" + conf: "./default_credit_sklearn_config.yaml" compare_setting: relative_tol: 0.01 hetero_lr-binary-2: diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index ee397d1e14..ed53c4091a 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -110,7 +110,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): """print(f"evaluation result: {pipeline.get_task_info('evaluation_0').get_output_metric()};" f"result type: {type(pipeline.get_task_info('evaluation_0').get_output_metric())}") """ - result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()) + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()[0]["data"]) print(f"result_summary") data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index 79899a8ff8..a6ba20383f 100644 --- a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -68,7 +68,9 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora if not yes and not click.confirm("running?"): return client = Clients(config_inst) - fate_version = client["guest_0"].get_version() + # 
@todo: change to client query result + # fate_version = client["guest_0"].get_version() + fate_version = "2.0.0-beta" for i, suite in enumerate(suites): # noinspection PyBroadException try: diff --git a/python/fate_test/fate_test/utils.py b/python/fate_test/fate_test/utils.py index d31b07bfb3..74775354ee 100644 --- a/python/fate_test/fate_test/utils.py +++ b/python/fate_test/fate_test/utils.py @@ -321,10 +321,32 @@ def _save_quality(storage_tag, cache_directory, **results): def parse_summary_result(rs_dict): for model_key in rs_dict: rs_content = rs_dict[model_key] - if 'validate' in rs_content: - return rs_content['validate'] + if 'test_set' in rs_content: + metric_result = rs_content['test_set'] + elif 'validate_set' in rs_content: + metric_result = rs_content['validate_set'] else: - return rs_content['train'] + metric_result = rs_content['train_set'] + return extract_and_flatten_summary_metric(metric_result) + + +def extract_and_flatten_summary_metric(metric_dict_list): + flatten_metric_summary = {} + for metric_group in metric_dict_list: + if isinstance(metric_group, dict): + metric_name = metric_group['metric'] + metric_val = metric_group['val'] + if isinstance(metric_val, float) or isinstance(metric_val, int): + flatten_metric_summary[metric_name] = metric_val + elif isinstance(metric_group, list): + for metric_subset in metric_group: + metric_name = metric_subset['metric'] + metric_val = metric_subset['val'] + if isinstance(metric_val, float) or isinstance(metric_val, int): + flatten_metric_summary[metric_name] = metric_val + else: + raise ValueError(f"Invalid metric group: {metric_group}") + return flatten_metric_summary def extract_data(df, col_name, convert_float=True, keep_id=False): From 9737fec22abefedac7bb62eee10895a4ee302e71 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 2 Aug 2023 10:03:45 +0800 Subject: [PATCH 12/30] edit fate-test examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/pipeline-lr-binary.py | 2 +-
examples/benchmark_quality/lr/sklearn-lr-binary.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index ed53c4091a..bd845bc9d9 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -87,7 +87,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): evaluation_0 = Evaluation("evaluation_0", label_column_name="y", runtime_roles=["guest"], - default_eval_setting="binary", + metrics=["auc", "binary_precision", "binary_accuracy", "binary_recall"], input_data=lr_0.outputs["train_output_data"]) pipeline.add_task(intersect_0) diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index ffdaf2b945..603c39d75f 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -76,7 +76,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): fpr, tpr, thresholds = roc_curve(y_test, y_prob) ks = max(tpr - fpr) - result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc} + result = {"auc": auc_score, "binary_recall": recall, "binary_precision": pr, "binary_accuracy": acc} print(result) print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") return {}, result From bb9f2b8ba67332ee14ff4bf4869639a7b59ba427 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 4 Aug 2023 19:17:59 +0800 Subject: [PATCH 13/30] add performance cli (#5008) add timeout & task-cores param to bq & testsuite cli(#5008) add examples(#5008) Signed-off-by: Yu Wu --- .../coordinated_lr/breast_config.yaml | 24 ++ .../coordinated_lr/config.yaml | 11 + .../coordinated_lr_performance.yaml | 39 +++ .../coordinated_lr/test_lr_sid.py | 116 +++++++ examples/benchmark_quality/linr/fate-linr.py | 115 
+++++++ .../linr/hetero_linr_benchmark.yaml | 47 +++ .../benchmark_quality/linr/linr_config.yaml | 22 ++ .../linr/linr_sklearn_config.yaml | 11 + .../benchmark_quality/lr/breast_config.yaml | 17 +- .../lr/default_credit_config.yaml | 12 +- ... => default_credit_lr_sklearn_config.yaml} | 0 .../lr/epsilon_5k_config.yaml | 16 +- .../lr/epsilon_5k_lr_sklearn_config.yaml | 11 + .../lr/give_credit_config.yaml | 4 +- .../lr/give_credit_lr_sklearn_config.yaml | 11 + .../benchmark_quality/lr/lr_benchmark.yaml | 184 ++++++---- .../lr/pipeline-lr-binary.py | 31 +- .../benchmark_quality/lr/pipeline-lr-multi.py | 17 +- .../benchmark_quality/lr/sklearn-lr-binary.py | 8 +- .../benchmark_quality/lr/sklearn-lr-multi.py | 15 +- .../benchmark_quality/lr/vehicle_config.yaml | 11 +- .../lr/vehicle_lr_sklearn_config.yaml | 20 +- .../pipeline/coordinated_lr/test_lr_sid.py | 4 + .../pipeline/coordinated_lr/test_lr_sid_cv.py | 4 + .../coordinated_lr/test_lr_sid_warm_start.py | 4 + examples/pipeline/test_linr_sid_warm_start.py | 64 ++-- python/fate_test/fate_test/_config.py | 6 + python/fate_test/fate_test/_flow_client.py | 4 + python/fate_test/fate_test/_parser.py | 42 ++- python/fate_test/fate_test/scripts/_utils.py | 4 +- .../fate_test/scripts/benchmark_cli.py | 17 +- .../fate_test/scripts/performance_cli.py | 315 ++++++------------ .../fate_test/scripts/testsuite_cli.py | 14 +- python/fate_test/fate_test/utils.py | 27 ++ 34 files changed, 857 insertions(+), 390 deletions(-) create mode 100644 examples/benchmark_performance/coordinated_lr/breast_config.yaml create mode 100644 examples/benchmark_performance/coordinated_lr/config.yaml create mode 100644 examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml create mode 100644 examples/benchmark_performance/coordinated_lr/test_lr_sid.py create mode 100644 examples/benchmark_quality/linr/fate-linr.py create mode 100644 examples/benchmark_quality/linr/hetero_linr_benchmark.yaml create mode 100644 
examples/benchmark_quality/linr/linr_config.yaml create mode 100644 examples/benchmark_quality/linr/linr_sklearn_config.yaml rename examples/benchmark_quality/lr/{default_credit_lr_config.yaml => default_credit_lr_sklearn_config.yaml} (100%) create mode 100644 examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml create mode 100644 examples/benchmark_quality/lr/give_credit_lr_sklearn_config.yaml diff --git a/examples/benchmark_performance/coordinated_lr/breast_config.yaml b/examples/benchmark_performance/coordinated_lr/breast_config.yaml new file mode 100644 index 0000000000..d827c47236 --- /dev/null +++ b/examples/benchmark_performance/coordinated_lr/breast_config.yaml @@ -0,0 +1,24 @@ +data_guest: "breast_hetero_guest" +data_host: "breast_hetero_host" +idx: "id" +label_name: "y" +epochs: 20 +init_param: + fit_intercept: True + method: "random_uniform" + random_state: 42 +learning_rate_scheduler: + method: "constant" + scheduler_params: + factor: 1.0 + total_iters: 100 +optimizer: + method: "rmsprop" + penalty: "L2" + optimizer_params: + lr: 0.05 + alpha: 0.1 +batch_size: null +early_stop: "diff" +task_cores: 4 +timeout: 3600 \ No newline at end of file diff --git a/examples/benchmark_performance/coordinated_lr/config.yaml b/examples/benchmark_performance/coordinated_lr/config.yaml new file mode 100644 index 0000000000..1c021a7223 --- /dev/null +++ b/examples/benchmark_performance/coordinated_lr/config.yaml @@ -0,0 +1,11 @@ +parties: # parties default id + guest: + - 9999 + host: + - 9998 + - 9999 + arbiter: + - 9998 + +data_base_dir: "" # path to project base where data is located +timeout: 3600 \ No newline at end of file diff --git a/examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml b/examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml new file mode 100644 index 0000000000..81afb73e56 --- /dev/null +++ b/examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml @@ -0,0 +1,39 @@ 
+data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + normal-lr: + script: test_lr_sid.py + conf: "./breast_config.yaml" diff --git a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py new file mode 100644 index 0000000000..ebe2b289e0 --- /dev/null +++ b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py @@ -0,0 +1,116 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): + # obtain config + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + if isinstance(param, str): + param = test_utils.JobConfig.load_from_file(param) + + assert isinstance(param, dict) + + data_set = param.get("data_guest").split('/')[-1] + if data_set == "default_credit_hetero_guest.csv": + guest_data_table = 'default_credit_hetero_guest' + host_data_table = 'default_credit_hetero_host' + elif data_set == 'breast_hetero_guest.csv': + guest_data_table = 'breast_hetero_guest' + host_data_table = 'breast_hetero_host' + elif data_set == 'give_credit_hetero_guest.csv': + guest_data_table = 'give_credit_hetero_guest' + host_data_table = 'give_credit_hetero_host' + elif data_set == 'epsilon_5k_hetero_guest.csv': + guest_data_table = 'epsilon_5k_hetero_guest' + host_data_table = 'epsilon_5k_hetero_host' + else: + raise ValueError(f"Cannot recognized data_set: {data_set}") + + guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} + host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + 
namespace=guest_train_data["namespace"])) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) + + lr_param = { + } + + config_param = { + "epochs": param["epochs"], + "learning_rate_scheduler": param["learning_rate_scheduler"], + "optimizer": param["optimizer"], + "batch_size": param["batch_size"], + "early_stop": param["early_stop"], + "init_param": param["init_param"], + "tol": 1e-5 + } + lr_param.update(config_param) + lr_0 = CoordinatedLR("lr_0", + train_data=intersect_0.outputs["output_data"], + **lr_param) + lr_1 = CoordinatedLR("lr_1", + test_data=intersect_0.outputs["output_data"], + input_model=lr_0.outputs["output_model"]) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + metrics=["auc", "binary_precision", "binary_accuracy", "binary_recall"], + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(intersect_0) + pipeline.add_task(lr_0) + pipeline.add_task(lr_1) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + job_id = pipeline.model_info.job_id + return job_id + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./breast_config.yaml") + args = parser.parse_args() + main(args.config, args.param) diff --git a/examples/benchmark_quality/linr/fate-linr.py b/examples/benchmark_quality/linr/fate-linr.py new file mode 100644 index 0000000000..cb7866ad8d --- /dev/null +++ b/examples/benchmark_quality/linr/fate-linr.py @@ -0,0 +1,115 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLinR, Intersection +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils +from fate_test.utils import parse_summary_result + + +def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""): + # obtain config + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + if isinstance(param, str): + param = test_utils.JobConfig.load_from_file(param) + + assert isinstance(param, dict) + + guest_train_data = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"} + host_train_data = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"} + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + intersect_0 = Intersection("intersect_0", method="raw") + intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) + + linr_param = { + } + + 
config_param = { + "epochs": param["epochs"], + "learning_rate_scheduler": param["learning_rate_scheduler"], + "optimizer": param["optimizer"], + "batch_size": param["batch_size"], + "early_stop": param["early_stop"], + "init_param": param["init_param"], + "tol": 1e-5 + } + linr_param.update(config_param) + linr_0 = CoordinatedLinR("linr_0", + train_data=intersect_0.outputs["output_data"], + **linr_param) + """linr_1 = CoordinatedLinR("linr_1", + test_data=intersect_0.outputs["output_data"], + input_model=linr_0.outputs["output_model"])""" + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="motor_speed", + runtime_roles=["guest"], + metrics=["r2_score", + "mse", + "rmse"], + input_data=linr_0.outputs["train_output_data"]) + + pipeline.add_task(intersect_0) + pipeline.add_task(linr_0) + # pipeline.add_task(linr_1) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + """linr_0_data = pipeline.get_task_info("linr_0").get_output_data()["train_output_data"] + linr_1_data = pipeline.get_task_info("linr_1").get_output_data()["test_output_data"] + linr_0_score = extract_data(linr_0_data, "predict_result") + linr_0_label = extract_data(linr_0_data, "motor_speed") + linr_1_score = extract_data(linr_1_data, "predict_result") + linr_1_label = extract_data(linr_1_data, "motor_speed") + linr_0_score_label = extract_data(linr_0_data, "predict_result", keep_id=True) + linr_1_score_label = extract_data(linr_1_data, "predict_result", keep_id=True)""" + + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()[0]["data"]) + print(f"result_summary: {result_summary}") + + data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, + "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} + } + + return data_summary, result_summary + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB") +
parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./linr_config.yaml") + args = parser.parse_args() + main(args.config, args.param) diff --git a/examples/benchmark_quality/linr/hetero_linr_benchmark.yaml b/examples/benchmark_quality/linr/hetero_linr_benchmark.yaml new file mode 100644 index 0000000000..6d106aeb85 --- /dev/null +++ b/examples/benchmark_quality/linr/hetero_linr_benchmark.yaml @@ -0,0 +1,47 @@ +data: + - file: examples/data/motor_hetero_guest_sid.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: float64 + label_name: motor_speed + match_id_name: "idx" + match_id_range: 0 + sample_id_name: "sid" + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: false + table_name: motor_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/motor_hetero_host_sid.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "idx" + match_id_range: 0 + sample_id_name: "sid" + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: false + table_name: motor_hetero_host + namespace: experiment + role: host_0 + +hetero_linr: + local: + script: "./local-linr.py" + conf: "./linr_sklearn_config.yaml" + FATE-hetero-linr: + script: "./fate-linr.py" + conf: "./linr_config.yaml" + compare_setting: + relative_tol: 0.01 diff --git a/examples/benchmark_quality/linr/linr_config.yaml b/examples/benchmark_quality/linr/linr_config.yaml new file mode 100644 index 0000000000..13f5199e90 --- /dev/null +++ b/examples/benchmark_quality/linr/linr_config.yaml @@ -0,0 +1,22 @@ +data_guest: "examples/data/motor_hetero_guest.csv" +data_host: "examples/data/motor_hetero_host.csv" +label_name: "motor_speed" +penalty: "L2" +epochs: 10 +init_param: +
fit_intercept: True + method: "zeros" + random_state: 42 +learning_rate_scheduler: + method: "constant" + scheduler_params: + factor: 1.0 + total_iters: 100 +optimizer: + method: "sgd" + penalty: "L2" + optimizer_params: + lr: 0.13 + alpha: 0.01 +batch_size: 100 +early_stop: "diff" diff --git a/examples/benchmark_quality/linr/linr_sklearn_config.yaml b/examples/benchmark_quality/linr/linr_sklearn_config.yaml new file mode 100644 index 0000000000..38a15edc00 --- /dev/null +++ b/examples/benchmark_quality/linr/linr_sklearn_config.yaml @@ -0,0 +1,11 @@ +data_guest: "examples/data/motor_hetero_guest.csv" +data_host: "examples/data/motor_hetero_host.csv" +label_name: "motor_speed" +penalty: "L2" +idx: "idx" +epochs: 20 +fit_intercept: True +method: "rmsprop" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 diff --git a/examples/benchmark_quality/lr/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml index 80bc467254..142f056628 100644 --- a/examples/benchmark_quality/lr/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -1,11 +1,12 @@ -data_guest: "examples/data/breast_hetero_guest.csv" -data_host: "examples/data/breast_hetero_host.csv" +data_guest: "breast_hetero_guest" +data_host: "breast_hetero_host" idx: "id" label_name: "y" -epochs: 30 +epochs: 15 init_param: fit_intercept: True - method: "zeros" + method: "uniform" + random_state: 42 learning_rate_scheduler: method: "constant" scheduler_params: @@ -13,9 +14,9 @@ learning_rate_scheduler: total_iters: 100 optimizer: method: "rmsprop" - penalty: "L2" + penalty: "l1" optimizer_params: - lr: 0.1 - alpha: 0.5 -batch_size: 5000 + lr: 0.5 + alpha: 0.1 +batch_size: null early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index b143418832..c45ef53d8a 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ 
b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -1,20 +1,22 @@ -data_guest: "examples/data/default_credit_hetero_guest.csv" -data_host: "examples/data/default_credit_hetero_host.csv" +data_guest: "default_credit_hetero_guest" +data_host: "default_credit_hetero_host" idx: "id" label_name: "y" epochs: 30 init_param: fit_intercept: True method: "zeros" + random_state: 42 learning_rate_scheduler: method: "constant" scheduler_params: factor: 1.0 - total_iters: 100 + total_iters: 10000 optimizer: - method: "zeros" + method: "rmsprop" penalty: "L2" + alpha: 0.001 optimizer_params: lr: 0.15 -batch_size: 500 +batch_size: 3200 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/default_credit_lr_config.yaml b/examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml similarity index 100% rename from examples/benchmark_quality/lr/default_credit_lr_config.yaml rename to examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml diff --git a/examples/benchmark_quality/lr/epsilon_5k_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_config.yaml index 232b830d6c..6822e02ea7 100644 --- a/examples/benchmark_quality/lr/epsilon_5k_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_config.yaml @@ -1,20 +1,22 @@ -data_guest: "examples/data/epsilon_5k_hetero_guest.csv" -data_host: "examples/data/epsilon_5k_hetero_host.csv" +data_guest: "epsilon_5k_hetero_guest" +data_host: "epsilon_5k_hetero_host" idx: "id" label_name: "y" epochs: 30 +batch_size: 2500 init_param: fit_intercept: True - method: "zeros" + method: "random" + random_state: 42 learning_rate_scheduler: method: "constant" scheduler_params: factor: 1.0 - total_iters: 800 + total_iters: 1000 optimizer: - method: "rmsprop" + method: "adam" penalty: "L2" + alpha: 0.0001 optimizer_params: - lr: 0.15 -batch_size: 5000 + lr: 0.3 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml 
b/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml new file mode 100644 index 0000000000..bef62e89aa --- /dev/null +++ b/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml @@ -0,0 +1,11 @@ +data_guest: "examples/data/epsilon_5k_hetero_guest.csv" +data_host: "examples/data/epsilon_5k_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 30 +fit_intercept: True +method: "rmsprop" +penalty: "L2" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index f6971ec107..73f2285fa1 100644 --- a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -1,5 +1,5 @@ -data_guest: "examples/data/give_credit_hetero_guest.csv" -data_host: "examples/data/give_credit_hetero_host.csv" +data_guest: "give_credit_hetero_guest" +data_host: "give_credit_hetero_host" idx: "id" label_name: "y" epochs: 6 diff --git a/examples/benchmark_quality/lr/give_credit_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/give_credit_lr_sklearn_config.yaml new file mode 100644 index 0000000000..4dcb136b99 --- /dev/null +++ b/examples/benchmark_quality/lr/give_credit_lr_sklearn_config.yaml @@ -0,0 +1,11 @@ +data_guest: "examples/data/give_credit_hetero_guest.csv" +data_host: "examples/data/give_credit_hetero_host.csv" +idx: "id" +label_name: "y" +epochs: 30 +fit_intercept: True +method: "rmsprop" +penalty: "L2" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index 18cf77b34f..c857d20e3f 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -1,45 +1,48 @@ data: - - file: examples/data/breast_hetero_guest.csv + - file: examples/data/breast_hetero_guest_sid.csv meta: delimiter: "," dtype: 
float64 input_format: dense label_type: int64 label_name: y - match_id_name: id + match_id_name: "id" match_id_range: 0 + sample_id_name: "sid" tag_value_delimiter: ":" tag_with_value: false weight_type: float64 partitions: 4 head: true - extend_sid: true + extend_sid: false table_name: breast_hetero_guest namespace: experiment role: guest_0 - - file: examples/data/breast_hetero_host.csv + - file: examples/data/breast_hetero_host_sid.csv meta: delimiter: "," dtype: float64 input_format: dense - match_id_name: id + match_id_name: "id" match_id_range: 0 + sample_id_name: "sid" tag_value_delimiter: ":" tag_with_value: false weight_type: float64 partitions: 4 head: true - extend_sid: true + extend_sid: false table_name: breast_hetero_host namespace: experiment role: host_0 - - file: "../../data/default_credit_hetero_guest.csv" + - file: "../../data/default_credit_hetero_guest_sid.csv" meta: delimiter: "," dtype: float64 input_format: dense - match_id_name: id + match_id_name: "id" match_id_range: 0 + sample_id_name: "sid" label_type: int64 label_name: y tag_value_delimiter: ":" @@ -47,33 +50,35 @@ data: weight_type: float64 partitions: 4 head: true - extend_sid: true + extend_sid: false table_name: default_credit_hetero_guest namespace: experiment role: guest_0 - - file: "../../data/default_credit_hetero_host.csv" + - file: "../../data/default_credit_hetero_host_sid.csv" meta: delimiter: "," dtype: float64 input_format: dense - match_id_name: id + match_id_name: "id" match_id_range: 0 + sample_id_name: "sid" tag_value_delimiter: ":" tag_with_value: false weight_type: float64 partitions: 4 head: true - extend_sid: true + extend_sid: false table_name: default_credit_hetero_host namespace: experiment role: host_0 - - file: "../../data/give_credit_hetero_guest.csv" + - file: "../../data/give_credit_hetero_guest_sid.csv" meta: delimiter: "," dtype: float64 input_format: dense - match_id_name: id + match_id_name: "id" match_id_range: 0 + sample_id_name: "sid" label_type: 
int64 label_name: y tag_value_delimiter: ":" @@ -81,41 +86,100 @@ data: weight_type: float64 partitions: 4 head: true - extend_sid: true + extend_sid: false table_name: give_credit_hetero_guest namespace: experiment role: guest_0 - - file: "../../data/give_credit_hetero_host.csv" - head: 1 - partition: 16 + - file: "../../data/give_credit_hetero_host_sid.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + sample_id_name: "sid" + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partition: 4 + extend_sid: false table_name: give_credit_hetero_host namespace: experiment role: host_0 - - file: "../../data/epsilon_5k_hetero_guest.csv" - head: 1 - partition: 16 + - file: "../../data/epsilon_5k_hetero_guest_sid.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + sample_id_name: "sid" + label_type: int64 + label_name: y + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partition: 4 + extend_sid: false table_name: epsilon_5k_hetero_guest namespace: experiment role: guest_0 - - file: "../../data/epsilon_5k_hetero_host.csv" - head: 1 - partition: 16 + - file: "../../data/epsilon_5k_hetero_host_sid.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + sample_id_name: "sid" + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partition: 4 + extend_sid: false table_name: epsilon_5k_hetero_host namespace: experiment role: host_0 - - file: "../../data/vehicle_scale_hetero_guest.csv" - head: 1 - partition: 16 + - file: "../../data/vehicle_scale_hetero_guest_sid.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + sample_id_name: "sid" + label_type: int64 + label_name: y + tag_value_delimiter: ":" + tag_with_value: false + weight_type: 
float64 + head: true + partition: 4 + extend_sid: false table_name: vehicle_scale_hetero_guest namespace: experiment role: guest_0 - - file: "../../data/vehicle_scale_hetero_host.csv" - head: 1 - partition: 16 + - file: "../../data/vehicle_scale_hetero_host_sid.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + sample_id_name: "sid" + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partition: 4 + extend_sid: false table_name: vehicle_scale_hetero_host namespace: experiment role: host_0 -hetero_lr-binary-0: +hetero_lr-binary-0-breast: local: script: "./sklearn-lr-binary.py" conf: "./breast_lr_sklearn_config.yaml" @@ -124,39 +188,39 @@ hetero_lr-binary-0: conf: "./breast_config.yaml" compare_setting: relative_tol: 0.01 -hetero_lr-binary-1: +#hetero_lr-binary-1-default-credit: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./default_credit_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-binary.py" +# conf: "./default_credit_config.yaml" +# compare_setting: +# relative_tol: 0.01 +hetero_lr-binary-2-epsilon-5k: local: script: "./sklearn-lr-binary.py" - conf: "./default_credit_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./default_credit_sklearn_config.yaml" - compare_setting: - relative_tol: 0.01 -hetero_lr-binary-2: - local: - script: "./sklearn-lr-binary.py" - conf: "./epsilon_5k_config.yaml" + conf: "./epsilon_5k_lr_sklearn_config.yaml" FATE-hetero-lr: script: "./pipeline-lr-binary.py" conf: "./epsilon_5k_config.yaml" compare_setting: relative_tol: 0.01 -hetero_lr-binary-3: - local: - script: "./sklearn-lr-binary.py" - conf: "./give_credit_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./give_credit_config.yaml" - compare_setting: - relative_tol: 0.01 -multi: - local: - script: "./sklearn-lr-multi.py" - conf: "./vehicle_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: 
"./pipeline-lr-multi.py" - conf: "./vehicle_config.yaml" - compare_setting: - relative_tol: 0.01 +#hetero_lr-binary-3-give-credit: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./give_credit_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-binary.py" +# conf: "./give_credit_config.yaml" +# compare_setting: +# relative_tol: 0.01 +#multi-vehicle: +# local: +# script: "./sklearn-lr-multi.py" +# conf: "./vehicle_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-multi.py" +# conf: "./vehicle_config.yaml" +# compare_setting: +# relative_tol: 0.01 diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index bd845bc9d9..c10dd7fcb6 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -24,7 +24,7 @@ from fate_test.utils import extract_data, parse_summary_result -def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): +def main(config="../../config.yaml", param="./breast_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = test_utils.load_job_config(config) @@ -38,21 +38,8 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): assert isinstance(param, dict) - data_set = param.get("data_guest").split('/')[-1] - if data_set == "default_credit_hetero_guest.csv": - guest_data_table = 'default_credit_hetero_guest' - host_data_table = 'default_credit_hetero_host' - elif data_set == 'breast_hetero_guest.csv': - guest_data_table = 'breast_hetero_guest' - host_data_table = 'breast_hetero_host' - elif data_set == 'give_credit_hetero_guest.csv': - guest_data_table = 'give_credit_hetero_guest' - host_data_table = 'give_credit_hetero_host' - elif data_set == 'epsilon_5k_hetero_guest.csv': - guest_data_table = 'epsilon_5k_hetero_guest' - host_data_table = 'epsilon_5k_hetero_host' - else: - raise ValueError(f"Cannot recognized 
data_set: {data_set}") + guest_data_table = param.get("data_guest") + host_data_table = param.get("data_host") guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} @@ -79,7 +66,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", train_data=intersect_0.outputs["output_data"], - **config_param) + **lr_param) lr_1 = CoordinatedLR("lr_1", test_data=intersect_0.outputs["output_data"], input_model=lr_0.outputs["output_model"]) @@ -95,6 +82,10 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): pipeline.add_task(lr_1) pipeline.add_task(evaluation_0) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) pipeline.compile() print(pipeline.get_dag()) pipeline.fit() @@ -107,11 +98,9 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): lr_1_label = extract_data(lr_1_data, "y") lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) - """print(f"evaluation result: {pipeline.get_task_info('evaluation_0').get_output_metric()};" - f"result type: {type(pipeline.get_task_info('evaluation_0').get_output_metric())}") - """ + result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()[0]["data"]) - print(f"result_summary") + print(f"result_summary: {result_summary}") data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index cc5e9602de..3868acbd60 100644 --- 
a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -24,7 +24,7 @@ from fate_test.utils import extract_data, parse_summary_result -def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): +def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = test_utils.load_job_config(config) @@ -37,12 +37,8 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): param = test_utils.JobConfig.load_from_file(param) assert isinstance(param, dict) - data_set = param.get("data_guest").split('/')[-1] - if data_set == "vehicle_scale_hetero_guest.csv": - guest_data_table = 'vehicle_scale_hetero_guest' - host_data_table = 'vehicle_scale_hetero_host' - else: - raise ValueError(f"Cannot recognized data_set: {data_set}") + guest_data_table = param.get("data_guest") + host_data_table = param.get("data_host") guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} @@ -74,11 +70,16 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): test_data=intersect_0.outputs["output_data"], input_model=lr_0.outputs["output_model"]) - evaluation_0 = Evaluation('evaluation_0', default_eval_setting="multi") + evaluation_0 = Evaluation('evaluation_0', + metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) pipeline.add_task(intersect_0) pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(evaluation_0) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) pipeline.compile() print(pipeline.get_dag()) diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index 603c39d75f..e418a1297a 100644 --- 
a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -84,7 +84,9 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): if __name__ == "__main__": parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB") - parser.add_argument("-p", "--param", type=str, default="./breast_lr_sklearn_config.yaml", - help="config file for params") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./breast_lr_sklearn_config.yaml") args = parser.parse_args() - main(param=args.param) + main(args.config, args.param) diff --git a/examples/benchmark_quality/lr/sklearn-lr-multi.py b/examples/benchmark_quality/lr/sklearn-lr-multi.py index fb8da4827f..ae931db9fb 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-multi.py +++ b/examples/benchmark_quality/lr/sklearn-lr-multi.py @@ -23,7 +23,7 @@ from sklearn.metrics import precision_score, accuracy_score, recall_score -def main(config="../../config.yaml", param="./vehicle_config.yaml"): +def main(config="../../config.yaml", param="./vehicle_lr_sklearn_config.yaml"): # obtain config if isinstance(param, str): param = JobConfig.load_from_file(param) @@ -65,15 +65,18 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml"): pr = precision_score(y, y_pred, average="macro") acc = accuracy_score(y, y_pred) - result = {"accuracy": acc} + result = {"multi_accuracy": acc, + "multi_precision": pr, + "multi_recall": recall} print(result) return {}, result if __name__ == "__main__": parser = argparse.ArgumentParser("BENCHMARK-QUALITY SKLEARN JOB") - parser.add_argument("-param", type=str, - help="config file for params") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", 
default="./vehicle_lr_sklearn_config.yaml") args = parser.parse_args() - if args.param is not None: - main(args.param) + main(args.config, args.param) diff --git a/examples/benchmark_quality/lr/vehicle_config.yaml b/examples/benchmark_quality/lr/vehicle_config.yaml index 2cff7f33b5..00a610c0ee 100644 --- a/examples/benchmark_quality/lr/vehicle_config.yaml +++ b/examples/benchmark_quality/lr/vehicle_config.yaml @@ -1,11 +1,11 @@ -data_guest: "examples/data/vehicle_scale_hetero_guest.csv" -data_host: "examples/data/vehicle_scale_hetero_host.csv" +data_guest: "vehicle_scale_hetero_guest" +data_host: "vehicle_scale_hetero_host" idx: "id" label_name: "y" epochs: 20 init_param: fit_intercept: True - method: "zeros" + method: "random_uniform" learning_rate_scheduler: method: "constant" scheduler_params: @@ -14,7 +14,10 @@ learning_rate_scheduler: optimizer: method: "adam" penalty: "L2" + alpha: 0.00001 optimizer_params: lr: 0.3 batch_size: 16 -early_stop: "diff" \ No newline at end of file +early_stop: "diff" +task_cores: null +timeout: 3600 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml index 0d9bda1717..4fdb56e151 100644 --- a/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml +++ b/examples/benchmark_quality/lr/vehicle_lr_sklearn_config.yaml @@ -3,17 +3,9 @@ data_host: "examples/data/vehicle_scale_hetero_host.csv" idx: "id" label_name: "y" epochs: 30 -init_param: - fit_intercept: True - method: "zeros" -learning_rate_scheduler: - method: "constant" - scheduler_params: - lr: 0.15 - factor: 1.0 - total_iters: 800' -optimizer: - method: "rmsprop" - penalty: "L2" -batch_size: None -early_stop: "diff" \ No newline at end of file +fit_intercept: True +method: "rmsprop" +penalty: "L2" +eta0: 0.1 +alpha: 0.5 +batch_size: 5000 \ No newline at end of file diff --git a/examples/pipeline/coordinated_lr/test_lr_sid.py 
b/examples/pipeline/coordinated_lr/test_lr_sid.py index 5fb0905ff1..9b2323fb05 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid.py @@ -31,6 +31,10 @@ def main(config="./config.yaml", namespace=""): arbiter = parties.arbiter[0] pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) intersect_0 = Intersection("intersect_0", method="raw") intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py index 16ce51d4a7..5e5a3f40bc 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py @@ -29,6 +29,10 @@ def main(config="./config.yaml", namespace=""): host = parties.host[0] arbiter = parties.arbiter[0] pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) intersect_0 = Intersection("intersect_0", method="raw") intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py index fb8090064d..0c33c952d6 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py @@ -30,6 +30,10 @@ def main(config="./config.yaml", namespace=""): host = parties.host[0] arbiter = parties.arbiter[0] pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", 
config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) intersect_0 = Intersection("intersect_0", method="raw") intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", diff --git a/examples/pipeline/test_linr_sid_warm_start.py b/examples/pipeline/test_linr_sid_warm_start.py index 14837e09a9..0fe2bdea06 100644 --- a/examples/pipeline/test_linr_sid_warm_start.py +++ b/examples/pipeline/test_linr_sid_warm_start.py @@ -21,19 +21,27 @@ intersect_0 = Intersection("intersect_0", method="raw") intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) + namespace="experiment")) intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) + namespace="experiment")) linr_0 = CoordinatedLinR("linr_0", epochs=3, batch_size=None, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, + optimizer={"method": "sgd", "optimizer_params": {"lr": 0.15}, "alpha": 0.1}, init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"]) + train_data=intersect_0.outputs["output_data"], + shuffle=False) linr_1 = CoordinatedLinR("linr_1", train_data=intersect_0.outputs["output_data"], warm_start_model=linr_0.outputs["output_model"], epochs=2, - batch_size=200) + batch_size=None) +linr_2 = CoordinatedLinR("linr_2", + epochs=5, + batch_size=None, + optimizer={"method": "sgd", "optimizer_params": {"lr": 0.15}, "alpha": 0.1}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=intersect_0.outputs["output_data"], + shuffle=False) """linr_0.guest.component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest_sid", namespace="experiment")) @@ -42,40 +50,40 @@ evaluation_0 = Evaluation("evaluation_0", runtime_roles=["guest"], - input_data=linr_0.outputs["train_output_data"]) + metrics=["r2_score", "mse"], + 
label_column_name="y", + input_data=[linr_1.outputs["train_output_data"], linr_2.outputs["train_output_data"]]) # pipeline.add_task(feature_scale_0) # pipeline.add_task(feature_scale_1) pipeline.add_task(intersect_0) pipeline.add_task(linr_0) pipeline.add_task(linr_1) -# pipeline.add_task(evaluation_0) +pipeline.add_task(linr_2) +pipeline.add_task(evaluation_0) # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() print(pipeline.get_dag()) pipeline.fit() -print(f"linr_0 model: {pipeline.get_task_info('linr_0').get_output_model()}") -# print(f"linr_0 data: {pipeline.get_task_info('linr_0').get_output_data()}") -print(f"\nlinr_1 model: {pipeline.get_task_info('linr_1').get_output_model()}") - -"""# print(pipeline.get_task_info("statistics_0").get_output_model()) -print(pipeline.get_task_info("linr_0").get_output_model()) -print(pipeline.get_task_info("linr_0").get_output_metrics()) -print(f"evaluation metrics: ") -print(pipeline.get_task_info("evaluation_0").get_output_metrics()) +import numpy as np -pipeline.deploy([intersect_0, linr_0]) +linr_0_coef = np.array( + pipeline.get_task_info('linr_0').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) +linr_0_intercept = np.array( + pipeline.get_task_info('linr_0').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) -predict_pipeline = FateFlowPipeline() +linr_1_coef = np.array( + pipeline.get_task_info('linr_1').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) +linr_1_intercept = np.array( + pipeline.get_task_info('linr_1').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) +# print(f"linr_1 data: {pipeline.get_task_info('linr_0').get_output_data()}") +linr_2_coef = np.array( + pipeline.get_task_info('linr_2').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) +linr_2_intercept = np.array( + 
pipeline.get_task_info('linr_2').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -deployed_pipeline.intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) +print(f"linr_1 coef: {linr_1_coef}, intercept: {linr_1_intercept}") +print(f"linr_2 coef: {linr_2_coef}, intercept: {linr_2_intercept}") +print(f"linr_1 vs l2_1 coef diff: {linr_1_coef - linr_2_coef}, intercept diff: {linr_1_intercept - linr_2_intercept}") -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict()""" +print(f"\n evaluation result: {pipeline.get_task_info('evaluation_0').get_output_metric()[0]['data']}") diff --git a/python/fate_test/fate_test/_config.py b/python/fate_test/fate_test/_config.py index b81b25e59e..16d6fb1e34 100644 --- a/python/fate_test/fate_test/_config.py +++ b/python/fate_test/fate_test/_config.py @@ -181,6 +181,8 @@ def __init__(self, config): self.tunnel_id_to_tunnel = {} self.extend_sid = None self.auto_increasing_sid = None + self.task_cores = None + self.timeout = None # self.work_mode = config.get("work_mode", 0) service_id = 0 @@ -194,6 +196,10 @@ def __init__(self, config): for party in flow_service["parties"]: self.party_to_service_id[party] = service_id + def update_conf(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + @staticmethod def load(path: typing.Union[str, Path], **kwargs): if isinstance(path, str): diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py index e27c2098e0..e8aa76fdbc 100644 --- a/python/fate_test/fate_test/_flow_client.py +++ b/python/fate_test/fate_test/_flow_client.py 
@@ -175,6 +175,10 @@ def _delete_data(self, table_name, namespace): response = self._client.table.delete(namespace=namespace, table_name=table_name) return response + def query_job(self, job_id, role, party_id): + response = self._client.task.query(job_id, role=role, party_id=party_id) + return response + """def _submit_job(self, conf, dsl): param = { 'job_dsl': self._save_json(dsl, 'submit_dsl.json'), diff --git a/python/fate_test/fate_test/_parser.py b/python/fate_test/fate_test/_parser.py index 7d2e898382..114ec71f56 100644 --- a/python/fate_test/fate_test/_parser.py +++ b/python/fate_test/fate_test/_parser.py @@ -192,16 +192,16 @@ def pretty_final_summary(self, time_consuming, suite_file=None): return table.get_string(title=f"{TxtStyle.TITLE}Testsuite Summary: {self.suite_name}{TxtStyle.END}") - def model_in_dep(self, name): + """def model_in_dep(self, name): return name in self._dependency - """def get_dependent_jobs(self, name): - return self._dependency[name]""" + def get_dependent_jobs(self, name): + return self._dependency[name] def remove_dependency(self, name): del self._dependency[name] - """def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info=None, model_loader_info=None): + def feed_dep_info(self, job, name, model_info=None, table_info=None, cache_info=None, model_loader_info=None): if model_info is not None: job.set_pre_work(name, **model_info) if table_info is not None: @@ -213,7 +213,7 @@ def remove_dependency(self, name): if name in job.pre_works: job.pre_works.remove(name) if job.is_submit_ready(): - self._ready_jobs.appendleft(job)""" + self._ready_jobs.appendleft(job) def reflash_configs(self, config: Config): failed = [] @@ -225,7 +225,7 @@ def reflash_configs(self, config: Config): except ValueError as e: failed.append((job, e)) return failed - + """ def update_status( self, job_name, job_id: str = None, status: str = None, exception_id: str = None ): @@ -325,6 +325,36 @@ def load(path: Path): return suite +class 
PerformanceSuite(object): + def __init__( + self, dataset: typing.List[Data], pipeline_jobs: typing.List[BenchmarkJob], path: Path + ): + self.dataset = dataset + self.pipeline_jobs = pipeline_jobs + self.path = path + + @staticmethod + def load(path: Path): + with path.open("r") as f: + # testsuite_config = json.load(f, object_hook=DATA_JSON_HOOK.hook) + testsuite_config = yaml.safe_load(f) + # testsuite_config = DATA_JSON_HOOK.hook(testsuite_config) + + dataset = [] + for d in testsuite_config.get("data"): + d = DATA_LOAD_HOOK.hook(d) + dataset.append(Data.load(d, path)) + + pipeline_jobs = [] + for job_name, job_configs in testsuite_config.get("tasks", {}).items(): + script_path = path.parent.joinpath(job_configs["script"]).resolve() + config_path = path.parent.joinpath(job_configs.get("conf", "")).resolve() + pipeline_jobs.append(BenchmarkJob(job_name, script_path, config_path)) + + suite = PerformanceSuite(dataset, pipeline_jobs, path) + return suite + + def non_success_summary(): status = {} for job in _config.non_success_jobs: diff --git a/python/fate_test/fate_test/scripts/_utils.py b/python/fate_test/fate_test/scripts/_utils.py index 8445e55bc1..53ffc92859 100644 --- a/python/fate_test/fate_test/scripts/_utils.py +++ b/python/fate_test/fate_test/scripts/_utils.py @@ -10,7 +10,7 @@ from fate_test._config import Config from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse from fate_test._io import echo, LOGGER, set_logger -from fate_test._parser import Testsuite, BenchmarkSuite, DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK +from fate_test._parser import Testsuite, BenchmarkSuite, PerformanceSuite, DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, @@ -82,6 +82,8 @@ def _find_testsuite_files(path): suite = Testsuite.load(suite_path.resolve(), provider) elif suite_type == "benchmark": suite = 
BenchmarkSuite.load(suite_path.resolve()) + elif suite_type == "performance": + suite = PerformanceSuite.load(suite_path.resolve()) else: raise ValueError(f"Unsupported suite type: {suite_type}. Only accept type 'testsuite' or 'benchmark'.") except Exception as e: diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index a6ba20383f..4c5d84b8ee 100644 --- a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -19,9 +19,12 @@ @click.command(name="benchmark-quality") @click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", - help="include *benchmark.json under these paths") + help="include *benchmark.yaml under these paths") @click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, - help="exclude *benchmark.json under these paths") + help="exclude *benchmark.yaml under these paths") +@click.option('-p', '--task-cores', type=int, help="processors per node", default=None) +@click.option('-m', '--timeout', type=int, default=None, + help="maximum running time of job") @click.option('-g', '--glob', type=str, help="glob string to filter sub-directory of path specified by ") @click.option('-t', '--tol', type=float, @@ -35,12 +38,14 @@ default="all", help="Error value display in algorithm comparison") @click.option('--skip-data', is_flag=True, default=False, help="skip uploading data specified in benchmark conf") +@click.option("--data-only", is_flag=True, default=False, + help="upload data only") @click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) @click.option("--enable-clean-data", "clean_data", flag_value=True, default=None) @SharedOptions.get_shared_options(hidden=True) @click.pass_context def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, storage_tag, history_tag, match_details, - **kwargs): + task_cores, timeout, 
**kwargs): """ process benchmark suite, alias: bq """ @@ -50,6 +55,10 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] + if task_cores is not None: + config_inst.update_conf(task_cores=task_cores) + if timeout is not None: + config_inst.update_conf(timeout=timeout) """if ctx.obj["auto_increasing_sid"] is not None: config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" @@ -81,6 +90,8 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora _upload_data(client, suite, config_inst) except Exception as e: raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + if kwargs.get("data_only"): + continue try: _run_benchmark_pairs(config_inst, suite, tol, namespace, data_namespace_mangling, storage_tag, history_tag, fate_version, match_details) diff --git a/python/fate_test/fate_test/scripts/performance_cli.py b/python/fate_test/fate_test/scripts/performance_cli.py index e07791cc9a..0f120de1fd 100644 --- a/python/fate_test/fate_test/scripts/performance_cli.py +++ b/python/fate_test/fate_test/scripts/performance_cli.py @@ -14,35 +14,33 @@ # limitations under the License. 
# import glob -import json import os import time import uuid from datetime import timedelta +from inspect import signature +from ruamel import yaml import click from fate_test._client import Clients from fate_test._config import Config -from fate_test._flow_client import JobProgress, QueryJobResponse from fate_test._io import LOGGER, echo -from fate_test._parser import Testsuite +from fate_test._parser import PerformanceSuite from fate_test.scripts._options import SharedOptions from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script, \ _add_replace_hook -from fate_test.utils import TxtStyle +from fate_test.utils import TxtStyle, parse_job_time_info, pretty_time_info_summary from prettytable import PrettyTable, ORGMODE -from fate_test import _config - @click.command("performance") @click.option('-t', '--job-type', type=click.Choice(['intersect', 'intersect_multi', 'hetero_lr', 'hetero_sbt']), help="Select the job type, you can also set through include") @click.option('-i', '--include', type=click.Path(exists=True), multiple=True, metavar="", - help="include *testsuite.json under these paths") -@click.option('-m', '--timeout', type=int, default=3600, - help="maximun running time of job") -@click.option('-e', '--max-iter', type=int, help="When the algorithm model is LR, the number of iterations is set") + help="include *performance.yaml under these paths") +@click.option('-m', '--timeout', type=int, + help="maximum running time of job") +@click.option('-e', '--epochs', type=int, help="When the algorithm model is LR, the number of iterations is set") @click.option('-d', '--max-depth', type=int, help="When the algorithm model is SecureBoost, set the number of model layers") @click.option('-nt', '--num-trees', type=int, help="When the algorithm model is SecureBoost, set the number of trees") @@ -58,7 +56,7 @@ @click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) 
@SharedOptions.get_shared_options(hidden=True) @click.pass_context -def run_task(ctx, job_type, include, replace, timeout, update_job_parameters, update_component_parameters, max_iter, +def run_task(ctx, job_type, include, replace, timeout, epochs, max_depth, num_trees, task_cores, storage_tag, history_tag, skip_data, clean_data, provider, **kwargs): """ Test the performance of big data tasks, alias: bp @@ -68,8 +66,12 @@ def run_task(ctx, job_type, include, replace, timeout, update_job_parameters, up config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] - if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"] + if task_cores is not None: + config_inst.update_conf(task_cores=task_cores) + if timeout is not None: + config_inst.update_conf(timeout=timeout) + """if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" namespace = ctx.obj["namespace"] yes = ctx.obj["yes"] data_namespace_mangling = ctx.obj["namespace_mangling"] @@ -77,7 +79,7 @@ def run_task(ctx, job_type, include, replace, timeout, update_job_parameters, up clean_data = config_inst.clean_data def get_perf_template(conf: Config, job_type): - perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir) + '/' + job_type + '/' + "*testsuite.json") + perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir) + '/' + job_type + '/' + "*testsuite.yaml") return glob.glob(perf_dir) if not include: @@ -88,7 +90,8 @@ def get_perf_template(conf: Config, job_type): echo.welcome() echo.echo(f"testsuite namespace: {namespace}", fg='red') echo.echo("loading testsuites:") - suites = _load_testsuites(includes=include, excludes=tuple(), glob=None, provider=provider) + suites = _load_testsuites(includes=include, excludes=tuple(), glob=None, provider=provider, + suffix="performance.yaml", suite_type="performance") for i, suite in 
enumerate(suites): echo.echo(f"\tdataset({len(suite.dataset)}) dsl jobs({len(suite.jobs)}) {suite.path}") @@ -112,21 +115,41 @@ def get_perf_template(conf: Config, job_type): echo.stdout_newline() try: - time_consuming = _submit_job(client, suite, namespace, config_inst, timeout, update_job_parameters, - storage_tag, history_tag, update_component_parameters, max_iter, - max_depth, num_trees, task_cores) - except Exception as e: - raise RuntimeError(f"exception occur while submit job for {suite.path}") from e - - try: - _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling) + job_time_info = _run_performance_jobs(config_inst, suite, namespace, data_namespace_mangling, client, + epochs, max_depth, num_trees) except Exception as e: raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') if not skip_data and clean_data: _delete_data(client, suite) - echo.echo(suite.pretty_final_summary(time_consuming), fg='red') + # echo.echo(suite.pretty_final_summary(job_time_info), fg='red') + all_summary = [] + compare_summary = [] + for job_name, job_time in job_time_info.items(): + performance_dir = "/".join( + [os.path.join(os.path.abspath(config_inst.cache_directory), + 'benchmark_history', "performance.yaml")]) + # @todo: change to client query result + # fate_version = clients["guest_0"].get_version() + fate_version = "beta-2.0.0" + if history_tag: + history_tag = ["_".join([i, job_name]) for i in history_tag] + history_compare_result = comparison_quality(job_name, + history_tag, + performance_dir, + job_time["time_summary"]) + compare_summary.append(history_compare_result) + if storage_tag: + storage_tag = "_".join(['FATE', fate_version, storage_tag, job_name]) + save_quality(storage_tag, performance_dir, job_time["time_summary"]) + res_str = pretty_time_info_summary(job_time, job_name) + all_summary.append(res_str) + 
echo.echo("\n".join(all_summary)) + echo.echo("#" * 60) + echo.echo("\n".join(compare_summary)) + + echo.echo() except Exception: exception_id = uuid.uuid1() @@ -139,195 +162,65 @@ def get_perf_template(conf: Config, job_type): echo.echo(f"testsuite namespace: {namespace}", fg='red') -def _submit_job(clients: Clients, suite: Testsuite, namespace: str, config: Config, timeout, update_job_parameters, - storage_tag, history_tag, update_component_parameters, max_iter, max_depth, num_trees, task_cores): - # submit jobs - with click.progressbar(length=len(suite.jobs), - label="jobs", - show_eta=False, - show_pos=True, - width=24) as bar: - time_list = [] - for job in suite.jobs_iter(): - start = time.time() - job_progress = JobProgress(job.job_name) - - def _raise(): - exception_id = str(uuid.uuid1()) - job_progress.exception(exception_id) - suite.update_status(job_name=job.job_name, exception_id=exception_id) - echo.file(f"exception({exception_id})") - LOGGER.exception(f"exception id: {exception_id}") - - # noinspection PyBroadException - try: - if max_iter is not None: - job.job_conf.update_component_parameters('max_iter', max_iter) - if max_depth is not None: - job.job_conf.update_component_parameters('max_depth', max_depth) - if num_trees is not None: - job.job_conf.update_component_parameters('num_trees', num_trees) - if task_cores is not None: - job.job_conf.update_job_common_parameters(task_cores=task_cores) - job.job_conf.update(config.parties, timeout, update_job_parameters, update_component_parameters) - except Exception: - _raise() - continue - - def update_bar(n_step): - bar.item_show_func = lambda x: job_progress.show() - time.sleep(0.1) - bar.update(n_step) - - update_bar(1) - - def _call_back(resp): - """if isinstance(resp, SubmitJobResponse): - job_progress.submitted(resp.job_id) - echo.file(f"[jobs] {resp.job_id} ", nl=False) - suite.update_status(job_name=job.job_name, job_id=resp.job_id)""" - - if isinstance(resp, QueryJobResponse): - 
job_progress.running(resp.status, resp.progress) - - update_bar(0) - - # noinspection PyBroadException - try: - response = clients["guest_0"].submit_job(job=job, callback=_call_back) - - # noinspection PyBroadException - try: - # add notes - notes = f"{job.job_name}@{suite.path}@{namespace}" - for role, party_id_list in job.job_conf.role.items(): - for i, party_id in enumerate(party_id_list): - clients[f"{role}_{i}"].add_notes(job_id=response.job_id, role=role, party_id=party_id, - notes=notes) - except Exception: - pass - except Exception: - _raise() - else: - job_progress.final(response.status) - suite.update_status(job_name=job.job_name, status=response.status.status) - if response.status.is_success(): - if suite.model_in_dep(job.job_name): - dependent_jobs = suite.get_dependent_jobs(job.job_name) - for predict_job in dependent_jobs: - model_info, table_info, cache_info, model_loader_info = None, None, None, None - for i in _config.deps_alter[predict_job.job_name]: - if isinstance(i, dict): - name = i.get('name') - data_pre = i.get('data') - - if 'data_deps' in _config.deps_alter[predict_job.job_name]: - roles = list(data_pre.keys()) - table_info, hierarchy = [], [] - for role_ in roles: - role, index = role_.split("_") - input_ = data_pre[role_] - for data_input, cpn in input_.items(): - try: - table_name = clients["guest_0"].output_data_table( - job_id=response.job_id, - role=role, - party_id=config.role[role][int(index)], - component_name=cpn) - except Exception: - _raise() - if predict_job.job_conf.dsl_version == 2: - hierarchy.append([role, index, data_input]) - table_info.append({'table': table_name}) - else: - hierarchy.append([role, 'args', 'data']) - table_info.append({data_input: [table_name]}) - table_info = {'hierarchy': hierarchy, 'table_info': table_info} - if 'model_deps' in _config.deps_alter[predict_job.job_name]: - if predict_job.job_conf.dsl_version == 2: - # noinspection PyBroadException - try: - model_info = clients["guest_0"].deploy_model( 
- model_id=response.model_info["model_id"], - model_version=response.model_info["model_version"], - dsl=predict_job.job_dsl.as_dict()) - except Exception: - _raise() - else: - model_info = response.model_info - if 'cache_deps' in _config.deps_alter[predict_job.job_name]: - cache_dsl = predict_job.job_dsl.as_dict() - cache_info = [] - for cpn in cache_dsl.get("components").keys(): - if "CacheLoader" in cache_dsl.get("components").get(cpn).get("module"): - cache_info.append({cpn: {'job_id': response.job_id}}) - cache_info = {'hierarchy': [""], 'cache_info': cache_info} - if 'model_loader_deps' in _config.deps_alter[predict_job.job_name]: - model_loader_dsl = predict_job.job_dsl.as_dict() - model_loader_info = [] - for cpn in model_loader_dsl.get("components").keys(): - if "ModelLoader" in model_loader_dsl.get("components").get(cpn).get("module"): - model_loader_info.append({cpn: response.model_info}) - model_loader_info = {'hierarchy': [""], 'model_loader_info': model_loader_info} - - suite.feed_dep_info(predict_job, name, model_info=model_info, table_info=table_info, - cache_info=cache_info, model_loader_info=model_loader_info) - suite.remove_dependency(job.job_name) - update_bar(0) - time_consuming = time.time() - start - performance_dir = "/".join( - [os.path.join(os.path.abspath(config.cache_directory), 'benchmark_history', "performance.json")]) - fate_version = clients["guest_0"].get_version() - if history_tag: - history_tag = ["_".join([i, job.job_name]) for i in history_tag] - comparison_quality(job.job_name, history_tag, performance_dir, time_consuming) - if storage_tag: - storage_tag = "_".join(['FATE', fate_version, storage_tag, job.job_name]) - save_quality(storage_tag, performance_dir, time_consuming) - echo.stdout_newline() - time_list.append(time_consuming) - return [str(int(i)) + "s" for i in time_list] - - -def _run_pipeline_jobs(config: Config, suite: Testsuite, namespace: str, data_namespace_mangling: bool): +@LOGGER.catch +def 
_run_performance_jobs(config: Config, suite: PerformanceSuite, tol: float, namespace: str, + data_namespace_mangling: bool, client, epochs, max_depth, num_trees): # pipeline demo goes here job_n = len(suite.pipeline_jobs) - for i, pipeline_job in enumerate(suite.pipeline_jobs): - echo.echo(f"Running [{i + 1}/{job_n}] job: {pipeline_job.job_name}") - - def _raise(err_msg, status="failed"): - exception_id = str(uuid.uuid1()) - suite.update_status(job_name=job_name, exception_id=exception_id, status=status) - echo.file(f"exception({exception_id}), error message:\n{err_msg}") - # LOGGER.exception(f"exception id: {exception_id}") - - job_name, script_path = pipeline_job.job_name, pipeline_job.script_path - mod = _load_module_from_script(script_path) + fate_base = config.fate_base + PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python") + os.environ['PYTHONPATH'] = PYTHONPATH + job_time_history = {} + for j, job in enumerate(suite.pipeline_jobs): try: - if data_namespace_mangling: - try: - mod.main(config=config, namespace=f"_{namespace}") - suite.update_status(job_name=job_name, status="success") - except Exception as e: - _raise(e) - continue + echo.echo(f"Running [{j + 1}/{job_n}] job: {job.job_name}") + job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path + param = Config.load_from_file(conf_path) + if epochs is not None: + param['epochs'] = epochs + if max_depth is not None: + param['max_depth'] = max_depth + if num_trees is not None: + param['num_trees'] = num_trees + + mod = _load_module_from_script(script_path) + input_params = signature(mod.main).parameters + # local script + if len(input_params) == 1: + job_id = mod.main(param=param) + elif len(input_params) == 2: + job_id = mod.main(config=config, param=param) + # pipeline script + elif len(input_params) == 3: + if data_namespace_mangling: + job_id = mod.main(config=config, param=param, namespace=f"_{namespace}") + else: + job_id = mod.main(config=config, 
param=param) else: - try: - mod.main(config=config) - suite.update_status(job_name=job_name, status="success") - except Exception as e: - _raise(e) - continue + job_id = mod.main() + echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} Success!\n") + ret_msg = client.query_time_elapse(job_id, role="guest", party_id=config.parties.guest[0]).get("data") + time_summary = parse_job_time_info(ret_msg) + job_time_history[job_name] = {"job_id": job_id, "time_summary": time_summary} + echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} time info: {time_summary}\n") + except Exception as e: - _raise(e, status="not submitted") + exception_id = uuid.uuid1() + echo.echo(f"exception while running [{j + 1}/{job_n}] job, exception_id={exception_id}", err=True, + fg='red') + LOGGER.exception(f"exception id: {exception_id}, error message: \n{e}") continue + return job_time_history def comparison_quality(group_name, history_tags, history_info_dir, time_consuming): assert os.path.exists(history_info_dir), f"Please check the {history_info_dir} Is it deleted" with open(history_info_dir, 'r') as f: - benchmark_quality = json.load(f, object_hook=dict) + benchmark_quality = yaml.load(f) benchmark_performance = {} + table = PrettyTable() + table.set_style(ORGMODE) + table.field_names = ["Script Model Name", "component", "time consuming"] for history_tag in history_tags: for tag in benchmark_quality: if '_'.join(tag.split("_")[2:]) == history_tag: @@ -335,28 +228,28 @@ def comparison_quality(group_name, history_tags, history_info_dir, time_consumin if benchmark_performance is not None: benchmark_performance[group_name] = time_consuming - table = PrettyTable() - table.set_style(ORGMODE) - table.field_names = ["Script Model Name", "time consuming"] for script_model_name in benchmark_performance: - table.add_row([f"{script_model_name}"] + - [f"{TxtStyle.FIELD_VAL}{benchmark_performance[script_model_name]}{TxtStyle.END}"]) - print("\n") - print(table.get_string(title=f"{TxtStyle.TITLE}Performance 
comparison results{TxtStyle.END}")) - print("#" * 60) + for cpn, time in benchmark_performance[script_model_name].items(): + table.add_row([f"{script_model_name}"] + + [f"{TxtStyle.FIELD_VAL}{cpn}{TxtStyle.END}"] + + [f"{TxtStyle.FIELD_VAL}{time}{TxtStyle.END}"]) + # print("\n") + # print(table.get_string(title=f"{TxtStyle.TITLE}Performance comparison results{TxtStyle.END}")) + # print("#" * 60) + return table.get_string(title=f"{TxtStyle.TITLE}Performance comparison results{TxtStyle.END}") def save_quality(storage_tag, save_dir, time_consuming): os.makedirs(os.path.dirname(save_dir), exist_ok=True) if os.path.exists(save_dir): with open(save_dir, 'r') as f: - benchmark_quality = json.load(f, object_hook=dict) + benchmark_quality = yaml.load(f) else: benchmark_quality = {} benchmark_quality.update({storage_tag: time_consuming}) try: with open(save_dir, 'w') as fp: - json.dump(benchmark_quality, fp, indent=2) + yaml.dump(benchmark_quality, fp) print("\n" + "Storage successful, please check: ", save_dir) except Exception: print("\n" + "Storage failed, please check: ", save_dir) diff --git a/python/fate_test/fate_test/scripts/testsuite_cli.py b/python/fate_test/fate_test/scripts/testsuite_cli.py index 3bc295bfe1..b58c9ee73d 100644 --- a/python/fate_test/fate_test/scripts/testsuite_cli.py +++ b/python/fate_test/fate_test/scripts/testsuite_cli.py @@ -39,9 +39,12 @@ @click.command("suite") @click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", - help="include *testsuite.json under these paths") + help="include *testsuite.yaml under these paths") @click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, - help="exclude *testsuite.json under these paths") + help="exclude *testsuite.yaml under these paths") +@click.option('-p', '--task-cores', type=int, help="processors per node") +@click.option('-m', '--timeout', type=int, + help="maximum running time of job") @click.option("-g", '--glob', type=str, 
help="glob string to filter sub-directory of path specified by ") @click.option("--skip-jobs", is_flag=True, default=False, @@ -57,7 +60,7 @@ @SharedOptions.get_shared_options(hidden=True) @click.pass_context def run_suite(ctx, include, exclude, glob, - skip_jobs, skip_data, data_only, clean_data, provider, **kwargs): + skip_jobs, skip_data, data_only, clean_data, provider, task_cores, timeout, **kwargs): """ process testsuite """ @@ -66,6 +69,11 @@ def run_suite(ctx, include, exclude, glob, config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] + if task_cores is not None: + config_inst.update_conf(task_cores=task_cores) + if timeout is not None: + config_inst.update_conf(timeout=timeout) + """if ctx.obj["auto_increasing_sid"] is not None: config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" if clean_data is None: diff --git a/python/fate_test/fate_test/utils.py b/python/fate_test/fate_test/utils.py index 74775354ee..a66508dd1e 100644 --- a/python/fate_test/fate_test/utils.py +++ b/python/fate_test/fate_test/utils.py @@ -369,3 +369,30 @@ def extract_data(df, col_name, convert_float=True, keep_id=False): return df[[df.columns[0], col_name]].to_numpy() else: return df[col_name].to_numpy().astype(np.float64) + + +def parse_job_time_info(job_time_info): + time_info_summary = [] + for cpn in job_time_info: + cpn_name = cpn.get("task_name") + cpn_elapsed = cpn.get("elapsed") + time_info_summary.append((cpn_name, cpn_elapsed)) + return time_info_summary + + +def pretty_time_info_summary(time_info_summary, job_name): + table = PrettyTable() + table.set_style(ORGMODE) + field_names = ["component name", "time consuming"] + table.field_names = field_names + time_summary = time_info_summary.get("time_summary", []) + for cpn_name, cpn_elapse in time_summary: + table.add_row( + [ + f"{TxtStyle.FIELD_VAL}{cpn_name}{TxtStyle.END}", + f"{TxtStyle.FIELD_VAL}{cpn_elapse}{TxtStyle.END}", + ] + ) + + return 
table.get_string(title=f"{TxtStyle.TITLE}Component Time Summary: " + f"{job_name}({time_info_summary['job_id']}){TxtStyle.END}") From d5f3ff50054771a3cf32671111a198183b758f8a Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 4 Aug 2023 19:20:31 +0800 Subject: [PATCH 14/30] add examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/linr/fate-linr.py | 5 ++ examples/benchmark_quality/linr/local-linr.py | 72 +++++++++++++++++++ examples/pipeline/test_upload.py | 2 - 3 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 examples/benchmark_quality/linr/local-linr.py diff --git a/examples/benchmark_quality/linr/fate-linr.py b/examples/benchmark_quality/linr/fate-linr.py index cb7866ad8d..248b0afc79 100644 --- a/examples/benchmark_quality/linr/fate-linr.py +++ b/examples/benchmark_quality/linr/fate-linr.py @@ -82,6 +82,11 @@ def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""): # pipeline.add_task(linr_1) pipeline.add_task(evaluation_0) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + pipeline.compile() print(pipeline.get_dag()) pipeline.fit() diff --git a/examples/benchmark_quality/linr/local-linr.py b/examples/benchmark_quality/linr/local-linr.py new file mode 100644 index 0000000000..bffafbb524 --- /dev/null +++ b/examples/benchmark_quality/linr/local-linr.py @@ -0,0 +1,72 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import os + +import numpy as np +import pandas +from fate_client.pipeline.utils.test_utils import JobConfig +from sklearn.linear_model import SGDRegressor +from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score + + +def main(config="../../config.yaml", param="./linr_sklearn_config.yaml"): + # obtain config + if isinstance(param, str): + param = JobConfig.load_from_file(param) + data_guest = param["data_guest"] + data_host = param["data_host"] + idx = param["idx"] + label_name = param["label_name"] + + if isinstance(config, str): + config = JobConfig.load_from_file(config) + print(f"config: {config}") + data_base_dir = config["data_base_dir"] + else: + data_base_dir = config.data_base_dir + + # prepare data + df_guest = pandas.read_csv(os.path.join(data_base_dir, data_guest), index_col=idx) + df_host = pandas.read_csv(os.path.join(data_base_dir, data_host), index_col=idx) + df = df_guest.join(df_host, rsuffix="host") + y = df[label_name] + X = df.drop(label_name, axis=1) + lm = SGDRegressor(loss="squared_error", penalty=param["penalty"], random_state=42, + fit_intercept=True, max_iter=param["epochs"], average=param["batch_size"]) + lm_fit = lm.fit(X, y) + y_pred = lm_fit.predict(X) + + mse = mean_squared_error(y, y_pred) + rmse = np.sqrt(mse) + r2 = r2_score(y, y_pred) + explained_var = explained_variance_score(y, y_pred) + metric_summary = {"r2_score": r2, + "mse": mse, + "rmse": rmse} + data_summary = {} + return data_summary, metric_summary + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("BENCHMARK-QUALITY LOCAL JOB") + parser.add_argument("-c", "--config", type=str, + help="config file", default="../../config.yaml") + parser.add_argument("-p", "--param", type=str, + help="config file for params", default="./linr_sklearn_config.yaml") + args = parser.parse_args() + main(args.config, 
args.param) diff --git a/examples/pipeline/test_upload.py b/examples/pipeline/test_upload.py index c44261de8a..403926bbe6 100644 --- a/examples/pipeline/test_upload.py +++ b/examples/pipeline/test_upload.py @@ -31,7 +31,6 @@ 'weight_type': 'float32'} pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_guest}", - file="/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_guest.csv", meta=meta, head=True, namespace="experiment", name="breast_hetero_guest") @@ -53,7 +52,6 @@ pipeline.set_site_party_id("0") pipeline.transform_local_file_to_dataframe( # file="${abs_path_of_data_host}", - file="/Users/yuwu/PycharmProjects/FATE/examples/data/breast_hetero_host.csv", meta=meta, head=True, namespace="experiment", name="breast_hetero_host") From fe4b99610ff1809cb7836903f28b37a944f8fe88 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 7 Aug 2023 19:06:18 +0800 Subject: [PATCH 15/30] fixed cpn order in fate-test performance comparison table(#5008) fix fate-test performance param parsing(#5008) Signed-off-by: Yu Wu --- .../benchmark_quality/lr/breast_config.yaml | 16 ++++---- python/fate_test/fate_test/_flow_client.py | 2 +- .../fate_test/scripts/benchmark_cli.py | 4 +- .../fate_test/scripts/performance_cli.py | 40 +++++++++---------- python/fate_test/fate_test/utils.py | 20 ++++++---- 5 files changed, 43 insertions(+), 39 deletions(-) diff --git a/examples/benchmark_quality/lr/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml index 142f056628..4feac1af67 100644 --- a/examples/benchmark_quality/lr/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -2,21 +2,21 @@ data_guest: "breast_hetero_guest" data_host: "breast_hetero_host" idx: "id" label_name: "y" -epochs: 15 +epochs: 8 init_param: fit_intercept: True - method: "uniform" + method: "random_uniform" random_state: 42 learning_rate_scheduler: method: "constant" scheduler_params: - factor: 1.0 - total_iters: 100 + factor: 0.2 + total_iters: 18 optimizer: 
method: "rmsprop" - penalty: "l1" + penalty: "l2" optimizer_params: - lr: 0.5 - alpha: 0.1 -batch_size: null + lr: 0.15 + alpha: 0.2 +batch_size: 240 early_stop: "diff" \ No newline at end of file diff --git a/python/fate_test/fate_test/_flow_client.py b/python/fate_test/fate_test/_flow_client.py index e8aa76fdbc..098c6a1bd9 100644 --- a/python/fate_test/fate_test/_flow_client.py +++ b/python/fate_test/fate_test/_flow_client.py @@ -280,7 +280,7 @@ def get_version(self): retmsg = response['message'] if retcode != 0 or retmsg != 'success': raise RuntimeError(f"get version error: {response}") - fate_version = response["data"]["provider_name"] + fate_version = response["data"][0]["provider_name"] except Exception as e: raise RuntimeError(f"get version error: {response}") from e return fate_version diff --git a/python/fate_test/fate_test/scripts/benchmark_cli.py b/python/fate_test/fate_test/scripts/benchmark_cli.py index 4c5d84b8ee..365484a74e 100644 --- a/python/fate_test/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/fate_test/scripts/benchmark_cli.py @@ -77,9 +77,7 @@ def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, stora if not yes and not click.confirm("running?"): return client = Clients(config_inst) - # @todo: change to client query result - # fate_version = client["guest_0"].get_version() - fate_version = "beta-2.0.0" + fate_version = client["guest_0"].get_version() for i, suite in enumerate(suites): # noinspection PyBroadException try: diff --git a/python/fate_test/fate_test/scripts/performance_cli.py b/python/fate_test/fate_test/scripts/performance_cli.py index 0f120de1fd..dbd981ac5a 100644 --- a/python/fate_test/fate_test/scripts/performance_cli.py +++ b/python/fate_test/fate_test/scripts/performance_cli.py @@ -19,7 +19,6 @@ import uuid from datetime import timedelta from inspect import signature -from ruamel import yaml import click from fate_test._client import Clients @@ -27,10 +26,10 @@ from fate_test._io import 
LOGGER, echo from fate_test._parser import PerformanceSuite from fate_test.scripts._options import SharedOptions -from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script, \ - _add_replace_hook +from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script from fate_test.utils import TxtStyle, parse_job_time_info, pretty_time_info_summary from prettytable import PrettyTable, ORGMODE +from ruamel import yaml @click.command("performance") @@ -56,7 +55,7 @@ @click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) @SharedOptions.get_shared_options(hidden=True) @click.pass_context -def run_task(ctx, job_type, include, replace, timeout, epochs, +def run_task(ctx, job_type, include, timeout, epochs, max_depth, num_trees, task_cores, storage_tag, history_tag, skip_data, clean_data, provider, **kwargs): """ Test the performance of big data tasks, alias: bp @@ -79,13 +78,13 @@ def run_task(ctx, job_type, include, replace, timeout, epochs, clean_data = config_inst.clean_data def get_perf_template(conf: Config, job_type): - perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir) + '/' + job_type + '/' + "*testsuite.yaml") + perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir) + '/' + job_type + '/' + "*performance.yaml") return glob.glob(perf_dir) if not include: include = get_perf_template(config_inst, job_type) # prepare output dir and json hooks - _add_replace_hook(replace) + # _add_replace_hook(replace) echo.welcome() echo.echo(f"testsuite namespace: {namespace}", fg='red') @@ -93,7 +92,7 @@ def get_perf_template(conf: Config, job_type): suites = _load_testsuites(includes=include, excludes=tuple(), glob=None, provider=provider, suffix="performance.yaml", suite_type="performance") for i, suite in enumerate(suites): - echo.echo(f"\tdataset({len(suite.dataset)}) dsl jobs({len(suite.jobs)}) {suite.path}") + 
echo.echo(f"\tdataset({len(suite.dataset)}) pipeline jobs({len(suite.pipeline_jobs)}) {suite.path}") if not yes and not click.confirm("running?"): return @@ -115,7 +114,8 @@ def get_perf_template(conf: Config, job_type): echo.stdout_newline() try: - job_time_info = _run_performance_jobs(config_inst, suite, namespace, data_namespace_mangling, client, + job_time_info = _run_performance_jobs(config_inst, suite, namespace, data_namespace_mangling, + client, epochs, max_depth, num_trees) except Exception as e: raise RuntimeError(f"exception occur while running pipeline jobs for {suite.path}") from e @@ -130,9 +130,8 @@ def get_perf_template(conf: Config, job_type): performance_dir = "/".join( [os.path.join(os.path.abspath(config_inst.cache_directory), 'benchmark_history', "performance.yaml")]) - # @todo: change to client query result - # fate_version = clients["guest_0"].get_version() - fate_version = "beta-2.0.0" + fate_version = client["guest_0"].get_version() + # fate_version = "beta-2.0.0" if history_tag: history_tag = ["_".join([i, job_name]) for i in history_tag] history_compare_result = comparison_quality(job_name, @@ -149,8 +148,6 @@ def get_perf_template(conf: Config, job_type): echo.echo("#" * 60) echo.echo("\n".join(compare_summary)) - echo.echo() - except Exception: exception_id = uuid.uuid1() echo.echo(f"exception in {suite.path}, exception_id={exception_id}") @@ -163,7 +160,7 @@ def get_perf_template(conf: Config, job_type): @LOGGER.catch -def _run_performance_jobs(config: Config, suite: PerformanceSuite, tol: float, namespace: str, +def _run_performance_jobs(config: Config, suite: PerformanceSuite, namespace: str, data_namespace_mangling: bool, client, epochs, max_depth, num_trees): # pipeline demo goes here job_n = len(suite.pipeline_jobs) @@ -199,7 +196,9 @@ def _run_performance_jobs(config: Config, suite: PerformanceSuite, tol: float, n else: job_id = mod.main() echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} Success!\n") - ret_msg = 
client.query_time_elapse(job_id, role="guest", party_id=config.parties.guest[0]).get("data") + ret_msg = client["guest_0"].query_job(job_id=job_id, + role="guest", + party_id=config.parties.guest[0]).get("data") time_summary = parse_job_time_info(ret_msg) job_time_history[job_name] = {"job_id": job_id, "time_summary": time_summary} echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} time info: {time_summary}\n") @@ -216,7 +215,7 @@ def _run_performance_jobs(config: Config, suite: PerformanceSuite, tol: float, n def comparison_quality(group_name, history_tags, history_info_dir, time_consuming): assert os.path.exists(history_info_dir), f"Please check the {history_info_dir} Is it deleted" with open(history_info_dir, 'r') as f: - benchmark_quality = yaml.load(f) + benchmark_quality = yaml.safe_load(f) benchmark_performance = {} table = PrettyTable() table.set_style(ORGMODE) @@ -229,10 +228,11 @@ def comparison_quality(group_name, history_tags, history_info_dir, time_consumin benchmark_performance[group_name] = time_consuming for script_model_name in benchmark_performance: - for cpn, time in benchmark_performance[script_model_name].items(): + time_history = benchmark_performance[script_model_name] + for cpn in time_history.get("cpn_list"): table.add_row([f"{script_model_name}"] + - [f"{TxtStyle.FIELD_VAL}{cpn}{TxtStyle.END}"] + - [f"{TxtStyle.FIELD_VAL}{time}{TxtStyle.END}"]) + [f"{cpn}"] + + [f"{TxtStyle.FIELD_VAL}{timedelta(seconds=time_history.get(cpn))}{TxtStyle.END}"]) # print("\n") # print(table.get_string(title=f"{TxtStyle.TITLE}Performance comparison results{TxtStyle.END}")) # print("#" * 60) @@ -243,7 +243,7 @@ def save_quality(storage_tag, save_dir, time_consuming): os.makedirs(os.path.dirname(save_dir), exist_ok=True) if os.path.exists(save_dir): with open(save_dir, 'r') as f: - benchmark_quality = yaml.load(f) + benchmark_quality = yaml.safe_load(f) else: benchmark_quality = {} benchmark_quality.update({storage_tag: time_consuming}) diff --git 
a/python/fate_test/fate_test/utils.py b/python/fate_test/fate_test/utils.py index a66508dd1e..2176109e2c 100644 --- a/python/fate_test/fate_test/utils.py +++ b/python/fate_test/fate_test/utils.py @@ -16,6 +16,7 @@ import math import os +from datetime import timedelta import numpy as np from colorama import init, deinit, Fore, Style @@ -372,11 +373,15 @@ def extract_data(df, col_name, convert_float=True, keep_id=False): def parse_job_time_info(job_time_info): - time_info_summary = [] + time_info_summary = {} + cpn_list = [] for cpn in job_time_info: cpn_name = cpn.get("task_name") - cpn_elapsed = cpn.get("elapsed") - time_info_summary.append((cpn_name, cpn_elapsed)) + # convert milliseconds to seconds + cpn_elapsed = round(cpn.get("elapsed") / 1000) + time_info_summary[cpn_name] = cpn_elapsed + cpn_list.append(cpn_name) + time_info_summary["cpn_list"] = cpn_list return time_info_summary @@ -385,12 +390,13 @@ def pretty_time_info_summary(time_info_summary, job_name): table.set_style(ORGMODE) field_names = ["component name", "time consuming"] table.field_names = field_names - time_summary = time_info_summary.get("time_summary", []) - for cpn_name, cpn_elapse in time_summary: + time_summary = time_info_summary.get("time_summary", {}) + for cpn_name in time_summary["cpn_list"]: + cpn_elapse = time_summary.get(cpn_name) table.add_row( [ - f"{TxtStyle.FIELD_VAL}{cpn_name}{TxtStyle.END}", - f"{TxtStyle.FIELD_VAL}{cpn_elapse}{TxtStyle.END}", + f"{cpn_name}", + f"{TxtStyle.FIELD_VAL}{timedelta(seconds=cpn_elapse)}{TxtStyle.END}", ] ) From 5713ee76ae90d9dc762360d138e5f01680ac9b84 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 9 Aug 2023 10:51:47 +0800 Subject: [PATCH 16/30] fix l1 penalty of optimizer(#4659) edit fate-test performance examples(#5008) Signed-off-by: Yu Wu --- .../coordinated_lr/test_lr_sid.py | 17 ++--------------- .../benchmark_quality/lr/breast_config.yaml | 10 +++++----- .../benchmark_quality/lr/lr_benchmark.yaml | 18 +++++++++--------- 
.../benchmark_quality/lr/sklearn-lr-binary.py | 4 ++-- .../fate/ml/glm/hetero/coordinated_lr/guest.py | 2 +- .../fate/ml/glm/hetero/coordinated_lr/host.py | 3 ++- python/fate/ml/utils/_optimizer.py | 6 +++--- 7 files changed, 24 insertions(+), 36 deletions(-) diff --git a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py index ebe2b289e0..fc3f69209a 100644 --- a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py +++ b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py @@ -37,21 +37,8 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): assert isinstance(param, dict) - data_set = param.get("data_guest").split('/')[-1] - if data_set == "default_credit_hetero_guest.csv": - guest_data_table = 'default_credit_hetero_guest' - host_data_table = 'default_credit_hetero_host' - elif data_set == 'breast_hetero_guest.csv': - guest_data_table = 'breast_hetero_guest' - host_data_table = 'breast_hetero_host' - elif data_set == 'give_credit_hetero_guest.csv': - guest_data_table = 'give_credit_hetero_guest' - host_data_table = 'give_credit_hetero_host' - elif data_set == 'epsilon_5k_hetero_guest.csv': - guest_data_table = 'epsilon_5k_hetero_guest' - host_data_table = 'epsilon_5k_hetero_host' - else: - raise ValueError(f"Cannot recognized data_set: {data_set}") + guest_data_table = param.get("data_guest") + host_data_table = param.get("data_host") guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} diff --git a/examples/benchmark_quality/lr/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml index 4feac1af67..a3bef0a73c 100644 --- a/examples/benchmark_quality/lr/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -2,7 +2,7 @@ data_guest: "breast_hetero_guest" data_host: "breast_hetero_host" idx: "id" 
label_name: "y" -epochs: 8 +epochs: 20 init_param: fit_intercept: True method: "random_uniform" @@ -10,13 +10,13 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - factor: 0.2 - total_iters: 18 + factor: 1.0 + total_iters: 5 optimizer: method: "rmsprop" penalty: "l2" optimizer_params: - lr: 0.15 - alpha: 0.2 + lr: 0.12 + alpha: 0.1 batch_size: 240 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index c857d20e3f..0dd056766e 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -179,15 +179,15 @@ data: table_name: vehicle_scale_hetero_host namespace: experiment role: host_0 -hetero_lr-binary-0-breast: - local: - script: "./sklearn-lr-binary.py" - conf: "./breast_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./breast_config.yaml" - compare_setting: - relative_tol: 0.01 +#hetero_lr-binary-0-breast: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./breast_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-binary.py" +# conf: "./breast_config.yaml" +# compare_setting: +# relative_tol: 0.01 #hetero_lr-binary-1-default-credit: # local: # script: "./sklearn-lr-binary.py" diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index e418a1297a..94ac82fe18 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -42,7 +42,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): config_param = { "penalty": param["penalty"], - "max_iter": 100, + "max_iter": param["epochs"], "alpha": param["alpha"], "learning_rate": "optimal", "eta0": param["eta0"], @@ -76,7 +76,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): fpr, tpr, 
thresholds = roc_curve(y_test, y_prob) ks = max(tpr - fpr) - result = {"auc": auc_score, "binary_recall": recall, "binary_precision": pr, "binary_accuracy": acc} + result = {"auc": auc_score, "recall": recall, "binary": pr, "accuracy": acc} print(result) print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") return {}, result diff --git a/python/fate/ml/glm/hetero/coordinated_lr/guest.py b/python/fate/ml/glm/hetero/coordinated_lr/guest.py index 5adc10b73b..5164bb7d5c 100644 --- a/python/fate/ml/glm/hetero/coordinated_lr/guest.py +++ b/python/fate/ml/glm/hetero/coordinated_lr/guest.py @@ -365,7 +365,7 @@ def predict(self, ctx, test_data): test_data["intercept"] = 1.0 X = test_data.values.as_tensor() # logger.info(f"in predict, w: {self.w}") - pred = torch.matmul(X, self.w) + pred = torch.matmul(X, self.w.detach()) for h_pred in ctx.hosts.get("h_pred"): pred += h_pred pred = torch.sigmoid(pred) diff --git a/python/fate/ml/glm/hetero/coordinated_lr/host.py b/python/fate/ml/glm/hetero/coordinated_lr/host.py index 78e52e85ed..cb49a5a954 100644 --- a/python/fate/ml/glm/hetero/coordinated_lr/host.py +++ b/python/fate/ml/glm/hetero/coordinated_lr/host.py @@ -200,10 +200,11 @@ def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X): batch_ctx.guest.put("Xw2_h", encryptor.encrypt(torch.matmul(Xw_h.T, Xw_h))) loss_norm = self.optimizer.loss_norm(w) + if loss_norm is not None: batch_ctx.guest.put("h_loss", encryptor.encrypt(loss_norm)) else: - batch_ctx.guest.put(h_loss=loss_norm) + batch_ctx.guest.put("h_loss", loss_norm) g = 1 / h * (half_g + guest_half_g) return g diff --git a/python/fate/ml/utils/_optimizer.py b/python/fate/ml/utils/_optimizer.py index b0833163d2..e7868e85d6 100644 --- a/python/fate/ml/utils/_optimizer.py +++ b/python/fate/ml/utils/_optimizer.py @@ -158,8 +158,8 @@ def _l1_updator(self, model_weights, gradient, fit_intercept, lr): ) if fit_intercept: - new_weights = torch.concat((new_weights, 
model_weights.intercept_)) - new_weights[-1] -= gradient[-1] + new_intercept = model_weights[-1] - gradient[-1] + new_weights = torch.concat((new_weights, new_intercept.reshape((1, 1)))) return new_weights @@ -213,7 +213,7 @@ def regularization_update( def __l1_loss_norm(self, model_weights): loss_norm = torch.sum(self.alpha * model_weights) - return loss_norm + return loss_norm.reshape((1, 1)) def __l2_loss_norm(self, model_weights): loss_norm = 0.5 * self.alpha * \ From 97a903a967561f133c46c8f1513f1a1215b87658 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 9 Aug 2023 15:14:24 +0800 Subject: [PATCH 17/30] edit fate-test performance examples(#5008) Signed-off-by: Yu Wu --- .../benchmark_quality/lr/default_credit_config.yaml | 8 ++++---- examples/benchmark_quality/lr/epsilon_5k_config.yaml | 8 ++++---- .../lr/epsilon_5k_lr_sklearn_config.yaml | 4 ++-- examples/benchmark_quality/lr/give_credit_config.yaml | 11 ++++++----- examples/benchmark_quality/lr/sklearn-lr-binary.py | 2 +- python/fate_test/fate_test/scripts/data_cli.py | 8 ++++---- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index c45ef53d8a..8033d8af0d 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,16 +2,16 @@ data_guest: "default_credit_hetero_guest" data_host: "default_credit_hetero_host" idx: "id" label_name: "y" -epochs: 30 +epochs: 22 init_param: fit_intercept: True method: "zeros" random_state: 42 learning_rate_scheduler: - method: "constant" + method: "linear" scheduler_params: - factor: 1.0 - total_iters: 10000 + start_factor: 0.7 + total_iters: 1000 optimizer: method: "rmsprop" penalty: "L2" diff --git a/examples/benchmark_quality/lr/epsilon_5k_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_config.yaml index 6822e02ea7..39144f4fdb 100644 --- 
a/examples/benchmark_quality/lr/epsilon_5k_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_config.yaml @@ -2,21 +2,21 @@ data_guest: "epsilon_5k_hetero_guest" data_host: "epsilon_5k_hetero_host" idx: "id" label_name: "y" -epochs: 30 +epochs: 8 batch_size: 2500 init_param: fit_intercept: True method: "random" random_state: 42 learning_rate_scheduler: - method: "constant" + method: "linear" scheduler_params: - factor: 1.0 + start_factor: 0.7 total_iters: 1000 optimizer: method: "adam" penalty: "L2" alpha: 0.0001 optimizer_params: - lr: 0.3 + lr: 0.43 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml index bef62e89aa..7559f0bfa6 100644 --- a/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_lr_sklearn_config.yaml @@ -2,10 +2,10 @@ data_guest: "examples/data/epsilon_5k_hetero_guest.csv" data_host: "examples/data/epsilon_5k_hetero_host.csv" idx: "id" label_name: "y" -epochs: 30 +epochs: 10 fit_intercept: True method: "rmsprop" penalty: "L2" eta0: 0.1 -alpha: 0.5 +alpha: 0.001 batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index 73f2285fa1..dc041b48fe 100644 --- a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -7,14 +7,15 @@ init_param: fit_intercept: True method: "zeros" learning_rate_scheduler: - method: "constant" + method: "linear" scheduler_params: - factor: 1.0 - total_iters: 100 + factor: 0.7 + total_iters: 1000 optimizer: method: "adam" penalty: "L2" + alpha: 10 optimizer_params: - lr: 0.15 -batch_size: 550 + lr: 0.2 +batch_size: 5500 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py 
b/examples/benchmark_quality/lr/sklearn-lr-binary.py index 94ac82fe18..2a2710be2f 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -76,7 +76,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): fpr, tpr, thresholds = roc_curve(y_test, y_prob) ks = max(tpr - fpr) - result = {"auc": auc_score, "recall": recall, "binary": pr, "accuracy": acc} + result = {"auc": auc_score, "recall": recall, "binary_precision": pr, "accuracy": acc} print(result) print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") return {}, result diff --git a/python/fate_test/fate_test/scripts/data_cli.py b/python/fate_test/fate_test/scripts/data_cli.py index 01d8c19c33..7a09980dd2 100644 --- a/python/fate_test/fate_test/scripts/data_cli.py +++ b/python/fate_test/fate_test/scripts/data_cli.py @@ -51,8 +51,8 @@ def upload(ctx, include, exclude, glob, suite_type, role, config_type, **kwargs) config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] - if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"] + """if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" yes = ctx.obj["yes"] echo.welcome() echo.echo(f"testsuite namespace: {namespace}", fg='red') @@ -176,8 +176,8 @@ def generate(ctx, include, host_data_type, encryption_type, match_rate, sparsity config_inst = ctx.obj["config"] if ctx.obj["extend_sid"] is not None: config_inst.extend_sid = ctx.obj["extend_sid"] - if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"] + """if ctx.obj["auto_increasing_sid"] is not None: + config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" if parallelize and upload_data: upload_data = False yes = ctx.obj["yes"] From 
2db064db9f8b49db8eadd1b14141d8a96d2ceab9 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 9 Aug 2023 19:26:34 +0800 Subject: [PATCH 18/30] use encrypt_tensor api for phe encryptor(#4659) edit fate_test examples(#5008) replace Intersection with PSI in examples Signed-off-by: Yu Wu --- examples/benchmark_quality/linr/fate-linr.py | 18 ++--- .../benchmark_quality/lr/breast_config.yaml | 6 +- .../benchmark_quality/lr/lr_benchmark.yaml | 72 +++++++++---------- .../lr/pipeline-lr-binary.py | 18 ++--- .../benchmark_quality/lr/pipeline-lr-multi.py | 18 ++--- .../pipeline/coordinated_lr/test_lr_sid.py | 33 ++++----- .../pipeline/coordinated_lr/test_lr_sid_cv.py | 16 ++--- .../coordinated_lr/test_lr_sid_warm_start.py | 20 +++--- .../ml/glm/hetero/coordinated_linr/host.py | 13 ++-- .../ml/glm/hetero/coordinated_lr/guest.py | 3 +- .../fate/ml/glm/hetero/coordinated_lr/host.py | 13 ++-- 11 files changed, 117 insertions(+), 113 deletions(-) diff --git a/examples/benchmark_quality/linr/fate-linr.py b/examples/benchmark_quality/linr/fate-linr.py index 248b0afc79..bc85a9363e 100644 --- a/examples/benchmark_quality/linr/fate-linr.py +++ b/examples/benchmark_quality/linr/fate-linr.py @@ -17,7 +17,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLinR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLinR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -43,11 +43,11 @@ def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""): pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], - namespace=guest_train_data["namespace"])) - 
intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], - namespace=host_train_data["namespace"])) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) linr_param = { } @@ -63,10 +63,10 @@ def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""): } linr_param.update(config_param) linr_0 = CoordinatedLinR("linr_0", - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], **config_param) """linr_1 = CoordinatedLinR("linr_1", - test_data=intersect_0.outputs["output_data"], + test_data=psi_0.outputs["output_data"], input_model=linr_0.outputs["output_model"])""" evaluation_0 = Evaluation("evaluation_0", @@ -77,7 +77,7 @@ def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""): "rmse"], input_data=linr_0.outputs["train_output_data"]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(linr_0) # pipeline.add_task(linr_1) pipeline.add_task(evaluation_0) diff --git a/examples/benchmark_quality/lr/breast_config.yaml b/examples/benchmark_quality/lr/breast_config.yaml index a3bef0a73c..3d1747cc04 100644 --- a/examples/benchmark_quality/lr/breast_config.yaml +++ b/examples/benchmark_quality/lr/breast_config.yaml @@ -10,13 +10,13 @@ init_param: learning_rate_scheduler: method: "constant" scheduler_params: - factor: 1.0 + factor: 0.5 total_iters: 5 optimizer: method: "rmsprop" penalty: "l2" optimizer_params: - lr: 0.12 - alpha: 0.1 + lr: 0.15 + alpha: 0.01 batch_size: 240 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index 0dd056766e..d7852909a2 100644 --- 
a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -179,24 +179,24 @@ data: table_name: vehicle_scale_hetero_host namespace: experiment role: host_0 -#hetero_lr-binary-0-breast: -# local: -# script: "./sklearn-lr-binary.py" -# conf: "./breast_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./breast_config.yaml" -# compare_setting: -# relative_tol: 0.01 -#hetero_lr-binary-1-default-credit: -# local: -# script: "./sklearn-lr-binary.py" -# conf: "./default_credit_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./default_credit_config.yaml" -# compare_setting: -# relative_tol: 0.01 +hetero_lr-binary-0-breast: + local: + script: "./sklearn-lr-binary.py" + conf: "./breast_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./breast_config.yaml" + compare_setting: + relative_tol: 0.01 +hetero_lr-binary-1-default-credit: + local: + script: "./sklearn-lr-binary.py" + conf: "./default_credit_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./default_credit_config.yaml" + compare_setting: + relative_tol: 0.01 hetero_lr-binary-2-epsilon-5k: local: script: "./sklearn-lr-binary.py" @@ -206,21 +206,21 @@ hetero_lr-binary-2-epsilon-5k: conf: "./epsilon_5k_config.yaml" compare_setting: relative_tol: 0.01 -#hetero_lr-binary-3-give-credit: -# local: -# script: "./sklearn-lr-binary.py" -# conf: "./give_credit_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./give_credit_config.yaml" -# compare_setting: -# relative_tol: 0.01 -#multi-vehicle: -# local: -# script: "./sklearn-lr-multi.py" -# conf: "./vehicle_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-multi.py" -# conf: "./vehicle_config.yaml" -# compare_setting: -# relative_tol: 0.01 +hetero_lr-binary-3-give-credit: + local: + script: "./sklearn-lr-binary.py" + 
conf: "./give_credit_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./give_credit_config.yaml" + compare_setting: + relative_tol: 0.01 +multi-vehicle: + local: + script: "./sklearn-lr-multi.py" + conf: "./vehicle_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-multi.py" + conf: "./vehicle_config.yaml" + compare_setting: + relative_tol: 0.01 diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index c10dd7fcb6..9b41bbe612 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -17,7 +17,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -45,11 +45,11 @@ def main(config="../../config.yaml", param="./breast_config.yaml", namespace="") host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], - namespace=guest_train_data["namespace"])) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], - namespace=host_train_data["namespace"])) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) 
lr_param = { } @@ -65,10 +65,10 @@ def main(config="../../config.yaml", param="./breast_config.yaml", namespace="") } lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], **lr_param) lr_1 = CoordinatedLR("lr_1", - test_data=intersect_0.outputs["output_data"], + test_data=psi_0.outputs["output_data"], input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation("evaluation_0", @@ -77,7 +77,7 @@ def main(config="../../config.yaml", param="./breast_config.yaml", namespace="") metrics=["auc", "binary_precision", "binary_accuracy", "binary_recall"], input_data=lr_0.outputs["train_output_data"]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(evaluation_0) diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index 3868acbd60..a598403238 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -17,7 +17,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -44,11 +44,11 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], - namespace=guest_train_data["namespace"])) - 
intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], - namespace=host_train_data["namespace"])) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) lr_param = { } @@ -64,15 +64,15 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" } lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], **config_param) lr_1 = CoordinatedLR("lr_1", - test_data=intersect_0.outputs["output_data"], + test_data=psi_0.outputs["output_data"], input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation('evaluation_0', metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(evaluation_0) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid.py b/examples/pipeline/coordinated_lr/test_lr_sid.py index 9b2323fb05..b13c24f8db 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid.py @@ -16,7 +16,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -36,19 +36,19 @@ def main(config="./config.yaml", namespace=""): if config.timeout: pipeline.conf.set("timeout", config.timeout) - intersect_0 = Intersection("intersect_0", method="raw") - 
intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace=f"experiment{namespace}")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", - epochs=4, + epochs=10, batch_size=None, - optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], - learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, - "total_iters": 100}}) + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.21}}, + init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=psi_0.outputs["output_data"], + learning_rate_scheduler={"method": "linear", "scheduler_params": {"start_factor": 0.7, + "total_iters": 100}}) evaluation_0 = Evaluation("evaluation_0", label_column_name="y", @@ -56,22 +56,23 @@ def main(config="./config.yaml", namespace=""): default_eval_setting="binary", input_data=lr_0.outputs["train_output_data"]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) pipeline.compile() print(pipeline.get_dag()) pipeline.fit() - pipeline.deploy([intersect_0, lr_0]) + pipeline.deploy([psi_0, lr_0]) predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - deployed_pipeline.intersect_0.guest.component_setting( + deployed_pipeline.psi_0.guest.component_setting( input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - 
deployed_pipeline.intersect_0.hosts[0].component_setting( + deployed_pipeline.psi_0.hosts[0].component_setting( input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py index 5e5a3f40bc..8caffd245b 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_cv.py @@ -16,7 +16,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -34,20 +34,20 @@ def main(config="./config.yaml", namespace=""): if config.timeout: pipeline.conf.set("timeout", config.timeout) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace=f"experiment{namespace}")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=2, batch_size=100, optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, init_param={"fit_intercept": True}, - cv_data=intersect_0.outputs["output_data"], + cv_data=psi_0.outputs["output_data"], cv_param={"n_splits": 3}) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.compile() print(pipeline.get_dag()) diff --git 
a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py index 0c33c952d6..25ba007959 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py +++ b/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py @@ -16,7 +16,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -35,20 +35,20 @@ def main(config="./config.yaml", namespace=""): if config.timeout: pipeline.conf.set("timeout", config.timeout) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace=f"experiment{namespace}")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=4, batch_size=None, optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, "total_iters": 100}}) - lr_1 = CoordinatedLR("lr_1", train_data=intersect_0.outputs["output_data"], + lr_1 = CoordinatedLR("lr_1", train_data=psi_0.outputs["output_data"], 
warm_start_model=lr_0.outputs["output_model"], epochs=2, batch_size=None, @@ -59,7 +59,7 @@ def main(config="./config.yaml", namespace=""): batch_size=None, optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, "total_iters": 100}}) @@ -69,7 +69,7 @@ def main(config="./config.yaml", namespace=""): default_eval_setting="binary", input_data=[lr_1.outputs["train_output_data"], lr_2.outputs["train_output_data"]]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(lr_2) diff --git a/python/fate/ml/glm/hetero/coordinated_linr/host.py b/python/fate/ml/glm/hetero/coordinated_linr/host.py index 1473094c9b..a33a7019cf 100644 --- a/python/fate/ml/glm/hetero/coordinated_linr/host.py +++ b/python/fate/ml/glm/hetero/coordinated_linr/host.py @@ -15,6 +15,7 @@ import logging import torch + from fate.arch import Context from fate.arch.dataframe import DataLoader from fate.ml.abc.module import HeteroModule @@ -121,15 +122,15 @@ def __init__(self, epochs=None, batch_size=None, optimizer=None, learning_rate_s def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X): h = X.shape[0] Xw_h = torch.matmul(X, w.detach()) - batch_ctx.guest.put("Xw_h", encryptor.encrypt(Xw_h)) + batch_ctx.guest.put("Xw_h", encryptor.encrypt_tensor(Xw_h)) half_g = torch.matmul(X.T, Xw_h) guest_half_d = batch_ctx.guest.get("half_d") guest_half_g = torch.matmul(X.T, guest_half_d) - batch_ctx.guest.put("Xw2_h", encryptor.encrypt(torch.matmul(Xw_h.T, Xw_h))) + batch_ctx.guest.put("Xw2_h", encryptor.encrypt_tensor(torch.matmul(Xw_h.T, Xw_h))) loss_norm = self.optimizer.loss_norm(w) if loss_norm is not None: - batch_ctx.guest.put("h_loss", encryptor.encrypt(loss_norm)) + 
batch_ctx.guest.put("h_loss", encryptor.encrypt_tensor(loss_norm)) else: batch_ctx.guest.put(h_loss=loss_norm) @@ -139,12 +140,12 @@ def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X): def centralized_compute_gradient(self, batch_ctx, encryptor, w, X): h = X.shape[0] Xw_h = torch.matmul(X, w.detach()) - batch_ctx.guest.put("Xw_h", encryptor.encrypt(Xw_h)) - batch_ctx.guest.put("Xw2_h", encryptor.encrypt(torch.matmul(Xw_h.T, Xw_h))) + batch_ctx.guest.put("Xw_h", encryptor.encrypt_tensor(Xw_h)) + batch_ctx.guest.put("Xw2_h", encryptor.encrypt_tensor(torch.matmul(Xw_h.T, Xw_h))) loss_norm = self.optimizer.loss_norm(w) if loss_norm is not None: - batch_ctx.guest.put("h_loss", encryptor.encrypt(loss_norm)) + batch_ctx.guest.put("h_loss", encryptor.encrypt_tensor(loss_norm)) else: batch_ctx.guest.put(h_loss=loss_norm) diff --git a/python/fate/ml/glm/hetero/coordinated_lr/guest.py b/python/fate/ml/glm/hetero/coordinated_lr/guest.py index ea4d6cb662..1969bba4d2 100644 --- a/python/fate/ml/glm/hetero/coordinated_lr/guest.py +++ b/python/fate/ml/glm/hetero/coordinated_lr/guest.py @@ -16,6 +16,7 @@ import logging import torch + from fate.arch import Context, dataframe from fate.ml.abc.module import HeteroModule from fate.ml.utils import predict_tools @@ -247,7 +248,7 @@ def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X, Y, weight): half_d = 0.25 * Xw - 0.5 * Y if weight: half_d = half_d * weight - batch_ctx.hosts.put("half_d", encryptor.encrypt(half_d)) + batch_ctx.hosts.put("half_d", encryptor.encrypt_tensor(half_d)) half_g = torch.matmul(X.T, half_d) Xw_h = batch_ctx.hosts.get("Xw_h")[0] diff --git a/python/fate/ml/glm/hetero/coordinated_lr/host.py b/python/fate/ml/glm/hetero/coordinated_lr/host.py index 5395f2ab36..d1957e69ac 100644 --- a/python/fate/ml/glm/hetero/coordinated_lr/host.py +++ b/python/fate/ml/glm/hetero/coordinated_lr/host.py @@ -15,6 +15,7 @@ import logging import torch + from fate.arch import Context from 
fate.arch.dataframe import DataLoader from fate.ml.abc.module import HeteroModule @@ -205,17 +206,17 @@ def __init__(self, epochs=None, batch_size=None, optimizer=None, learning_rate_s def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X): h = X.shape[0] Xw_h = 0.25 * torch.matmul(X, w.detach()) - batch_ctx.guest.put("Xw_h", encryptor.encrypt(Xw_h)) + batch_ctx.guest.put("Xw_h", encryptor.encrypt_tensor(Xw_h)) half_g = torch.matmul(X.T, Xw_h) guest_half_d = batch_ctx.guest.get("half_d") guest_half_g = torch.matmul(X.T, guest_half_d) - batch_ctx.guest.put("Xw2_h", encryptor.encrypt(torch.matmul(Xw_h.T, Xw_h))) + batch_ctx.guest.put("Xw2_h", encryptor.encrypt_tensor(torch.matmul(Xw_h.T, Xw_h))) loss_norm = self.optimizer.loss_norm(w) if loss_norm is not None: - batch_ctx.guest.put("h_loss", encryptor.encrypt(loss_norm)) + batch_ctx.guest.put("h_loss", encryptor.encrypt_tensor(loss_norm)) else: batch_ctx.guest.put("h_loss", loss_norm) @@ -225,12 +226,12 @@ def asynchronous_compute_gradient(self, batch_ctx, encryptor, w, X): def centralized_compute_gradient(self, batch_ctx, encryptor, w, X): h = X.shape[0] Xw_h = 0.25 * torch.matmul(X, w.detach()) - batch_ctx.guest.put("Xw_h", encryptor.encrypt(Xw_h)) - batch_ctx.guest.put("Xw2_h", encryptor.encrypt(torch.matmul(Xw_h.T, Xw_h))) + batch_ctx.guest.put("Xw_h", encryptor.encrypt_tensor(Xw_h)) + batch_ctx.guest.put("Xw2_h", encryptor.encrypt_tensor(torch.matmul(Xw_h.T, Xw_h))) loss_norm = self.optimizer.loss_norm(w) if loss_norm is not None: - batch_ctx.guest.put("h_loss", encryptor.encrypt(loss_norm)) + batch_ctx.guest.put("h_loss", encryptor.encrypt_tensor(loss_norm)) else: batch_ctx.guest.put(h_loss=loss_norm) From 9e2086939d25d8ea4b730798b363df186e104e83 Mon Sep 17 00:00:00 2001 From: mgqa34 Date: Thu, 10 Aug 2023 16:39:51 +0800 Subject: [PATCH 19/30] dataframe: add block_row_size to manager single block size Signed-off-by: mgqa34 --- python/fate/arch/dataframe/_frame_reader.py | 43 +++++++--- 
python/fate/arch/dataframe/conf/__init__.py | 0 .../arch/dataframe/conf/default_config.py | 17 ++++ .../arch/dataframe/manager/data_manager.py | 13 ++- python/fate/arch/dataframe/ops/_indexer.py | 79 ++++++++++++++----- 5 files changed, 119 insertions(+), 33 deletions(-) create mode 100644 python/fate/arch/dataframe/conf/__init__.py create mode 100644 python/fate/arch/dataframe/conf/default_config.py diff --git a/python/fate/arch/dataframe/_frame_reader.py b/python/fate/arch/dataframe/_frame_reader.py index 214c521773..5b6a3c9ecd 100644 --- a/python/fate/arch/dataframe/_frame_reader.py +++ b/python/fate/arch/dataframe/_frame_reader.py @@ -17,6 +17,7 @@ from typing import Union +from .conf.default_config import DATAFRAME_BLOCK_ROW_SIZE from .entity import types from ._dataframe import DataFrame from .manager import DataManager @@ -41,7 +42,8 @@ def __init__( na_values: Union[str, list, dict] = None, input_format: str = "dense", tag_with_value: bool = False, - tag_value_delimiter: str = ":" + tag_value_delimiter: str = ":", + block_row_size: int = None ): self._sample_id_name = sample_id_name self._match_id_name = match_id_name @@ -60,6 +62,7 @@ def __init__( self._input_format = input_format self._tag_with_value = tag_with_value self._tag_value_delimiter = tag_value_delimiter + self._block_row_size = block_row_size if block_row_size is not None else DATAFRAME_BLOCK_ROW_SIZE self.check_params() @@ -67,6 +70,9 @@ def check_params(self): if not self._sample_id_name: raise ValueError("Please provide sample_id_name") + if not isinstance(self._block_row_size, int) or self._block_row_size < 0: + raise ValueError("block_row_size should be positive integer") + def to_frame(self, ctx, table): if self._input_format != "dense": raise ValueError("Only support dense input format in this version.") @@ -74,7 +80,7 @@ def to_frame(self, ctx, table): return self._dense_format_to_frame(ctx, table) def _dense_format_to_frame(self, ctx, table): - data_manager = DataManager() + 
data_manager = DataManager(block_row_size=self._block_row_size) columns = self._header.split(self._delimiter, -1) columns.remove(self._sample_id_name) retrieval_index_dict = data_manager.init_from_local_file( @@ -84,7 +90,7 @@ def _dense_format_to_frame(self, ctx, table): dtype=self._dtype, default_type=types.DEFAULT_DATA_TYPE) from .ops._indexer import get_partition_order_by_raw_table - partition_order_mappings = get_partition_order_by_raw_table(table) + partition_order_mappings = get_partition_order_by_raw_table(table, data_manager.block_row_size) # partition_order_mappings = _get_partition_order(table) table = table.mapValues(lambda value: value.split(self._delimiter, -1)) to_block_func = functools.partial(_to_blocks, @@ -129,7 +135,8 @@ def __init__( weight_type: str = "float32", dtype: str = "float32", na_values: Union[None, str, list, dict] = None, - partition: int = 4 + partition: int = 4, + block_row_size: int = None ): self._sample_id_name = sample_id_name self._match_id_list = match_id_list @@ -142,6 +149,7 @@ def __init__( self._dtype = dtype self._na_values = na_values self._partition = partition + self._block_row_size = block_row_size if block_row_size is not None else DATAFRAME_BLOCK_ROW_SIZE def to_frame(self, ctx, path): # TODO: use table put data instead of read all data @@ -156,6 +164,7 @@ def to_frame(self, ctx, path): weight_name=self._weight_name, dtype=self._dtype, partition=self._partition, + block_row_size=self._block_row_size ).to_frame(ctx, df) @@ -194,6 +203,7 @@ def __init__( weight_type: str = "float32", dtype: str = "float32", partition: int = 4, + block_row_size: int = None, ): self._sample_id_name = sample_id_name self._match_id_list = match_id_list @@ -204,6 +214,7 @@ def __init__( self._weight_type = weight_type self._dtype = dtype self._partition = partition + self._block_row_size = block_row_size if block_row_size is not None else DATAFRAME_BLOCK_ROW_SIZE if self._sample_id_name and not self._match_id_name: raise ValueError(f"As 
sample_id {self._sample_id_name} is given, match_id should be given too") @@ -215,7 +226,7 @@ def to_frame(self, ctx, df: "pd.DataFrame"): else: df = df.set_index(self._sample_id_name) - data_manager = DataManager() + data_manager = DataManager(block_row_size=self._block_row_size) retrieval_index_dict = data_manager.init_from_local_file( sample_id_name=self._sample_id_name, columns=df.columns.tolist(), match_id_list=self._match_id_list, match_id_name=self._match_id_name, label_name=self._label_name, weight_name=self._weight_name, @@ -260,11 +271,11 @@ def _to_blocks(kvs, """ sample_id/match_id,label(maybe missing),weight(maybe missing),X """ - partition_id = None + block_id = None schema = data_manager.schema - splits = [[] for idx in range(data_manager.block_num)] + splits = [[] for _ in range(data_manager.block_num)] sample_id_block = data_manager.loc_block(schema.sample_id_name, with_offset=False) if schema.sample_id_name else None match_id_block = data_manager.loc_block(schema.match_id_name, with_offset=False)if schema.match_id_name else None @@ -287,9 +298,13 @@ def _to_blocks(kvs, column_blocks_mapping[bid].append(col_id) + block_row_size = data_manager.block_row_size + + lid = 0 for key, value in kvs: - if partition_id is None: - partition_id = partition_order_mappings[key]["block_id"] + if block_id is None: + block_id = partition_order_mappings[key]["start_block_id"] + lid += 1 # columns = value.split(",", -1) splits[sample_id_block].append(key) @@ -303,6 +318,12 @@ def _to_blocks(kvs, for bid, col_id_list in column_blocks_mapping.items(): splits[bid].append([value[col_id] for col_id in col_id_list]) - converted_blocks = data_manager.convert_to_blocks(splits) + if lid % block_row_size == 0: + converted_blocks = data_manager.convert_to_blocks(splits) + yield block_id, converted_blocks + block_id += 1 + splits = [[] for _ in range(data_manager.block_num)] - return [(partition_id, converted_blocks)] + if lid % block_row_size: + converted_blocks = 
data_manager.convert_to_blocks(splits) + yield block_id, converted_blocks diff --git a/python/fate/arch/dataframe/conf/__init__.py b/python/fate/arch/dataframe/conf/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/fate/arch/dataframe/conf/default_config.py b/python/fate/arch/dataframe/conf/default_config.py new file mode 100644 index 0000000000..41b439b1f0 --- /dev/null +++ b/python/fate/arch/dataframe/conf/default_config.py @@ -0,0 +1,17 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +DATAFRAME_BLOCK_ROW_SIZE = 2**6 + diff --git a/python/fate/arch/dataframe/manager/data_manager.py b/python/fate/arch/dataframe/manager/data_manager.py index 5d0b5c20ad..56fc5d427e 100644 --- a/python/fate/arch/dataframe/manager/data_manager.py +++ b/python/fate/arch/dataframe/manager/data_manager.py @@ -19,12 +19,19 @@ from .block_manager import BlockType from ..entity import types from typing import Union, List, Tuple +from ..conf.default_config import DATAFRAME_BLOCK_ROW_SIZE class DataManager(object): - def __init__(self, schema_manager: SchemaManager = None, block_manager: BlockManager = None): + def __init__( + self, + schema_manager: SchemaManager = None, + block_manager: BlockManager = None, + block_row_size: int = DATAFRAME_BLOCK_ROW_SIZE + ): self._schema_manager = schema_manager self._block_manager = block_manager + self._block_row_size = block_row_size @property def blocks(self): @@ -34,6 +41,10 @@ def blocks(self): def block_num(self): return len(self._block_manager.blocks) + @property + def block_row_size(self): + return self._block_row_size + @property def schema(self): return self._schema_manager.schema diff --git a/python/fate/arch/dataframe/ops/_indexer.py b/python/fate/arch/dataframe/ops/_indexer.py index a054007a66..e92f698be5 100644 --- a/python/fate/arch/dataframe/ops/_indexer.py +++ b/python/fate/arch/dataframe/ops/_indexer.py @@ -41,7 +41,6 @@ def _aggregate(kvs): return list(aggregate_ret.items()) agg_indexer = indexer.mapReducePartitions(_aggregate, lambda l1, l2: l1 + l2) - # agg_indexer = agg_indexer.mapValues(lambda v: sorted(v, key=lambda x: x[1])) return agg_indexer @@ -60,19 +59,38 @@ def _convert_to_order_index(kvs): use_previous_behavior=False) -def get_partition_order_mappings(block_table): - block_info = sorted(list(block_table.mapValues(lambda blocks: (blocks[0][0], len(blocks[0]))).collect())) +def get_partition_order_mappings_by_block_table(block_table, block_row_size): + def _block_counter(kvs): + partition_key = None + 
size = 0 + first_block_id = 0 + for k, v in kvs: + if partition_key is None: + partition_key = k + + size += len(v[0]) + + return first_block_id, (partition_key, size) + + block_info = sorted([summary[1] for summary in block_table.applyPartitions(_block_counter).collect()]) block_order_mappings = dict() start_index = 0 + acc_block_num = 0 for block_id, (block_key, block_size) in block_info: + block_num = (block_size + block_row_size - 1) // block_row_size block_order_mappings[block_key] = dict( - start_index=start_index, end_index=start_index + block_size - 1, block_id=block_id) + start_index=start_index, + end_index=start_index + block_size - 1, + start_block_id=acc_block_num, + end_block_id=acc_block_num + block_num - 1 + ) start_index += block_size + acc_block_num += block_num return block_order_mappings -def get_partition_order_by_raw_table(table): +def get_partition_order_by_raw_table(table, block_row_size): def _get_block_summary(kvs): try: key = next(kvs)[0] @@ -84,15 +102,19 @@ def _get_block_summary(kvs): block_summary = table.mapPartitions(_get_block_summary).reduce(lambda blk1, blk2: {**blk1, **blk2}) - start_index, block_id = 0, 0 + start_index, acc_block_num = 0, 0 block_order_mappings = dict() for blk_key, blk_size in block_summary.items(): + block_num = (blk_size + block_row_size - 1) // block_row_size block_order_mappings[blk_key] = dict( - start_index=start_index, end_index=start_index + blk_size - 1, block_id=block_id + start_index=start_index, + end_index=start_index + blk_size - 1, + start_block_id=acc_block_num, + end_block_id=acc_block_num + block_num - 1 ) start_index += blk_size - block_id += 1 + acc_block_num += block_num return block_order_mappings @@ -198,7 +220,7 @@ def _convert_to_block(kvs): block_table = block_table.mapValues(lambda values: [v[1] for v in values]) block_table = transform_list_block_to_frame_block(block_table, df.data_manager) - partition_order_mappings = get_partition_order_mappings(block_table) + 
partition_order_mappings = get_partition_order_mappings_by_block_table(block_table, df.data_manager.block_row_size) return DataFrame( df._ctx, block_table, @@ -249,18 +271,18 @@ def _retrieval_mapper(key, value): return retrieval_ret agg_indexer = indexer.mapReducePartitions(_agg_mapper, _agg_reducer) - raw_table = df.block_table.join(agg_indexer, lambda v1, v2: (v1, v2)).flatMap(_retrieval_mapper) - - partition_order_mappings = get_partition_order_by_raw_table(raw_table) + partition_order_mappings = get_partition_order_by_raw_table(raw_table, data_manager.block_row_size) def _convert_to_blocks(kvs): bid = None ret_blocks = [[] for _ in range(block_num)] - for offset, (sample_id, data) in enumerate(kvs): + lid = 0 + for sample_id, data in kvs: + lid += 1 if bid is None: - bid = partition_order_mappings[sample_id]["block_id"] + bid = partition_order_mappings[sample_id]["start_block_id"] if return_new_indexer: data = data[0] @@ -271,9 +293,15 @@ def _convert_to_blocks(kvs): else: ret_blocks[i].append(data[i]) - ret_blocks = [data_manager.blocks[i].convert_block(block) for i, block in enumerate(ret_blocks)] + if lid % data_manager.block_row_size == 0: + ret_blocks = [data_manager.blocks[i].convert_block(block) for i, block in enumerate(ret_blocks)] + yield bid, ret_blocks + bid += 1 + ret_blocks = [[] for _ in range(block_num)] - return [(bid, ret_blocks)] + if lid % data_manager.block_row_size: + ret_blocks = [data_manager.blocks[i].convert_block(block) for i, block in enumerate(ret_blocks)] + yield bid, ret_blocks block_table = raw_table.mapPartitions(_convert_to_blocks, use_previous_behavior=False) @@ -289,11 +317,17 @@ def _convert_to_blocks(kvs): else: def _mapper(kvs): bid = None - for offset, (sample_id, (_, k)) in enumerate(kvs): + offset = 0 + for sample_id, (_, k) in kvs: if bid is None: - bid = partition_order_mappings[sample_id]["block_id"] + bid = partition_order_mappings[sample_id]["start_block_id"] yield k, [(sample_id, bid, offset)] + offset += 1 + + 
if offset == data_manager.block_row_size: + bid += 1 + offset = 0 new_indexer = raw_table.mapReducePartitions(_mapper, lambda v1, v2: v1 + v2) @@ -304,7 +338,7 @@ def loc_with_sample_id_replacement(df: DataFrame, indexer): """ indexer: table, row: (key=random_key, - value=((src_partition_id, src_offset), [(sample_id, dst_partition_id, dst_offset) ...]) + value=((src_partition_id, src_offset), [(sample_id, dst_block_id, dst_offset) ...]) """ agg_indexer = aggregate_indexer(indexer) @@ -316,7 +350,6 @@ def _convert_to_block(kvs): """ block_indexer: row_id, [(sample_id, new_block_id, new_row_id)...] """ - for src_row_id, dst_indexer_list in block_indexer: for sample_id, dst_block_id, dst_row_id in dst_indexer_list: if dst_block_id not in ret_dict: @@ -343,7 +376,11 @@ def _convert_to_block(kvs): block_table = block_table.mapValues(lambda values: [v[1] for v in values]) block_table = transform_list_block_to_frame_block(block_table, df.data_manager) - partition_order_mappings = get_partition_order_mappings(block_table) + partition_order_mappings = get_partition_order_mappings_by_block_table( + block_table, + df.data_manager.block_row_size + ) + return DataFrame( df._ctx, block_table, From c557b2f7c4e08827ae55ae23218e2a85a1027383 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 10 Aug 2023 20:00:13 +0800 Subject: [PATCH 20/30] fix LR ovr predict(#4659) fix fate-test data cli(#5008) edit bq examples & add pipeline testsuite(#5008) Signed-off-by: Yu Wu --- doc/api/fate_test.md | 916 ++++++++++++++++++ doc/tutorial/fate_test_tutorial.md | 91 ++ .../coordinated_lr/test_lr_sid.py | 18 +- .../lr/default_credit_config.yaml | 4 +- .../lr/epsilon_5k_config.yaml | 2 +- .../lr/give_credit_config.yaml | 10 +- .../benchmark_quality/lr/lr_benchmark.yaml | 36 +- .../benchmark_quality/lr/pipeline-lr-multi.py | 1 + .../benchmark_quality/lr/sklearn-lr-multi.py | 4 +- .../pipeline/{coordinated_lr => }/config.yaml | 0 .../coordinated_linr_testsuite.yaml | 60 ++ 
.../pipeline/coordinated_linr/test_linr.py | 87 ++ .../pipeline/coordinated_linr/test_linr_cv.py | 64 ++ .../coordinated_linr/test_linr_multi_host.py | 93 ++ .../coordinated_linr/test_linr_warm_start.py | 95 ++ .../coordinated_lr_testsuite.yaml | 62 +- .../{test_lr_sid.py => test_lr.py} | 8 +- .../{test_lr_sid_cv.py => test_lr_cv.py} | 8 +- .../coordinated_lr/test_lr_multi_class.py | 94 ++ .../test_lr_multi_host.py} | 44 +- .../coordinated_lr/test_lr_validate.py | 80 ++ ...id_warm_start.py => test_lr_warm_start.py} | 7 +- .../data_split/data_split_lr_testsuite.yaml | 40 + .../pipeline/data_split/test_data_split.py | 91 ++ .../data_split/test_data_split_stratified.py | 94 ++ .../binning_testsuite.yaml | 42 + .../test_feature_binning_asymmetric.py | 92 ++ .../test_feature_binning_bucket.py | 96 ++ .../test_feature_binning_quantile.py | 91 ++ .../selection_testsuite.yaml | 44 + .../test_feature_selection_binning.py | 88 ++ .../test_feature_selection_manual.py | 80 ++ .../test_feature_selection_multi_model.py | 94 ++ .../test_feature_selection_statistics.py | 83 ++ examples/pipeline/multi_model/test_multi.py | 129 +++ .../pipeline/sample/sample_testsuite.yaml | 40 + examples/pipeline/sample/test_sample.py | 79 ++ .../pipeline/sample/test_sample_unilateral.py | 80 ++ examples/pipeline/scale/scale_testsuite.yaml | 42 + examples/pipeline/scale/test_scale_min_max.py | 99 ++ .../pipeline/scale/test_scale_standard.py | 94 ++ examples/pipeline/scale/test_scale_w_lr.py | 103 ++ .../statistics/statistics_testsuite.yaml | 38 + .../pipeline/statistics/test_statistics.py | 61 ++ examples/pipeline/test_data_split.py | 68 -- .../pipeline/test_data_split_stratified.py | 69 -- examples/pipeline/test_linr_sid_cv.py | 38 - examples/pipeline/test_linr_sid_warm_start.py | 89 -- examples/pipeline/test_sample.py | 62 -- examples/pipeline/test_scale.py | 72 -- examples/pipeline/test_single_linr.py | 72 -- examples/pipeline/test_single_lr.py | 71 -- examples/pipeline/test_single_lr_multi.py | 
73 -- examples/pipeline/union/test_union.py | 81 ++ examples/pipeline/union/union_testsuite.yaml | 38 + .../ml/glm/hetero/coordinated_lr/guest.py | 2 +- .../fate_test/fate_test/scripts/data_cli.py | 46 +- 57 files changed, 3559 insertions(+), 706 deletions(-) create mode 100644 doc/api/fate_test.md create mode 100644 doc/tutorial/fate_test_tutorial.md rename examples/pipeline/{coordinated_lr => }/config.yaml (100%) create mode 100644 examples/pipeline/coordinated_linr/coordinated_linr_testsuite.yaml create mode 100644 examples/pipeline/coordinated_linr/test_linr.py create mode 100644 examples/pipeline/coordinated_linr/test_linr_cv.py create mode 100644 examples/pipeline/coordinated_linr/test_linr_multi_host.py create mode 100644 examples/pipeline/coordinated_linr/test_linr_warm_start.py rename examples/pipeline/coordinated_lr/{test_lr_sid.py => test_lr.py} (94%) rename examples/pipeline/coordinated_lr/{test_lr_sid_cv.py => test_lr_cv.py} (91%) create mode 100644 examples/pipeline/coordinated_lr/test_lr_multi_class.py rename examples/pipeline/{test_single_lr_multi_host.py => coordinated_lr/test_lr_multi_host.py} (74%) create mode 100644 examples/pipeline/coordinated_lr/test_lr_validate.py rename examples/pipeline/coordinated_lr/{test_lr_sid_warm_start.py => test_lr_warm_start.py} (95%) create mode 100644 examples/pipeline/data_split/data_split_lr_testsuite.yaml create mode 100644 examples/pipeline/data_split/test_data_split.py create mode 100644 examples/pipeline/data_split/test_data_split_stratified.py create mode 100644 examples/pipeline/hetero_feature_binning/binning_testsuite.yaml create mode 100644 examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py create mode 100644 examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py create mode 100644 examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py create mode 100644 examples/pipeline/hetero_feature_selection/selection_testsuite.yaml create mode 100644 
examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py create mode 100644 examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py create mode 100644 examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py create mode 100644 examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py create mode 100644 examples/pipeline/multi_model/test_multi.py create mode 100644 examples/pipeline/sample/sample_testsuite.yaml create mode 100644 examples/pipeline/sample/test_sample.py create mode 100644 examples/pipeline/sample/test_sample_unilateral.py create mode 100644 examples/pipeline/scale/scale_testsuite.yaml create mode 100644 examples/pipeline/scale/test_scale_min_max.py create mode 100644 examples/pipeline/scale/test_scale_standard.py create mode 100644 examples/pipeline/scale/test_scale_w_lr.py create mode 100644 examples/pipeline/statistics/statistics_testsuite.yaml create mode 100644 examples/pipeline/statistics/test_statistics.py delete mode 100644 examples/pipeline/test_data_split.py delete mode 100644 examples/pipeline/test_data_split_stratified.py delete mode 100644 examples/pipeline/test_linr_sid_cv.py delete mode 100644 examples/pipeline/test_linr_sid_warm_start.py delete mode 100644 examples/pipeline/test_sample.py delete mode 100644 examples/pipeline/test_scale.py delete mode 100644 examples/pipeline/test_single_linr.py delete mode 100644 examples/pipeline/test_single_lr.py delete mode 100644 examples/pipeline/test_single_lr_multi.py create mode 100644 examples/pipeline/union/test_union.py create mode 100644 examples/pipeline/union/union_testsuite.yaml diff --git a/doc/api/fate_test.md b/doc/api/fate_test.md new file mode 100644 index 0000000000..c6bd4c4af4 --- /dev/null +++ b/doc/api/fate_test.md @@ -0,0 +1,916 @@ +# FATE Test + +A collection of useful tools to running FATE's test. + +## Testsuite + +Testsuite is used for running a collection of jobs in sequence. 
Data +used for jobs could be uploaded before jobs are submitted and, +optionally, be cleaned after jobs finish. This tool is useful for FATE's +release test. + +### command options + +```bash +fate_test suite --help +``` + +1. include: + + ```bash + fate_test suite -i + ``` + + will run testsuites in + *path1* + +2. exclude: + + ```bash + fate_test suite -i -e -e ... + ``` + + will run testsuites in *path1* but not in *path2* and *path3* + +3. glob: + + ```bash + fate_test suite -i -g "hetero*" + ``` + + will run testsuites in sub directory start with *hetero* of + *path1* + +4. timeout: + + ```bash + fate_test suite -i -m 3600 + ``` + + will run testsuites in *path1* and timeout when job does not finish + within 3600s; if tasks need more time, use a larger threshold + +5. task-cores + + ```bash + fate_test suite -i -p 4 + ``` + + will run testsuites in *path1* with EGGROLL "task-cores" set to 4; + only effective for DSL conf + +6. skip-data: + + ```bash + fate_test suite -i --skip-data + ``` + + will run testsuites in *path1* without uploading data specified in + *testsuite.yaml*. + +7. data-only: + + ```bash + fate_test suite -i --data-only + ``` + + will only upload data specified in *testsuite.yaml* without running + jobs + +8. disable-clean-data: + + ```bash + fate_test suite -i --disable-clean-data + ``` + + will run testsuites in *path1* without removing data from storage + after tasks + finish + +9. enable-clean-data: + + ```bash + fate_test suite -i --enable-clean-data + ``` + + will remove data from storage after finishing running testsuites + +10. yes: + + ```bash + fate_test suite -i --yes + ``` + + will run testsuites in *path1* directly, skipping double check + +### testsuite configuration + +Configuration of jobs should be specified in a testsuite whose file name +ends with "\*testsuite.yaml". For testsuite examples, please refer [pipeline +examples](../../examples/pipeline). 
+ +A testsuite includes the following elements: + +- data: list of local data to be uploaded before running FATE jobs + + - file: path to original data file to be uploaded, should be + relative to testsuite or FATE installation path + - meta: information regarding parsing input data, including + - delimiter + - dtype, + - label\_type + - weight\_type + - input format + - match\_id\_name + - sample\_id\_name + - partitions: number of partition for data storage + - head: whether table includes header + - extend_sid: whether automatically extend sample id + - table\_name: table name in storage + - namespace: table namespace in storage + - role: which role to upload the data, as specified in + fate\_test.config; naming format is: + "{role\_type}\_{role\_index}", index starts at 0 + + ```yaml + data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + ``` + +- tasks: includes arbitrary number of pipeline jobs with + paths to corresponding python script + + - job: name of job to be run, must be unique within each group + list + + - script: path to pipeline script, should be relative to + testsuite + + ```yaml + tasks: + normal-lr: + script: test_lr_sid.py + ``` + +## Benchmark Quality + +Benchmark-quality is used for comparing modeling quality between FATE +and other machine learning systems. Benchmark produces a metrics +comparison summary for each benchmark job group. + +Benchmark can also compare metrics of different models from the same +script/PipeLine job. Please refer to the [script writing +guide](#testing-script(quality)) below for +instructions. 
+ +```bash +fate_test benchmark-quality -i examples/benchmark_quality/hetero_linear_regression +``` + +```bash +|----------------------------------------------------------------------| +| Data Summary | +|-------+--------------------------------------------------------------| +| Data | Information | +|-------+--------------------------------------------------------------| +| train | {'guest': 'motor_hetero_guest', 'host': 'motor_hetero_host'} | +| test | {'guest': 'motor_hetero_guest', 'host': 'motor_hetero_host'} | +|-------+--------------------------------------------------------------| + + +|-------------------------------------------------------------------------------------------------------------------------------------| +| Metrics Summary | +|-------------------------------------------+-------------------------+--------------------+---------------------+--------------------| +| Model Name | root_mean_squared_error | r2_score | mean_squared_error | explained_variance | +|-------------------------------------------+-------------------------+--------------------+---------------------+--------------------| +| local-hetero_linear_regression-regression | 0.312552080517407 | 0.9040310440206087 | 0.09768880303575968 | 0.9040312584426697 | +| FATE-hetero_linear_regression-regression | 0.3139977881119483 | 0.9031411831961411 | 0.09859461093919598 | 0.903146386539082 | +|-------------------------------------------+-------------------------+--------------------+---------------------+--------------------| +|-------------------------------------| +| Match Results | +|-------------------------+-----------| +| Metric | All Match | +| root_mean_squared_error | True | +| r2_score | True | +| mean_squared_error | True | +| explained_variance | True | +|-------------------------+-----------| + + +|-------------------------------------------------------------------------------------| +| FATE Script Metrics Summary | 
+|--------------------+---------------------+--------------------+---------------------| +| Script Model Name | min | max | mean | +|--------------------+---------------------+--------------------+---------------------| +| linr_train-FATE | -1.5305666678748353 | 1.4968292506353484 | 0.03948016870496807 | +| linr_validate-FATE | -1.5305666678748353 | 1.4968292506353484 | 0.03948016870496807 | +|--------------------+---------------------+--------------------+---------------------| +|---------------------------------------| +| FATE Script Metrics Match Results | +|----------------+----------------------| +| Metric | All Match | +|----------------+----------------------| +| min | True | +| max | True | +| mean | True | +|----------------+----------------------| +``` + +### command options + +use the following command to show help message + +```bash +fate_test benchmark-quality --help +``` + +1. include: + + ```bash + fate_test benchmark-quality -i + ``` + + will run benchmark testsuites in + *path1* + +2. exclude: + + ```bash + fate_test benchmark-quality -i -e -e ... + ``` + + will run benchmark testsuites in *path1* but not in *path2* and + *path3* + +3. glob: + + ```bash + fate_test benchmark-quality -i -g "hetero*" + ``` + + will run benchmark testsuites in sub directory start with *hetero* + of + *path1* + +4. tol: + + ```bash + fate_test benchmark-quality -i -t 1e-3 + ``` + + will run benchmark testsuites in *path1* with absolute tolerance of + difference between metrics set to 0.001. If absolute difference + between metrics is smaller than *tol*, then metrics are considered + almost equal. Check benchmark testsuite [writing + guide](#benchmark-testsuite) on setting alternative tolerance. + +5. skip-data: + + ```bash + fate_test benchmark-quality -i --skip-data + ``` + + will run benchmark testsuites in *path1* without uploading data + specified in + *benchmark.yaml*. + +6. 
data-only: + + ```bash + fate_test benchmark-quality -i --data-only + ``` + + will only upload data specified in *testsuite.yaml* without running + jobs + +7. disable-clean-data: + + ```bash + fate_test benchmark-quality -i --disable-clean-data + ``` + + will run benchmark testsuites in *path1* without removing data from + storage after tasks + finish + +8. enable-clean-data: + + ```bash + fate_test benchmark-quality -i --enable-clean-data + ``` + + will remove data from storage after finishing running benchmark + testsuites + +9. yes: + ```bash + fate_test benchmark-quality -i --yes + ``` + + will run benchmark testsuites in *path1* directly, skipping double + check + +### benchmark quality job configuration + +Configuration of jobs should be specified in a benchmark quality testsuite whose +file name ends with "\*benchmark.yaml". For benchmark testsuite example, +please refer [here](../../examples/benchmark_quality). + +A benchmark testsuite includes the following elements: + +- data: list of local data to be uploaded before running FATE jobs + + - file: path to original data file to be uploaded, should be + relative to testsuite or FATE installation path + - meta: information regarding parsing input data, including + - delimiter + - dtype, + - label\_type + - weight\_type + - input format + - match\_id\_name + - sample\_id\_name + - partitions: number of partition for data storage + - head: whether table includes header + - extend_sid: whether automatically extend sample id + - table\_name: table name in storage + - namespace: table namespace in storage + - role: which role to upload the data, as specified in + fate\_test.config; naming format is: + "{role\_type}\_{role\_index}", index starts at 0 + + ```yaml + data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + 
weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + ``` + +- job group: each group includes arbitrary number of jobs with paths + to corresponding script and configuration + + - job: name of job to be run, must be unique within each group + list + + - script: path to [testing script](#testing-script(quality)), should be + relative to testsuite + - conf: path to job configuration file for script, should be + relative to testsuite + + ```yaml + "local": { + "script": "./local-linr.py", + "conf": "./linr_config.yaml" + } + ``` + + - compare\_setting: additional setting for quality metrics + comparison, currently only takes `relative_tol` + + If metrics *a* and *b* satisfy *abs(a-b) \<= max(relative\_tol + \* max(abs(a), abs(b)), absolute\_tol)* (from [math + module](https://docs.python.org/3/library/math.html#math.isclose)), + they are considered almost equal. In the below example, metrics + from "local" and "FATE" jobs are considered almost equal if + their relative difference is smaller than *0.01 \* + max(abs(local\_metric), abs(pipeline\_metric))*. + + ```yaml + "linear_regression-regression": { + "local": { + "script": "./local-linr.py", + "conf": "./linr_config.yaml" + }, + "FATE": { + "script": "./fate-linr.py", + "conf": "./linr_config.yaml" + }, + "compare_setting": { + "relative_tol": 0.01 + } + } + ``` + +### testing script(quality) + +All job scripts need to have `Main` function as an entry point for +executing jobs; scripts should return two dictionaries: first with data +information key-value pairs: {data\_type}: {data\_name\_dictionary}; the +second contains {metric\_name}: {metric\_value} key-value pairs for +metric comparison. + +By default, the final data summary shows the output from the job named +"FATE"; if no such job exists, data information returned by the first +job is shown. 
For clear presentation, we suggest that user follow this +general [guideline](../../examples/data/README.md#data-set-naming-rule) +for data set naming. In the case of multi-host task, consider numbering +host as such: + + {'guest': 'default_credit_homo_guest', + 'host_1': 'default_credit_homo_host_1', + 'host_2': 'default_credit_homo_host_2'} + +Returned quality metrics of the same key are to be compared. Note that +only **real-value** metrics can be compared. + +To compare metrics of different models from the same script, metrics of +each model need to be wrapped into dictionary in the same format as the +general metric output above. + +In the returned dictionary of script, use reserved key `script_metrics` +to indicate the collection of metrics to be compared. + +- FATE script: `Main` should have three inputs: + - config: job configuration, + [JobConfig](../../python/fate_client/pipeline/utils/fate_utils.py) + object loaded from "fate\_test\_config.yaml" + - param: job parameter setting, dictionary loaded from "conf" file + specified in benchmark testsuite + - namespace: namespace suffix, user-given *namespace* or generated + timestamp string when using *namespace-mangling* +- non-FATE script: `Main` should have one or two inputs: + - param: job parameter setting, dictionary loaded from "conf" file + specified in benchmark testsuite + - (optional) config: job configuration, + [JobConfig](../../python/fate_client/pipeline/utils/fate_utils.py) + object loaded from "fate\_test\_config.yaml" + +Note that `Main` in FATE & non-FATE scripts can also be set to take zero +input argument. + +## Benchmark Performance + +`Performance` sub-command is used to test +efficiency of designated FATE jobs. + +Example tests may be found [here](../../examples/benchmark_performance). + +### command options + +```bash +fate_test performance --help +``` + +1. 
job-type: + + ```bash + fate_test performance -t intersect + ``` + + will run testsuites from intersect subdirectory (set in config) in + the default performance directory; note that only one of `task` and + `include` is + needed + +2. include: + + ```bash + fate_test performance -i + ``` + + will run testsuites in *path1*. Note that only one of `task` and + `include` needs to be specified; when both are given, path from + `include` takes + priority. + +3. timeout: + + ```bash + fate_test performance -i -m 3600 + ``` + + will run testsuites in *path1* and timeout when job does not finish + within 3600s; if tasks need more time, use a larger threshold + +4. epochs: + + ```bash + fate_test performance -i -e 5 + ``` + + will run testsuites in *path1* with all values to key "max\_iter" + set to 5 + +5. max-depth + + ```bash + fate_test performance -i -d 4 + ``` + + will run testsuites in *path1* with all values to key "max\_depth" + set to 4 + +6. num-trees + + ```bash + fate_test performance -i -nt 5 + ``` + + will run testsuites in *path1* with all values to key "num\_trees" + set to 5 + +7. task-cores + + ```bash + fate_test performance -i -p 4 + ``` + + will run testsuites in *path1* with EGGROLL "task\_cores" set to 4 + +8. storage-tag + + ```bash + fate_test performance -i -s test + ``` + + will run testsuites in *path1* with performance time stored under + provided tag for future comparison; note that FATE-Test always + records the most recent run for each tag; if the same tag is used + more than once, only performance from the latest job is + kept + +9. history-tag + + ```bash + fate_test performance -i -v test1 -v test2 + ``` + + will run performance testsuites in *path1* with performance time compared to + history jobs under provided + tag(s) + +10. 
skip-data: + + ```bash + fate_test performance -i --skip-data + ``` + + will run performance testsuites in *path1* without uploading data specified in + *performance.yaml*. + +11. data-only: + + ```bash + fate_test performance -i --data-only + ``` + + will only upload data specified in *performance.yaml* without running + jobs + +12. disable-clean-data: + + ```bash + fate_test performance -i --disable-clean-data + ``` + + will run testsuites in *path1* without removing data from storage + after tasks finish + +13. yes: + + ```bash + fate_test performance -i --yes + ``` + + will run testsuites in *path1* directly, skipping double check + +Configuration of jobs should be specified in a benchmark performance testsuite whose +file name ends with "\*performance.yaml". For benchmark testsuite example, +please refer [here](../../examples/benchmark_performance). + +A benchmark testsuite includes the following elements: + +- data: list of local data to be uploaded before running FATE jobs + + - file: path to original data file to be uploaded, should be + relative to testsuite or FATE installation path + - meta: information regarding parsing input data, including + - delimiter + - dtype, + - label\_type + - weight\_type + - input format + - match\_id\_name + - sample\_id\_name + - partitions: number of partition for data storage + - head: whether table includes header + - extend_sid: whether automatically extend sample id + - table\_name: table name in storage + - namespace: table namespace in storage + - role: which role to upload the data, as specified in + fate\_test.config; naming format is: + "{role\_type}\_{role\_index}", index starts at 0 + + ```yaml + data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + 
table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + ``` +- tasks: includes arbitrary number of pipeline jobs with + paths to corresponding python script + + - job: name of job to be run, must be unique within each group + list + + - script: path to [testing script](#testing-script(performance))), should be + relative to testsuite + - conf: path to job configuration file for script, should be + relative to testsuite + + ```yaml + "local": { + "script": "./local-linr.py", + "conf": "./linr_config.yaml" + } + ``` + +### testing script(performance) + +All job scripts need to have `Main` function as an entry point for +executing jobs; scripts should obtain and return job id of pipeline job as follows: + +```python +from fate_client.pipeline import FateFlowPipeline + +pipeline = FateFlowPipeline() +... +pipeline.compile() +pipeline.fit() +job_id = pipeline.model_info.job_id +print(job_id) +``` + +Returned job id will be used to query job status and time usage details for each component in job. + +- FATE script: `Main` should have three inputs: + - config: job configuration, + [JobConfig](../../python/fate_client/pipeline/utils/fate_utils.py) + object loaded from "fate\_test\_config.yaml" + - param: job parameter setting, dictionary loaded from "conf" file + specified in benchmark performance testsuite + - namespace: namespace suffix, user-given *namespace* or generated + timestamp string when using *namespace-mangling* + +Note that `Main` in FATE scripts can also be set to take zero +input argument. + +## data + +`Data` sub-command is used for upload, +delete, and generate dataset. + +### data command options + +```bash +fate_test data --help +``` + +1. include: + + ```bash + fate_test data [upload|delete] -i + ``` + + will upload/delete dataset in testsuites in + *path1* + +2. exclude: + + ```bash + fate_test data [upload|delete] -i -e -e ... + ``` + + will upload/delete dataset in testsuites in *path1* but not in + *path2* and + *path3* + +3. 
glob: + + ```bash + fate_test data [upload|delete] -i -g "hetero*" + ``` + + will upload/delete dataset in testsuites in sub directory start with + *hetero* of + *path1* + +4. upload example data: + + ```bash + fate_test data upload -t [min_test|all_examples] + ``` + + will upload dataset for min_test or all examples of fate. Once command is executed successfully, + you are expected to see the following feedback which shows the table information for you: + + ```bash + [2020-06-12 14:19:39]uploading @examples/data/breast_hetero_guest.csv >> experiment.breast_hetero_guest + [2020-06-12 14:19:39]upload done @examples/data/breast_hetero_guest.csv >> experiment.breast_hetero_guest, job_id=2020061214193960279930 + [2020-06-12 14:19:42]2020061214193960279930 success, elapse: 0:00:02 + [2020-06-12 14:19:42] check_data_out {'data': {'count': 569, 'namespace': 'experiment', 'partition': 16, 'table_name': 'breast_hetero_guest'}, 'retcode': 0, 'retmsg': 'success'} + ``` + + Note: uploading configurations are [min_test_config](../../examples/data/upload_config/min_test_data_testsuite.yaml) + and [all_examples](../../examples/data/upload_config/all_examples_data_testsuite.yaml), + user can add more data by modifying them or check out the example data's name and namespace. + +5. download mnist data: + + ```bash + fate_test data download -t mnist -o ${mnist_data_dir} + ``` + + -t: if not specified, default is "mnist" + -o: directory of download data, default is "examples/data" + +### generate command options + +```bash +fate_test data generate --help +``` + +1. include: + + ```bash + fate_test data generate -i + ``` + + will generate dataset in testsuites in *path1*; note that only one + of `type` and `include` is + needed + +2. host-data-type: + + ```bash + fate_test data generate -i -ht {tag-value | dense | tag } + ``` + + will generate dataset in testsuites *path1* where host data are of + selected + format + +3. 
sparsity: + + ```bash + fate_test data generate -i -s 0.2 + ``` + + will generate dataset in testsuites in *path1* with sparsity at 0.2; + useful for tag-formatted + data + +4. encryption-type: + + ```bash + fate_test data generate -i -p {sha256 | md5} + ``` + + will generate dataset in testsuites in *path1* with hash id using + SHA256 + method + +5. match-rate: + + ```bash + fate_test data generate -i -m 1.0 + ``` + + will generate dataset in testsuites in *path1* where generated host + and guest data have intersection rate of + 1.0 + +6. guest-data-size: + + ```bash + fate_test data generate -i -ng 10000 + ``` + + will generate dataset in testsuites *path1* where guest data each + have 10000 + entries + +7. host-data-size: + + ```bash + fate_test data generate -i -nh 10000 + ``` + + will generate dataset in testsuites *path1* where host data have + 10000 + entries + +8. guest-feature-num: + + ```bash + fate_test data generate -i -fg 20 + ``` + + will generate dataset in testsuites *path1* where guest data have 20 + features + +9. host-feature-num: + + ```bash + fate_test data generate -i -fh 200 + ``` + + will generate dataset in testsuites *path1* where host data have 200 + features + +10. output-path: + + ```bash + fate_test data generate -i -o + ``` + + will generate dataset in testsuites *path1* and write file to + *path2* + +11. force: + + ```bash + fate_test data generate -i -o --force + ``` + + will generate dataset in testsuites *path1* and write file to + *path2*; will overwrite existing file(s) if designated file name + found under + *path2* + +12. split-host: + + ```bash + fate_test data generate -i -nh 10000 --split-host + ``` + + will generate dataset in testsuites *path1*; 10000 entries will be + divided equally among all host data + sets + +13. upload-data + + ```bash + fate_test data generate -i --upload-data + ``` + + will generate dataset in testsuites *path1* and upload generated + data for all parties to + FATE + +14. 
remove-data + + ```bash + fate_test data generate -i --upload-data --remove-data + ``` + + (effective with `upload-data` set to True) will delete generated + data after generate and upload dataset in testsuites + *path1* diff --git a/doc/tutorial/fate_test_tutorial.md b/doc/tutorial/fate_test_tutorial.md new file mode 100644 index 0000000000..29a08bbc71 --- /dev/null +++ b/doc/tutorial/fate_test_tutorial.md @@ -0,0 +1,91 @@ +# FATE Test Tutorial + +A collection of useful tools for running FATE tests and [:file_folder:examples](../../examples). + +## quick start + +1. install + + ```bash + pip install -e python/fate_test + ``` +2. edit default fate\_test\_config.yaml + + ```bash + # edit priority config file with system default editor + # filling some field according to comments + fate_test config edit + ``` + +3. configure FATE-Flow Commandline server setting + + ```bash + # configure FATE-Flow Commandline server setting + flow init --port 9380 --ip 127.0.0.1 + ``` + +4. run some fate\_test suite + + ```bash + fate_test suite -i + ``` + +5. run some fate\_test benchmark quality + + ```bash + fate_test benchmark-quality -i + ``` + +6. 
run some fate\_test benchmark performance + + ```bash + fate_test performance -i + ``` + +7. Useful logs or exceptions will be saved to logs dir with namespace +shown in last step + +## command types + +- [suite](../api/fate_test.md#testsuite): used for running [testsuites](../api/fate_test.md#testsuite-configuration), + collection of FATE jobs + + ```bash + fate_test suite -i + ``` + +- [data](../api/fate_test.md#data): used for upload, delete, and generate dataset + + - [upload/delete data](../api/fate_test.md#data-command-options) command: + + ```bash + fate_test data [upload|delete] -i + ``` + - [upload example data of min_test/all_examples](../api/fate_test.md#data-command-options) command: + + ```bash + fate_test data upload -t min_test + fate_test data upload -t all_examples + ``` + + - [generate data](../api/fate_test.md#generate-command-options) command: + + ```bash + fate_test data generate -i + ``` + +- [benchmark-quality](../api/fate_test.md#benchmark-quality): used for comparing modeling quality between FATE + and other machine learning systems, as specified + in [benchmark job configuration](../api/fate_test.md#benchmark-job-configuration) + + ```bash + fate_test bq -i + ``` + +- [benchmark-performance](../api/fate_test.md#benchmark-performance): used for checking FATE algorithm performance; user + should first generate and upload data before running performance testsuite + + ```bash + fate_test data generate -i -ng 10000 -fg 10 -fh 10 -m 1.0 --upload-data + fate_test performance -i --skip-data + ``` \ No newline at end of file diff --git a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py index fc3f69209a..dd447362d8 100644 --- a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py +++ b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py @@ -17,7 +17,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from
fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -48,11 +48,11 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): if config.timeout: pipeline.conf.set("timeout", config.timeout) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], - namespace=guest_train_data["namespace"])) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], - namespace=host_train_data["namespace"])) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"], + namespace=guest_train_data["namespace"])) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"], + namespace=host_train_data["namespace"])) lr_param = { } @@ -68,10 +68,10 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): } lr_param.update(config_param) lr_0 = CoordinatedLR("lr_0", - train_data=intersect_0.outputs["output_data"], + train_data=psi_0.outputs["output_data"], **lr_param) lr_1 = CoordinatedLR("lr_1", - test_data=intersect_0.outputs["output_data"], + test_data=psi_0.outputs["output_data"], input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation("evaluation_0", @@ -80,7 +80,7 @@ def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): metrics=["auc", "binary_precision", "binary_accuracy", "binary_recall"], input_data=lr_0.outputs["train_output_data"]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(evaluation_0) diff --git 
a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index 8033d8af0d..b547c333b9 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,7 +2,7 @@ data_guest: "default_credit_hetero_guest" data_host: "default_credit_hetero_host" idx: "id" label_name: "y" -epochs: 22 +epochs: 30 init_param: fit_intercept: True method: "zeros" @@ -17,6 +17,6 @@ optimizer: penalty: "L2" alpha: 0.001 optimizer_params: - lr: 0.15 + lr: 0.21 batch_size: 3200 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/epsilon_5k_config.yaml b/examples/benchmark_quality/lr/epsilon_5k_config.yaml index 39144f4fdb..034d61378c 100644 --- a/examples/benchmark_quality/lr/epsilon_5k_config.yaml +++ b/examples/benchmark_quality/lr/epsilon_5k_config.yaml @@ -3,7 +3,7 @@ data_host: "epsilon_5k_hetero_host" idx: "id" label_name: "y" epochs: 8 -batch_size: 2500 +batch_size: 2200 init_param: fit_intercept: True method: "random" diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index dc041b48fe..480077d4ec 100644 --- a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -2,20 +2,20 @@ data_guest: "give_credit_hetero_guest" data_host: "give_credit_hetero_host" idx: "id" label_name: "y" -epochs: 6 +epochs: 12 init_param: fit_intercept: True method: "zeros" learning_rate_scheduler: method: "linear" scheduler_params: - factor: 0.7 + start_factor: 0.71 total_iters: 1000 optimizer: - method: "adam" + method: "rmsprop" penalty: "L2" - alpha: 10 + alpha: 0.01 optimizer_params: - lr: 0.2 + lr: 0.29 batch_size: 5500 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index d7852909a2..63cb2603bd 
100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -179,15 +179,15 @@ data: table_name: vehicle_scale_hetero_host namespace: experiment role: host_0 -hetero_lr-binary-0-breast: - local: - script: "./sklearn-lr-binary.py" - conf: "./breast_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./breast_config.yaml" - compare_setting: - relative_tol: 0.01 +#hetero_lr-binary-0-breast: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./breast_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-binary.py" +# conf: "./breast_config.yaml" +# compare_setting: +# relative_tol: 0.01 hetero_lr-binary-1-default-credit: local: script: "./sklearn-lr-binary.py" @@ -197,15 +197,15 @@ hetero_lr-binary-1-default-credit: conf: "./default_credit_config.yaml" compare_setting: relative_tol: 0.01 -hetero_lr-binary-2-epsilon-5k: - local: - script: "./sklearn-lr-binary.py" - conf: "./epsilon_5k_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./epsilon_5k_config.yaml" - compare_setting: - relative_tol: 0.01 +#hetero_lr-binary-2-epsilon-5k: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./epsilon_5k_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-binary.py" +# conf: "./epsilon_5k_config.yaml" +# compare_setting: +# relative_tol: 0.01 hetero_lr-binary-3-give-credit: local: script: "./sklearn-lr-binary.py" diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index a598403238..ed3851e510 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -71,6 +71,7 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation('evaluation_0', + input_data=lr_0.outputs["train_output_data"], 
metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) pipeline.add_task(psi_0) pipeline.add_task(lr_0) diff --git a/examples/benchmark_quality/lr/sklearn-lr-multi.py b/examples/benchmark_quality/lr/sklearn-lr-multi.py index ae931db9fb..b56fc80dce 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-multi.py +++ b/examples/benchmark_quality/lr/sklearn-lr-multi.py @@ -42,10 +42,10 @@ def main(config="../../config.yaml", param="./vehicle_lr_sklearn_config.yaml"): config_param = { "penalty": param["penalty"], - "max_iter": param["max_iter"], + "max_iter": param["epochs"], "alpha": param["alpha"], "learning_rate": "optimal", - "eta0": param["learning_rate"], + "eta0": param["eta0"], "random_state": 105 } diff --git a/examples/pipeline/coordinated_lr/config.yaml b/examples/pipeline/config.yaml similarity index 100% rename from examples/pipeline/coordinated_lr/config.yaml rename to examples/pipeline/config.yaml diff --git a/examples/pipeline/coordinated_linr/coordinated_linr_testsuite.yaml b/examples/pipeline/coordinated_linr/coordinated_linr_testsuite.yaml new file mode 100644 index 0000000000..e3e319adb6 --- /dev/null +++ b/examples/pipeline/coordinated_linr/coordinated_linr_testsuite.yaml @@ -0,0 +1,60 @@ +data: + - file: examples/data/motor_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: motor_speed + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: motor_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/motor_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: motor_hetero_host + namespace: experiment + role: host_0 + - file: 
examples/data/motor_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: motor_hetero_host + namespace: experiment + role: host_1 +tasks: + normal-linr: + script: test_linr.py + linr-cv: + script: test_linr_cv.py + linr-warm-start: + script: test_linr_warm_start.py + linr-multi-host: + script: test_linr_multi_host.py diff --git a/examples/pipeline/coordinated_linr/test_linr.py b/examples/pipeline/coordinated_linr/test_linr.py new file mode 100644 index 0000000000..dbc47daeae --- /dev/null +++ b/examples/pipeline/coordinated_linr/test_linr.py @@ -0,0 +1,87 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLinR, PSI, Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"experiment{namespace}")) + linr_0 = CoordinatedLinR("linr_0", + epochs=10, + batch_size=100, + optimizer={"method": "rmsprop", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True}, + train_data=psi_0.outputs["output_data"]) + evaluation_0 = Evaluation("evaluation_0", + label_column_name="motor_speed", + runtime_roles=["guest"], + default_eval_setting="regression", + input_data=linr_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(linr_0) + pipeline.add_task(evaluation_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([psi_0, linr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + deployed_pipeline.psi_0.guest.component_setting( + input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting( + input_data=DataWarehouseChannel(name="motor_hetero_host", + 
namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_linr/test_linr_cv.py b/examples/pipeline/coordinated_linr/test_linr_cv.py new file mode 100644 index 0000000000..ed33e0556a --- /dev/null +++ b/examples/pipeline/coordinated_linr/test_linr_cv.py @@ -0,0 +1,64 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLinR, PSI +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"experiment{namespace}")) + linr_0 = CoordinatedLinR("linr_0", + epochs=10, + batch_size=None, + optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True}, + cv_data=psi_0.outputs["output_data"], + cv_param={"n_splits": 3}) + + pipeline.add_task(psi_0) + pipeline.add_task(linr_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_linr/test_linr_multi_host.py b/examples/pipeline/coordinated_linr/test_linr_multi_host.py new file mode 100644 index 0000000000..cbf374b4e5 --- /dev/null +++ b/examples/pipeline/coordinated_linr/test_linr_multi_host.py 
@@ -0,0 +1,93 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, PSI +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"{namespace}experiment")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"{namespace}experiment")) + psi_0.hosts[1].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"{namespace}experiment")) + lr_0 = CoordinatedLR("lr_0", + epochs=5, + batch_size=None, + early_stop="weight_diff", + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.1}}, + init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=psi_0.outputs["output_data"], + learning_rate_scheduler={"method": 
"constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="motor_speed", + runtime_roles=["guest"], + default_eval_setting="regression", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([psi_0, lr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + deployed_pipeline.psi_0.guest.component_setting( + input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"{namespace}experiment")) + deployed_pipeline.psi_0.hosts[[0, 1]].component_setting( + input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"{namespace}experiment")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + # print(f"predict lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_linr/test_linr_warm_start.py b/examples/pipeline/coordinated_linr/test_linr_warm_start.py new file mode 100644 index 0000000000..30f887254c --- /dev/null +++ b/examples/pipeline/coordinated_linr/test_linr_warm_start.py @@ -0,0 +1,95 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLinR, PSI +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="motor_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", + namespace=f"experiment{namespace}")) + linr_0 = CoordinatedLinR("linr_0", + epochs=4, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=psi_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + linr_1 = CoordinatedLinR("linr_1", train_data=psi_0.outputs["output_data"], + warm_start_model=linr_0.outputs["output_model"], + epochs=2, + batch_size=None, + 
optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + ) + + linr_2 = CoordinatedLinR("linr_2", epochs=6, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, + init_param={"fit_intercept": True, "method": "zeros"}, + train_data=psi_0.outputs["output_data"], + learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="motor_speed", + runtime_roles=["guest"], + default_eval_setting="regression", + input_data=[linr_1.outputs["train_output_data"], linr_2.outputs["train_output_data"]]) + + pipeline.add_task(psi_0) + pipeline.add_task(linr_0) + pipeline.add_task(linr_1) + pipeline.add_task(linr_2) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + print(f"linr_1 model: {pipeline.get_task_info('linr_1').get_output_model()}") + # print(f"train linr_1 data: {pipeline.get_task_info('linr_1').get_output_data()}") + + print(f"linr_2 model: {pipeline.get_task_info('linr_2').get_output_model()}") + # print(f"train linr_2 data: {pipeline.get_task_info('linr_2').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml index 029d8c6dfc..70de986820 100644 --- a/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml +++ b/examples/pipeline/coordinated_lr/coordinated_lr_testsuite.yaml @@ -33,10 +33,66 @@ data: table_name: breast_hetero_host namespace: experiment role: host_0 + - file: 
examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_1 + - file: "../../data/vehicle_scale_hetero_guest.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + label_type: int64 + label_name: y + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partitions: 4 + extend_sid: false + table_name: vehicle_scale_hetero_guest + namespace: experiment + role: guest_0 + - file: "../../data/vehicle_scale_hetero_host.csv" + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: "id" + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + head: true + partitions: 4 + extend_sid: false + table_name: vehicle_scale_hetero_host + namespace: experiment + role: host_0 tasks: normal-lr: - script: test_lr_sid.py + script: test_lr.py lr-cv: - script: test_lr_sid_cv.py + script: test_lr_cv.py + lr-validate: + script: test_lr_validate.py lr-warm-start: - script: test_lr_sid_warm_start.py + script: test_lr_warm_start.py + lr-multi-class: + script: test_lr_multi_class.py + lr-multi-host: + script: test_lr_multi_host.py diff --git a/examples/pipeline/coordinated_lr/test_lr_sid.py b/examples/pipeline/coordinated_lr/test_lr.py similarity index 94% rename from examples/pipeline/coordinated_lr/test_lr_sid.py rename to examples/pipeline/coordinated_lr/test_lr.py index b13c24f8db..88b73ad325 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid.py +++ b/examples/pipeline/coordinated_lr/test_lr.py @@ -22,7 +22,7 @@ from fate_client.pipeline.utils import test_utils -def main(config="./config.yaml", namespace=""): +def main(config="../config.yaml", namespace=""): if
isinstance(config, str): config = test_utils.load_job_config(config) parties = config.parties @@ -43,7 +43,7 @@ def main(config="./config.yaml", namespace=""): namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=10, - batch_size=None, + batch_size=300, optimizer={"method": "SGD", "optimizer_params": {"lr": 0.21}}, init_param={"fit_intercept": True, "method": "random_uniform"}, train_data=psi_0.outputs["output_data"], @@ -86,9 +86,9 @@ def main(config="./config.yaml", namespace=""): if __name__ == "__main__": parser = argparse.ArgumentParser("PIPELINE DEMO") - parser.add_argument("-config", type=str, default="./config.yaml", + parser.add_argument("--config", type=str, default="../config.yaml", help="config file") - parser.add_argument("-namespace", type=str, default="", + parser.add_argument("--namespace", type=str, default="", help="namespace for data stored in FATE") args = parser.parse_args() main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py b/examples/pipeline/coordinated_lr/test_lr_cv.py similarity index 91% rename from examples/pipeline/coordinated_lr/test_lr_sid_cv.py rename to examples/pipeline/coordinated_lr/test_lr_cv.py index 8caffd245b..b981f005e1 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_cv.py +++ b/examples/pipeline/coordinated_lr/test_lr_cv.py @@ -21,7 +21,7 @@ from fate_client.pipeline.utils import test_utils -def main(config="./config.yaml", namespace=""): +def main(config="../config.yaml", namespace=""): if isinstance(config, str): config = test_utils.load_job_config(config) parties = config.parties @@ -41,7 +41,7 @@ def main(config="./config.yaml", namespace=""): namespace=f"experiment{namespace}")) lr_0 = CoordinatedLR("lr_0", epochs=2, - batch_size=100, + batch_size=None, optimizer={"method": "sgd", "optimizer_params": {"lr": 0.01}}, init_param={"fit_intercept": True}, cv_data=psi_0.outputs["output_data"], @@ -56,9 +56,9 @@ def 
main(config="./config.yaml", namespace=""): if __name__ == "__main__": parser = argparse.ArgumentParser("PIPELINE DEMO") - parser.add_argument("-config", type=str, default="./config.yaml", + parser.add_argument("--config", type=str, default="../config.yaml", help="config file") - parser.add_argument("-namespace", type=str, default="", + parser.add_argument("--namespace", type=str, default="", help="namespace for data stored in FATE") args = parser.parse_args() main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_multi_class.py b/examples/pipeline/coordinated_lr/test_lr_multi_class.py new file mode 100644 index 0000000000..9ce85fe3d9 --- /dev/null +++ b/examples/pipeline/coordinated_lr/test_lr_multi_class.py @@ -0,0 +1,94 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, PSI +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="vehicle_scale_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="vehicle_scale_hetero_host", + namespace=f"experiment{namespace}")) + lr_0 = CoordinatedLR("lr_0", + epochs=10, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.21}}, + init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=psi_0.outputs["output_data"], + learning_rate_scheduler={"method": "linear", "scheduler_params": {"start_factor": 0.7, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="multi", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([psi_0, lr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + deployed_pipeline.psi_0.guest.component_setting( + 
input_data=DataWarehouseChannel(name="vehicle_scale_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting( + input_data=DataWarehouseChannel(name="vehicle_scale_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + # print(f"predict lr_0 data: {pipeline.get_task_info('lr_0').get_output_data()}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/test_single_lr_multi_host.py b/examples/pipeline/coordinated_lr/test_lr_multi_host.py similarity index 74% rename from examples/pipeline/test_single_lr_multi_host.py rename to examples/pipeline/coordinated_lr/test_lr_multi_host.py index cd332ad64e..a94ff8afcf 100644 --- a/examples/pipeline/test_single_lr_multi_host.py +++ b/examples/pipeline/coordinated_lr/test_lr_multi_host.py @@ -16,13 +16,13 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, Intersection +from fate_client.pipeline.components.fate import CoordinatedLR, PSI from fate_client.pipeline.components.fate import Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils -def main(config="./config.yaml", namespace=""): +def main(config="../config.yaml", namespace=""): if isinstance(config, str): config = test_utils.load_job_config(config) parties = config.parties @@ -32,20 +32,20 @@ def main(config="./config.yaml", namespace=""): pipeline = 
FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) - intersect_0 = Intersection("intersect_0", method="raw") - intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace=f"{namespace}experiment_sid")) - intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"{namespace}experiment_sid")) - intersect_0.hosts[1].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"{namespace}experiment_sid")) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"{namespace}experiment")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"{namespace}experiment")) + psi_0.hosts[1].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"{namespace}experiment")) lr_0 = CoordinatedLR("lr_0", - epochs=4, + epochs=5, batch_size=None, early_stop="weight_diff", - optimizer={"method": "SGD", "optimizer_params": {"lr": 0.01}}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.1}}, + init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=psi_0.outputs["output_data"], learning_rate_scheduler={"method": "constant", "scheduler_params": {"factor": 1.0, "total_iters": 100}}) @@ -55,7 +55,7 @@ def main(config="./config.yaml", namespace=""): default_eval_setting="binary", input_data=lr_0.outputs["train_output_data"]) - pipeline.add_task(intersect_0) + pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.add_task(evaluation_0) @@ -63,17 +63,17 @@ def main(config="./config.yaml", namespace=""): print(pipeline.get_dag()) pipeline.fit() - pipeline.deploy([intersect_0, lr_0]) + pipeline.deploy([psi_0, lr_0]) predict_pipeline = FateFlowPipeline() 
deployed_pipeline = pipeline.get_deployed_pipeline() - deployed_pipeline.intersect_0.guest.component_setting( + deployed_pipeline.psi_0.guest.component_setting( input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace=f"{namespace}experiment_sid")) - deployed_pipeline.intersect_0.hosts[[0, 1]].component_setting( - input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace=f"{namespace}experiment_sid")) + namespace=f"{namespace}experiment")) + deployed_pipeline.psi_0.hosts[[0, 1]].component_setting( + input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"{namespace}experiment")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() @@ -85,9 +85,9 @@ def main(config="./config.yaml", namespace=""): if __name__ == "__main__": parser = argparse.ArgumentParser("PIPELINE DEMO") - parser.add_argument("-config", type=str, default="./config.yaml", + parser.add_argument("--config", type=str, default="../config.yaml", help="config file") - parser.add_argument("-namespace", type=str, default="", + parser.add_argument("--namespace", type=str, default="", help="namespace for data stored in FATE") args = parser.parse_args() main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_validate.py b/examples/pipeline/coordinated_lr/test_lr_validate.py new file mode 100644 index 0000000000..19c44e3903 --- /dev/null +++ b/examples/pipeline/coordinated_lr/test_lr_validate.py @@ -0,0 +1,80 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, PSI, DataSplit +from fate_client.pipeline.components.fate import Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + data_split_0 = DataSplit("data_split_0", + train_size=0.8, + validate_size=0.2, + input_data=psi_0.outputs["output_data"]) + lr_0 = CoordinatedLR("lr_0", + epochs=10, + batch_size=300, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.21}}, + init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=data_split_0.outputs["train_output_data"], + validate_data=data_split_0.outputs["validate_output_data"], + learning_rate_scheduler={"method": "linear", 
"scheduler_params": {"start_factor": 0.7, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="binary", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) + + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_warm_start.py similarity index 95% rename from examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py rename to examples/pipeline/coordinated_lr/test_lr_warm_start.py index 25ba007959..5e554e837f 100644 --- a/examples/pipeline/coordinated_lr/test_lr_sid_warm_start.py +++ b/examples/pipeline/coordinated_lr/test_lr_warm_start.py @@ -22,7 +22,7 @@ from fate_client.pipeline.utils import test_utils -def main(config="./config.yaml", namespace=""): +def main(config="../config.yaml", namespace=""): if isinstance(config, str): config = test_utils.load_job_config(config) parties = config.parties @@ -73,6 +73,7 @@ def main(config="./config.yaml", namespace=""): pipeline.add_task(lr_0) pipeline.add_task(lr_1) pipeline.add_task(lr_2) + pipeline.add_task(evaluation_0) pipeline.compile() print(pipeline.get_dag()) @@ -86,9 +87,9 @@ def main(config="./config.yaml", namespace=""): if __name__ == "__main__": parser = argparse.ArgumentParser("PIPELINE DEMO") - parser.add_argument("-config", type=str, default="./config.yaml", + parser.add_argument("--config", type=str, default="../config.yaml", help="config file") - 
parser.add_argument("-namespace", type=str, default="", + parser.add_argument("--namespace", type=str, default="", help="namespace for data stored in FATE") args = parser.parse_args() main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/data_split/data_split_lr_testsuite.yaml b/examples/pipeline/data_split/data_split_lr_testsuite.yaml new file mode 100644 index 0000000000..468a41f510 --- /dev/null +++ b/examples/pipeline/data_split/data_split_lr_testsuite.yaml @@ -0,0 +1,40 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + data-split: + script: test_data_split.py + data-split-stratified: + script: test_data_split_stratified.py diff --git a/examples/pipeline/data_split/test_data_split.py b/examples/pipeline/data_split/test_data_split.py new file mode 100644 index 0000000000..484d26fa93 --- /dev/null +++ b/examples/pipeline/data_split/test_data_split.py @@ -0,0 +1,91 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import DataSplit, PSI +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + data_split_0 = DataSplit("data_split_0", + train_size=0.6, + validate_size=0.1, + test_size=None, + input_data=psi_0.outputs["output_data"]) + + data_split_1 = DataSplit("data_split_1", + train_size=200, + test_size=50, + input_data=psi_0.outputs["output_data"] + ) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + 
pipeline.add_task(data_split_0) + pipeline.add_task(data_split_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("data_split_0").get_output_data()) + """output_data = pipeline.get_task_info("data_split_0").get_output_data() + import pandas as pd + + print(f"data split 0 train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") + output_data = pipeline.get_task_info("data_split_1").get_output_data() + print(f"data split 1train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}")""" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/data_split/test_data_split_stratified.py b/examples/pipeline/data_split/test_data_split_stratified.py new file mode 100644 index 0000000000..647d42ad63 --- /dev/null +++ b/examples/pipeline/data_split/test_data_split_stratified.py @@ -0,0 +1,94 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import DataSplit, PSI +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + + data_split_0 = DataSplit("data_split_0", + train_size=0.6, + validate_size=0.0, + test_size=0.4, + stratified=True, + input_data=psi_0.outputs["output_data"]) + + data_split_1 = DataSplit("data_split_1", + train_size=200, + test_size=50, + stratified=True, + input_data=psi_0.outputs["output_data"] + ) + + 
pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(data_split_0) + pipeline.add_task(data_split_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("data_split_0").get_output_data()) + """output_data = pipeline.get_task_info("data_split_0").get_output_data() + import pandas as pd + + print(f"data split 0 train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") + output_data = pipeline.get_task_info("data_split_1").get_output_data() + print(f"data split 1train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}")""" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_binning/binning_testsuite.yaml b/examples/pipeline/hetero_feature_binning/binning_testsuite.yaml new file mode 100644 index 0000000000..2e9d95043e --- /dev/null +++ b/examples/pipeline/hetero_feature_binning/binning_testsuite.yaml @@ -0,0 +1,42 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + 
role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + binning-bucket: + script: test_feature_binning_bucket.py + binning-quantile: + script: test_feature_binning_quantile.py + binning-asymmetric: + script: test_feature_binning_asymmetric.py diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py new file mode 100644 index 0000000000..9b353527de --- /dev/null +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py @@ -0,0 +1,92 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureBinning +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + binning_0 = HeteroFeatureBinning("binning_0", + method="quantile", + n_bins=10, + train_data=psi_0.outputs["output_data"], + local_only=True + ) + binning_0.guest.component_setting(bin_col=["x0"], transform_method="bin_idx") + + binning_1 = HeteroFeatureBinning("binning_1", + transform_method="bin_idx", + method="quantile", + train_data=binning_0.outputs["train_output_data"]) + binning_1.guest.component_setting(category_col=["x0"], transform_method="woe") + + pipeline.add_task(psi_0) + pipeline.add_task(binning_0) + pipeline.add_task(binning_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + print(pipeline.get_task_info("binning_1").get_output_model()) + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, binning_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + 
psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py new file mode 100644 index 0000000000..f40c443070 --- /dev/null +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py @@ -0,0 +1,96 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureBinning +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + binning_0 = HeteroFeatureBinning("binning_0", + method="bucket", + n_bins=10, + transform_method="bin_idx", + train_data=psi_0.outputs["output_data"] + ) + binning_1 = HeteroFeatureBinning("binning_1", + transform_method="bin_idx", + input_model=binning_0.outputs["output_model"], + test_data=psi_1.outputs["output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(binning_0) + pipeline.add_task(binning_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + print(pipeline.get_task_info("binning_0").get_output_model()) + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, 
binning_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py new file mode 100644 index 0000000000..8a0b9819a8 --- /dev/null +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py @@ -0,0 +1,91 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureBinning +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + binning_0 = HeteroFeatureBinning("binning_0", + method="quantile", + n_bins=10, + bin_col=["x0"], + transform_method="bin_idx", + train_data=psi_0.outputs["output_data"] + ) + binning_1 = HeteroFeatureBinning("binning_1", + transform_method="bin_idx", + method="quantile", + category_col=["x0"], + train_data=binning_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(binning_0) + pipeline.add_task(binning_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + print(pipeline.get_task_info("binning_1").get_output_model()) + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, binning_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + 
psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml b/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml new file mode 100644 index 0000000000..050dc39a14 --- /dev/null +++ b/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml @@ -0,0 +1,44 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + selection-binning: + script: test_feature_selection_binning.py + selection-manual: + script: test_feature_selection_manual.py + binning-statistics: + script: test_feature_selection_statistics.py + binning-multi-model: + script: test_feature_selection_multi_model.py \ No newline 
at end of file diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py new file mode 100644 index 0000000000..95b06406a4 --- /dev/null +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py @@ -0,0 +1,88 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection, HeteroFeatureBinning +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + binning_0 = 
HeteroFeatureBinning("binning_0", + method="quantile", + n_bins=10, + bin_col=["x0"], + transform_method="bin_idx", + train_data=psi_0.outputs["output_data"] + ) + selection_0 = HeteroFeatureSelection("selection_0", + method=["iv"], + train_data=psi_0.outputs["output_data"], + input_models=[binning_0.outputs["output_model"]], + iv_param={"metrics": "iv", "filter_type": "threshold", "threshold": 0.1}) + + pipeline.add_task(psi_0) + pipeline.add_task(binning_0) + pipeline.add_task(selection_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, selection_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py new file mode 100644 index 0000000000..ab4a7729de --- /dev/null +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py @@ -0,0 +1,80 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + selection_0 = HeteroFeatureSelection("selection_0", + method=["statistics"], + train_data=psi_0.outputs["output_data"]) + selection_0.guest.component_setting(manual_param={"keep_col": ["x0", "x1"]}) + selection_0.hosts[0].component_setting(manual_param={"filter_out_col": ["x0", "x1"]}) + + pipeline.add_task(psi_0) + pipeline.add_task(selection_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit()
+ + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, selection_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py new file mode 100644 index 0000000000..551c1d81e7 --- /dev/null +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py @@ -0,0 +1,94 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection, HeteroFeatureBinning, Statistics +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + binning_0 = HeteroFeatureBinning("binning_0", + method="quantile", + n_bins=10, + bin_col=["x0"], + transform_method="bin_idx", + train_data=psi_0.outputs["output_data"] + ) + statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"]) + selection_0 = HeteroFeatureSelection("selection_0", + method=["iv", "statistics", "manual"], + train_data=psi_0.outputs["output_data"], + input_models=[binning_0.outputs["output_model"], + statistics_0.outputs["output_model"]], + iv_param={"metrics": "iv", "filter_type": "threshold", "threshold": 0.1}, + statistic_param={"metrics": ["max", "mean"], + "filter_type": "top_k", "threshold": 5}, + manual_param={"keep_col": ["x0", "x1"]} + ) + + pipeline.add_task(psi_0) + pipeline.add_task(binning_0) + pipeline.add_task(selection_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) 
+ + pipeline.deploy([psi_0, selection_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py new file mode 100644 index 0000000000..c614a89e93 --- /dev/null +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py @@ -0,0 +1,83 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection, Statistics +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"]) + selection_0 = HeteroFeatureSelection("selection_0", + method=["statistics"], + train_data=psi_0.outputs["output_data"], + input_models=[statistics_0.outputs["output_model"]], + statistic_param={"metrics": ["max", "mean"], + "filter_type": "top_k", "threshold": 5}) + + pipeline.add_task(psi_0) + pipeline.add_task(statistics_0) + pipeline.add_task(selection_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, selection_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + 
namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/multi_model/test_multi.py b/examples/pipeline/multi_model/test_multi.py new file mode 100644 index 0000000000..3ea0424ef7 --- /dev/null +++ b/examples/pipeline/multi_model/test_multi.py @@ -0,0 +1,129 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection, HeteroFeatureBinning, \ + FeatureScale, Union, DataSplit, CoordinatedLR, CoordinatedLinR, Statistics, Sample, Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + data_split_0 = DataSplit("data_split_0", input_data=psi_0.outputs["output_data"], + train_size=0.8, test_size=0.2, random_state=42) + union_0 = Union("union_0", input_data_list=[data_split_0.outputs["train_output_data"], + data_split_0.outputs["test_output_data"]]) + sample_0 = Sample("sample_0", input_data=data_split_0.outputs["train_output_data"], + n=800, replace=True, hetero_sync=True) + + binning_0 = HeteroFeatureBinning("binning_0", + method="quantile", + n_bins=10, + train_data=sample_0.outputs["output_data"] + ) + statistics_0 = Statistics("statistics_0", + input_data=psi_0.outputs["output_data"]) + selection_0 = HeteroFeatureSelection("selection_0", + method=["iv", "statistics"], + train_data=sample_0.outputs["output_data"], + input_models=[binning_0.outputs["output_model"], + statistics_0.outputs["output_model"]], + 
iv_param={"metrics": "iv", "filter_type": "threshold", "value": 0.1}, + statistic_param={"metrics": ["max", "min"], "filter_type": "top_k", + "threshold": 5}) + + selection_1 = HeteroFeatureSelection("selection_1", + input_model=selection_0.outputs["train_output_model"], + test_data=data_split_0.outputs["test_output_data"]) + + scale_0 = FeatureScale("scale_0", method="min_max", + train_data=selection_0.outputs["train_output_data"], ) + + lr_0 = CoordinatedLR("lr_0", train_data=selection_0.outputs["train_output_data"], + validate_data=selection_1.outputs["test_output_data"], epochs=3) + linr_0 = CoordinatedLinR("linr_0", train_data=selection_0.outputs["train_output_data"], + validate_data=selection_1.outputs["test_output_data"], epochs=3) + + evaluation_0 = Evaluation("evaluation_0", input_data=lr_0.outputs["train_output_data"], + label_column_name="y", + runtime_roles=["guest"]) + evaluation_1 = Evaluation("evaluation_1", input_data=linr_0.outputs["train_output_data"], + default_eval_setting="regression", + label_column_name="y", + runtime_roles=["guest"]) + pipeline.add_task(psi_0) + pipeline.add_task(data_split_0) + pipeline.add_task(union_0) + pipeline.add_task(sample_0) + pipeline.add_task(binning_0) + pipeline.add_task(statistics_0) + pipeline.add_task(selection_0) + pipeline.add_task(scale_0) + pipeline.add_task(selection_1) + pipeline.add_task(lr_0) + pipeline.add_task(linr_0) + pipeline.add_task(evaluation_0) + pipeline.add_task(evaluation_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, selection_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + 
psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/sample/sample_testsuite.yaml b/examples/pipeline/sample/sample_testsuite.yaml new file mode 100644 index 0000000000..3df4d44118 --- /dev/null +++ b/examples/pipeline/sample/sample_testsuite.yaml @@ -0,0 +1,40 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + sample: + script: test_sample.py + sample-unilateral: + script: test_sample_unilateral.py diff --git a/examples/pipeline/sample/test_sample.py b/examples/pipeline/sample/test_sample.py new file mode 100644 index 0000000000..86fbf04a97 --- /dev/null +++ b/examples/pipeline/sample/test_sample.py @@ -0,0 +1,79 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import Sample, PSI +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + sample_0 = Sample("sample_0", + frac={0: 0.5}, + replace=False, + hetero_sync=True, + input_data=psi_0.outputs["output_data"]) + + sample_1 = Sample("sample_1", + n=100, + replace=False, + 
hetero_sync=True, + input_data=psi_0.outputs["output_data"] + ) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(sample_0) + pipeline.add_task(sample_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/sample/test_sample_unilateral.py b/examples/pipeline/sample/test_sample_unilateral.py new file mode 100644 index 0000000000..8bdc9b3bef --- /dev/null +++ b/examples/pipeline/sample/test_sample_unilateral.py @@ -0,0 +1,80 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import Sample, PSI +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + sample_0 = Sample("sample_0", + runtime_roles=["guest"], + frac={0: 0.5}, + replace=False, + hetero_sync=False, + input_data=psi_0.outputs["output_data"]) + + sample_1 = Sample("sample_1", + runtime_roles=["host"], + n=1000, + replace=True, + hetero_sync=False, + input_data=psi_0.outputs["output_data"] + ) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(sample_0) + pipeline.add_task(sample_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + 
parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/scale/scale_testsuite.yaml b/examples/pipeline/scale/scale_testsuite.yaml new file mode 100644 index 0000000000..dfb9771821 --- /dev/null +++ b/examples/pipeline/scale/scale_testsuite.yaml @@ -0,0 +1,42 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + scale-min-max: + script: test_scale_min_max.py + scale-standard: + script: test_scale_standard.py + scale-with-lr: + script: test_scale_w_lr.py \ No newline at end of file diff --git a/examples/pipeline/scale/test_scale_min_max.py b/examples/pipeline/scale/test_scale_min_max.py new file mode 100644 index 0000000000..2ceb11bc70 --- /dev/null +++ b/examples/pipeline/scale/test_scale_min_max.py @@ -0,0 +1,99 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import PSI, FeatureScale, Statistics +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + feature_scale_0 = FeatureScale("feature_scale_0", + method="min_max", + feature_range={"x0": [-1, 1]}, + scale_col=["x0", "x1", "x3"], + train_data=psi_0.outputs["output_data"]) + + feature_scale_1 = FeatureScale("feature_scale_1", + test_data=psi_1.outputs["output_data"], + 
input_model=feature_scale_0.outputs["output_model"]) + + statistics_0 = Statistics("statistics_0", + metrics=["max", "min", "mean", "std"], + input_data=feature_scale_1.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(feature_scale_0) + pipeline.add_task(feature_scale_1) + pipeline.add_task(statistics_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + print(pipeline.get_task_info("statistics_0").get_output_model()) + + pipeline.deploy([psi_0, feature_scale_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/scale/test_scale_standard.py b/examples/pipeline/scale/test_scale_standard.py new file mode 100644 index 0000000000..8bc7625334 --- /dev/null +++ b/examples/pipeline/scale/test_scale_standard.py @@ -0,0 +1,94 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, PSI, FeatureScale, Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + feature_scale_0 = FeatureScale("feature_scale_0", + method="standard", + train_data=psi_0.outputs["output_data"]) + + feature_scale_1 = FeatureScale("feature_scale_1", + test_data=psi_1.outputs["output_data"], + 
input_model=feature_scale_0.outputs["output_model"]) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(feature_scale_0) + pipeline.add_task(feature_scale_1) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + print(pipeline.get_task_info("feature_scale_0").get_output_model()) + # print(pipeline.get_task_info("feature_scale_1").get_output_model()) + + pipeline.deploy([psi_0, feature_scale_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/scale/test_scale_w_lr.py b/examples/pipeline/scale/test_scale_w_lr.py new file mode 100644 index 0000000000..03390a95d4 --- /dev/null +++ b/examples/pipeline/scale/test_scale_w_lr.py @@ -0,0 +1,103 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import CoordinatedLR, PSI, FeatureScale, Evaluation +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + arbiter = parties.arbiter[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + feature_scale_0 = FeatureScale("feature_scale_0", + method="standard", + train_data=psi_0.outputs["output_data"]) + + lr_0 = CoordinatedLR("lr_0", + epochs=10, + batch_size=None, + optimizer={"method": "SGD", "optimizer_params": {"lr": 0.21}}, + 
init_param={"fit_intercept": True, "method": "random_uniform"}, + train_data=feature_scale_0.outputs["train_output_data"], + learning_rate_scheduler={"method": "linear", "scheduler_params": {"start_factor": 0.7, + "total_iters": 100}}) + + evaluation_0 = Evaluation("evaluation_0", + label_column_name="y", + runtime_roles=["guest"], + default_eval_setting="binary", + input_data=lr_0.outputs["train_output_data"]) + + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(feature_scale_0) + pipeline.add_task(lr_0) + pipeline.add_task(evaluation_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + pipeline.deploy([psi_0, feature_scale_0, lr_0]) + + predict_pipeline = FateFlowPipeline() + + deployed_pipeline = pipeline.get_deployed_pipeline() + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + predict_pipeline.add_task(deployed_pipeline) + predict_pipeline.compile() + # print("\n\n\n") + # print(predict_pipeline.compile().get_dag()) + predict_pipeline.predict() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/statistics/statistics_testsuite.yaml b/examples/pipeline/statistics/statistics_testsuite.yaml new file mode 100644 index 0000000000..c3d80416fe --- /dev/null +++ b/examples/pipeline/statistics/statistics_testsuite.yaml @@ -0,0 +1,38 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + 
input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + statistics: + script: test_statistics.py diff --git a/examples/pipeline/statistics/test_statistics.py b/examples/pipeline/statistics/test_statistics.py new file mode 100644 index 0000000000..9a17395f2e --- /dev/null +++ b/examples/pipeline/statistics/test_statistics.py @@ -0,0 +1,61 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse
+
+from fate_client.pipeline import FateFlowPipeline
+from fate_client.pipeline.components.fate import PSI, Statistics
+from fate_client.pipeline.interface import DataWarehouseChannel
+from fate_client.pipeline.utils import test_utils
+
+
+def main(config="../config.yaml", namespace=""):
+    if isinstance(config, str):
+        config = test_utils.load_job_config(config)
+    parties = config.parties
+    guest = parties.guest[0]
+    host = parties.host[0]
+
+    pipeline = FateFlowPipeline().set_roles(guest=guest, host=host)
+    if config.task_cores:
+        pipeline.conf.set("task_cores", config.task_cores)
+    if config.timeout:
+        pipeline.conf.set("timeout", config.timeout)
+
+    psi_0 = PSI("psi_0")
+    psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest",
+                                                                  namespace=f"experiment{namespace}"))
+    psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host",
+                                                                     namespace=f"experiment{namespace}"))
+
+    statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"],
+                              metrics=["mean", "std", "min", "max"])
+
+    pipeline.add_task(psi_0)
+    pipeline.add_task(statistics_0)
+
+    # pipeline.add_task(hetero_feature_binning_0)
+    pipeline.compile()
+    print(pipeline.get_dag())
+    pipeline.fit()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("PIPELINE DEMO")
+    parser.add_argument("--config", type=str, default="../config.yaml",
+                        help="config file")
+    parser.add_argument("--namespace", type=str, default="",
+                        help="namespace for data stored in FATE")
+    args = parser.parse_args()
+    main(config=args.config, namespace=args.namespace)
diff --git a/examples/pipeline/test_data_split.py b/examples/pipeline/test_data_split.py
deleted file mode 100644
index a84dd4a3a5..0000000000
--- a/examples/pipeline/test_data_split.py
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# Copyright 2019 The FATE Authors. All Rights Reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import DataSplit -from fate_client.pipeline.components.fate import Intersection -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersection_0 = Intersection("intersection_0", - method="raw") -intersection_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace="experiment")) -intersection_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace="experiment")) - -intersection_1 = Intersection("intersection_1", - method="raw") -intersection_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace="experiment")) -intersection_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace="experiment")) - -data_split_0 = DataSplit("data_split_0", - train_size=0.6, - validate_size=0.1, - test_size=None, - input_data=intersection_0.outputs["output_data"]) - -data_split_1 = DataSplit("data_split_1", - train_size=200, - test_size=50, - input_data=intersection_0.outputs["output_data"] - ) - -pipeline.add_task(intersection_0) -pipeline.add_task(intersection_1) -pipeline.add_task(data_split_0) -pipeline.add_task(data_split_1) - -# 
pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -# print(pipeline.get_task_info("data_split_0").get_output_data()) -output_data = pipeline.get_task_info("data_split_0").get_output_data() -import pandas as pd - -print(f"data split 0 train size: {pd.DataFrame(output_data['train_output_data']).shape};" - f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" - f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") -output_data = pipeline.get_task_info("data_split_1").get_output_data() -print(f"data split 1train size: {pd.DataFrame(output_data['train_output_data']).shape};" - f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" - f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") diff --git a/examples/pipeline/test_data_split_stratified.py b/examples/pipeline/test_data_split_stratified.py deleted file mode 100644 index 75fb4b9652..0000000000 --- a/examples/pipeline/test_data_split_stratified.py +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import DataSplit -from fate_client.pipeline.components.fate import Intersection -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersection_0 = Intersection("intersection_0", - method="raw") -intersection_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersection_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -intersection_1 = Intersection("intersection_1", - method="raw") -intersection_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersection_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -data_split_0 = DataSplit("data_split_0", - train_size=0.6, - validate_size=0.0, - test_size=0.4, - stratified=True, - input_data=intersection_0.outputs["output_data"]) - -data_split_1 = DataSplit("data_split_1", - train_size=200, - test_size=50, - input_data=intersection_0.outputs["output_data"] - ) - -pipeline.add_task(intersection_0) -pipeline.add_task(intersection_1) -pipeline.add_task(data_split_0) -pipeline.add_task(data_split_1) - -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -# print(pipeline.get_task_info("data_split_0").get_output_data()) -output_data = pipeline.get_task_info("data_split_0").get_output_data() -import pandas as pd - -print(f"data split 0 train size: {pd.DataFrame(output_data['train_output_data']).shape};" - f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" - f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") -output_data = 
pipeline.get_task_info("data_split_1").get_output_data() -print(f"data split 1train size: {pd.DataFrame(output_data['train_output_data']).shape};" - f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" - f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") diff --git a/examples/pipeline/test_linr_sid_cv.py b/examples/pipeline/test_linr_sid_cv.py deleted file mode 100644 index a7e7d3a1e2..0000000000 --- a/examples/pipeline/test_linr_sid_cv.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLinR, Intersection -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersect_0 = Intersection("intersect_0", method="raw") -intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="motor_hetero_guest", - namespace="experiment_sid")) -intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="motor_hetero_host", - namespace="experiment_sid")) -linr_0 = CoordinatedLinR("linr_0", - epochs=2, - batch_size=100, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.2}}, - init_param={"fit_intercept": True}, - cv_data=intersect_0.outputs["output_data"], - cv_param={"n_splits": 3}) - -pipeline.add_task(intersect_0) -pipeline.add_task(linr_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() diff --git a/examples/pipeline/test_linr_sid_warm_start.py b/examples/pipeline/test_linr_sid_warm_start.py deleted file mode 100644 index 0fe2bdea06..0000000000 --- a/examples/pipeline/test_linr_sid_warm_start.py +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLinR, Intersection -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersect_0 = Intersection("intersect_0", method="raw") -intersect_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment")) -intersect_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) -linr_0 = CoordinatedLinR("linr_0", - epochs=3, - batch_size=None, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.15}, "alpha": 0.1}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], - shuffle=False) -linr_1 = CoordinatedLinR("linr_1", train_data=intersect_0.outputs["output_data"], - warm_start_model=linr_0.outputs["output_model"], - epochs=2, - batch_size=None) -linr_2 = CoordinatedLinR("linr_2", - epochs=5, - batch_size=None, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.15}, "alpha": 0.1}, - init_param={"fit_intercept": True, "method": "zeros"}, - train_data=intersect_0.outputs["output_data"], - shuffle=False) - -"""linr_0.guest.component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest_sid", - namespace="experiment")) -linr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="breast_hetero_host_sid", - namespace="experiment"))""" - -evaluation_0 = Evaluation("evaluation_0", - runtime_roles=["guest"], - metrics=["r2_score", "mse"], - label_column_name="y", - input_data=[linr_1.outputs["train_output_data"], linr_2.outputs["train_output_data"]]) - -# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(intersect_0) -pipeline.add_task(linr_0) -pipeline.add_task(linr_1) 
-pipeline.add_task(linr_2) -pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() -import numpy as np - -linr_0_coef = np.array( - pipeline.get_task_info('linr_0').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) -linr_0_intercept = np.array( - pipeline.get_task_info('linr_0').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) - -linr_1_coef = np.array( - pipeline.get_task_info('linr_1').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) -linr_1_intercept = np.array( - pipeline.get_task_info('linr_1').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) -# print(f"linr_1 data: {pipeline.get_task_info('linr_0').get_output_data()}") -linr_2_coef = np.array( - pipeline.get_task_info('linr_2').get_output_model()["output_model"]["data"]['estimator']["param"]["coef_"]) -linr_2_intercept = np.array( - pipeline.get_task_info('linr_2').get_output_model()["output_model"]["data"]['estimator']["param"]["intercept_"]) - -print(f"linr_1 coef: {linr_1_coef}, intercept: {linr_1_intercept}") -print(f"linr_2 coef: {linr_2_coef}, intercept: {linr_2_intercept}") -print(f"linr_1 vs l2_1 coef diff: {linr_1_coef - linr_2_coef}, intercept diff: {linr_1_intercept - linr_2_intercept}") - -print(f"\n evaluation result: {pipeline.get_task_info('evaluation_0').get_output_metric()[0]['data']}") diff --git a/examples/pipeline/test_sample.py b/examples/pipeline/test_sample.py deleted file mode 100644 index a6d54c08f2..0000000000 --- a/examples/pipeline/test_sample.py +++ /dev/null @@ -1,62 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import Intersection -from fate_client.pipeline.components.fate import Sample -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998") - -intersection_0 = Intersection("intersection_0", - method="raw") -intersection_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersection_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -intersection_1 = Intersection("intersection_1", - method="raw") -intersection_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_sid")) -intersection_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_sid")) - -sample_0 = Sample("sample_0", - frac={0: 0.5}, - replace=False, - hetero_sync=True, - input_data=intersection_0.outputs["output_data"]) - -sample_1 = Sample("sample_1", - runtime_roles=["guest"], - n=1000, - replace=True, - hetero_sync=False, - input_data=intersection_0.outputs["output_data"] - ) - -pipeline.add_task(intersection_0) -pipeline.add_task(intersection_1) -pipeline.add_task(sample_0) -pipeline.add_task(sample_1) - -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() -output_data_0 = pipeline.get_task_info("sample_0").get_output_data() 
-output_data_1 = pipeline.get_task_info("sample_1").get_output_data() -print(f"sample 0: {output_data_0};" - f"sample 1: {output_data_1}") diff --git a/examples/pipeline/test_scale.py b/examples/pipeline/test_scale.py deleted file mode 100644 index 1b00541dd3..0000000000 --- a/examples/pipeline/test_scale.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import FeatureScale -from fate_client.pipeline.components.fate import Intersection -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -intersection_0 = Intersection("intersection_0", - method="raw") -intersection_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment")) -intersection_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment")) - -intersection_1 = Intersection("intersection_1", - method="raw") -intersection_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) -intersection_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) - -feature_scale_0 = FeatureScale("feature_scale_0", - 
method="standard", - train_data=intersection_0.outputs["output_data"]) - -feature_scale_1 = FeatureScale("feature_scale_1", - test_data=intersection_1.outputs["output_data"], - input_model=feature_scale_0.outputs["output_model"]) - -pipeline.add_task(intersection_0) -pipeline.add_task(intersection_1) -pipeline.add_task(feature_scale_0) -pipeline.add_task(feature_scale_1) - -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -print(pipeline.get_task_info("feature_scale_0").get_output_model()) -# print(pipeline.get_task_info("feature_scale_1").get_output_model()) - -pipeline.deploy([intersection_0, feature_scale_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -intersection_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) -intersection_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) - -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict() -print(predict_pipeline.get_task_info("feature_scale_0").get_output_model()) diff --git a/examples/pipeline/test_single_linr.py b/examples/pipeline/test_single_linr.py deleted file mode 100644 index ec58f83a78..0000000000 --- a/examples/pipeline/test_single_linr.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLinR -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -"""feature_scale_0 = FeatureScale(name="feature_scale_0", - method="min_max", - train_data=intersection_0.outputs["output_data"]) - -feature_scale_1 = FeatureScale(name="feature_scale_1", - test_data=intersection_1.outputs["output_data"], - input_model=feature_scale_0.outputs["output_model"])""" - -linr_0 = CoordinatedLinR("linr_0", - epochs=10, - batch_size=None, - init_param={"fit_intercept": False}) - -linr_0.guest.component_setting(train_data=DataWarehouseChannel(name="motor_hetero_guest", - namespace="experiment")) -linr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="motor_hetero_host", - namespace="experiment")) - -evaluation_0 = Evaluation("evaluation_0", - runtime_roles=["guest"], - input_data=linr_0.outputs["train_output_data"]) - -# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(linr_0) -pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -# print(pipeline.get_task_info("statistics_0").get_output_model()) -print(pipeline.get_task_info("linr_0").get_output_model()) -print(pipeline.get_task_info("linr_0").get_output_data()) 
-print(pipeline.get_task_info("evaluation_0").get_output_metrics()) - -pipeline.deploy([linr_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.linr_0.guest.component_setting(test_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment")) -deployed_pipeline.linr_0.hosts[0].component_setting(test_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment")) - -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict() diff --git a/examples/pipeline/test_single_lr.py b/examples/pipeline/test_single_lr.py deleted file mode 100644 index fb23747d3f..0000000000 --- a/examples/pipeline/test_single_lr.py +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -lr_0 = CoordinatedLR("lr_0", - epochs=10, - batch_size=100, - optimizer={"method": "sgd", "optimizer_params": {"lr": 0.1}, "alpha": 0.5}, - init_param={"fit_intercept": True}) -lr_1 = CoordinatedLR("lr_1", input_model=lr_0.outputs["output_model"], - test_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_64") - ) - -lr_0.guest.component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_64")) -lr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_64")) - -evaluation_0 = Evaluation("evaluation_0", - runtime_roles=["guest"], - input_data=lr_0.outputs["train_output_data"]) - -# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(lr_0) -pipeline.add_task(lr_1) -pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -# print(pipeline.get_task_info("statistics_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_metric()) -print(f"evaluation metric: ") -print(pipeline.get_task_info("evaluation_0").get_output_metric()) - -pipeline.deploy([lr_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.lr_0.guest.component_setting(test_data=DataWarehouseChannel(name="breast_hetero_guest_data", - namespace="experiment")) -deployed_pipeline.lr_0.hosts[0].component_setting(test_data=DataWarehouseChannel(name="breast_hetero_guest_data", - namespace="experiment")) - 
-predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict() diff --git a/examples/pipeline/test_single_lr_multi.py b/examples/pipeline/test_single_lr_multi.py deleted file mode 100644 index 3dc1a6e41c..0000000000 --- a/examples/pipeline/test_single_lr_multi.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright 2019 The FATE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR -from fate_client.pipeline.components.fate import Evaluation -from fate_client.pipeline.interface import DataWarehouseChannel - -pipeline = FateFlowPipeline().set_roles(guest="9999", host="9998", arbiter="9998") - -"""feature_scale_0 = FeatureScale(name="feature_scale_0", - method="min_max", - train_data=intersection_0.outputs["output_data"]) - -feature_scale_1 = FeatureScale(name="feature_scale_1", - test_data=intersection_1.outputs["output_data"], - input_model=feature_scale_0.outputs["output_model"])""" - -lr_0 = CoordinatedLR("lr_0", - epochs=10, - batch_size=None, - init_param={"fit_intercept": False}) - -lr_0.guest.component_setting(train_data=DataWarehouseChannel(name="vehicle_scale_hetero_guest", - namespace="experiment_64")) -lr_0.hosts[0].component_setting(train_data=DataWarehouseChannel(name="vehicle_scale_hetero_guest", - namespace="experiment_64")) - -evaluation_0 = Evaluation("evaluation_0", - default_eval_metrics="multi", - runtime_roles=["guest"], - input_data=lr_0.outputs["train_output_data"]) - -# pipeline.add_task(feature_scale_0) -# pipeline.add_task(feature_scale_1) -pipeline.add_task(lr_0) -pipeline.add_task(evaluation_0) -# pipeline.add_task(hetero_feature_binning_0) -pipeline.compile() -print(pipeline.get_dag()) -pipeline.fit() - -# print(pipeline.get_task_info("statistics_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_model()) -print(pipeline.get_task_info("lr_0").get_output_data()) -print(pipeline.get_task_info("evaluation_0").get_output_metrics()) - -pipeline.deploy([lr_0]) - -predict_pipeline = FateFlowPipeline() - -deployed_pipeline = pipeline.get_deployed_pipeline() -deployed_pipeline.lr_0.guest.component_setting(test_data=DataWarehouseChannel(name="breast_hetero_guest", - namespace="experiment_64")) 
-deployed_pipeline.lr_0.hosts[0].component_setting(test_data=DataWarehouseChannel(name="breast_hetero_host", - namespace="experiment_64")) - -predict_pipeline.add_task(deployed_pipeline) -predict_pipeline.compile() -# print("\n\n\n") -# print(predict_pipeline.compile().get_dag()) -predict_pipeline.predict() diff --git a/examples/pipeline/union/test_union.py b/examples/pipeline/union/test_union.py new file mode 100644 index 0000000000..a1138117e1 --- /dev/null +++ b/examples/pipeline/union/test_union.py @@ -0,0 +1,81 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import DataSplit, PSI, Union +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + data_split_0 = DataSplit("data_split_0", + train_size=0.6, + validate_size=0.1, + input_data=psi_0.outputs["output_data"]) + + data_split_1 = DataSplit("data_split_1", + train_size=200, + test_size=50, + input_data=psi_0.outputs["output_data"] + ) + + union_0 = Union("union_0", input_data_list=[data_split_0.outputs["train_output_data"], + data_split_0.outputs["test_output_data"]]) + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(data_split_0) + pipeline.add_task(data_split_1) + pipeline.add_task(union_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + print(pipeline.get_dag()) + pipeline.fit() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") 
+ parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/union/union_testsuite.yaml b/examples/pipeline/union/union_testsuite.yaml new file mode 100644 index 0000000000..b5eab53a5b --- /dev/null +++ b/examples/pipeline/union/union_testsuite.yaml @@ -0,0 +1,38 @@ +data: + - file: examples/data/breast_hetero_guest.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + label_type: int64 + label_name: y + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_guest + namespace: experiment + role: guest_0 + - file: examples/data/breast_hetero_host.csv + meta: + delimiter: "," + dtype: float64 + input_format: dense + match_id_name: id + match_id_range: 0 + tag_value_delimiter: ":" + tag_with_value: false + weight_type: float64 + partitions: 4 + head: true + extend_sid: true + table_name: breast_hetero_host + namespace: experiment + role: host_0 +tasks: + union: + script: test_union.py diff --git a/python/fate/ml/glm/hetero/coordinated_lr/guest.py b/python/fate/ml/glm/hetero/coordinated_lr/guest.py index 1969bba4d2..d849aabeb6 100644 --- a/python/fate/ml/glm/hetero/coordinated_lr/guest.py +++ b/python/fate/ml/glm/hetero/coordinated_lr/guest.py @@ -157,7 +157,7 @@ def predict(self, ctx, test_data): for i, class_ctx in ctx.sub_ctx("class").ctxs_range(len(self.labels)): estimator = self.estimator[i] pred = estimator.predict(class_ctx, test_data) - pred_score[self.labels[i]] = pred + pred_score[str(self.labels[i])] = pred pred_df[predict_tools.PREDICT_SCORE] = pred_score.apply_row(lambda v: [list(v)]) predict_result = predict_tools.compute_predict_details( pred_df, 
task_type=predict_tools.MULTI, classes=self.labels diff --git a/python/fate_test/fate_test/scripts/data_cli.py b/python/fate_test/fate_test/scripts/data_cli.py index 7a09980dd2..0c5c58e455 100644 --- a/python/fate_test/fate_test/scripts/data_cli.py +++ b/python/fate_test/fate_test/scripts/data_cli.py @@ -8,11 +8,11 @@ from pathlib import Path import click -# from fate_test._client import Clients +from fate_test._client import Clients from fate_test._config import Config from fate_test._io import LOGGER, echo from fate_test.scripts._options import SharedOptions -from fate_test.scripts._utils import _load_testsuites, _delete_data, _big_data_task +from fate_test.scripts._utils import _load_testsuites, _delete_data, _big_data_task, _upload_data from ruamel import yaml from fate_test import _config @@ -28,14 +28,15 @@ def data_group(): @data_group.command("upload") @click.option('-i', '--include', required=False, type=click.Path(exists=True), multiple=True, metavar="", - help="include *benchmark.json under these paths") + help="include *benchmark.yaml under these paths") @click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, help="exclude *benchmark.json under these paths") @click.option("-t", "--config-type", type=click.Choice(["min_test", "all_examples"]), default="min_test", help="config file") @click.option('-g', '--glob', type=str, help="glob string to filter sub-directory of path specified by ") -@click.option('-s', '--suite-type', required=False, type=click.Choice(["testsuite", "benchmark"]), default="testsuite", +@click.option('-s', '--suite-type', required=False, type=click.Choice(["testsuite", "benchmark", "performance"]), + default="testsuite", help="suite type") @click.option('-r', '--role', type=str, default='all', help="role to process, default to `all`. 
" "use option likes: `guest_0`, `host_0`, `host`") @@ -56,9 +57,17 @@ def upload(ctx, include, exclude, glob, suite_type, role, config_type, **kwargs) yes = ctx.obj["yes"] echo.welcome() echo.echo(f"testsuite namespace: {namespace}", fg='red') + client = Clients(config_inst) if len(include) != 0: echo.echo("loading testsuites:") - suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json" + if suite_type == "benchmark": + suffix = "benchmark.yaml" + elif suite_type == "testsuite": + suite_type = "testsuite.yaml" + elif suite_type == "performance": + suffix = "performance.yaml" + else: + raise ValueError(f"unknown suite type: {suite_type}") suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, suffix=suffix, suite_type=suite_type) for suite in suites: @@ -67,8 +76,9 @@ def upload(ctx, include, exclude, glob, suite_type, role, config_type, **kwargs) echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}") if not yes and not click.confirm("running?"): return - # client_upload(suites=suites, config_inst=config_inst, namespace=namespace) - # todo: upload with pipeline + + for suite in suites: + _upload_data(client, suite, config_inst) else: config = get_config(config_inst) if config_type == 'min_test': @@ -77,14 +87,12 @@ def upload(ctx, include, exclude, glob, suite_type, role, config_type, **kwargs) config_file = config.all_examples_data_config with open(config_file, 'r', encoding='utf-8') as f: - upload_data = json.loads(f.read()) + upload_data = yaml.safe_load(f.read()) echo.echo(f"\tdataset({len(upload_data['data'])}) {config_file}") if not yes and not click.confirm("running?"): return - """with Clients(config_inst) as client: - data_upload(client, config_inst, upload_data)""" - # @todo: upload data with pipeline + _upload_data(client, upload_data, config_inst) echo.farewell() echo.echo(f"testsuite namespace: {namespace}", fg='red') @@ -121,9 +129,9 @@ def delete(ctx, include, exclude, glob, yes, suite_type, **kwargs): 
echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}") if not yes and not click.confirm("running?"): return - with Clients(config_inst) as client: - for i, suite in enumerate(suites): - _delete_data(client, suite) + client = Clients(config_inst) + for i, suite in enumerate(suites): + _delete_data(client, suite) echo.farewell() echo.echo(f"testsuite namespace: {namespace}", fg='red') @@ -200,11 +208,12 @@ def generate(ctx, include, host_data_type, encryption_type, match_rate, sparsity _big_data_task(include, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, config_inst, encryption_type, match_rate, sparsity, force, split_host, output_path, parallelize) if upload_data: - if use_local_data: + """if use_local_data: _config.use_local_data = 0 - _config.data_switch = remove_data - # client_upload(suites=suites, config_inst=config_inst, namespace=namespace, output_path=output_path) - # todo: upload with pipeline + _config.data_switch = remove_data""" + client = Clients(config_inst) + for suite in suites: + _upload_data(client, upload_data, config_inst) @data_group.command("download") @@ -265,6 +274,7 @@ def query_schema(ctx, component_name, job_id, role, party_id, **kwargs): if not yes and not click.confirm("running?"): return + client = Clients(config_inst) # todo: upload data with pipeline """with Clients(config_inst) as client: query_component_output_data(client, config_inst, component_name, job_id, role, party_id)""" From af8417c1e1c74ba9f84d93a86ea3a1a1d756d28c Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 10 Aug 2023 20:09:43 +0800 Subject: [PATCH 21/30] edit bq examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/pipeline-lr-multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index ed3851e510..8088df2917 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ 
b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -71,6 +71,7 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" input_model=lr_0.outputs["output_model"]) evaluation_0 = Evaluation('evaluation_0', + runtime_roles=['guest'], input_data=lr_0.outputs["train_output_data"], metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) pipeline.add_task(psi_0) From d31f580f31e6c785e344c393121ad2c9931f5cb4 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 10 Aug 2023 20:19:53 +0800 Subject: [PATCH 22/30] rm unittest cli(#5008) Signed-off-by: Yu Wu --- python/fate_test/fate_test/scripts/cli.py | 4 +- .../fate_test/scripts/quick_test_cli.py | 95 ------------------- 2 files changed, 2 insertions(+), 97 deletions(-) delete mode 100644 python/fate_test/fate_test/scripts/quick_test_cli.py diff --git a/python/fate_test/fate_test/scripts/cli.py b/python/fate_test/fate_test/scripts/cli.py index 8dc444c7d8..f59bd6c4d4 100644 --- a/python/fate_test/fate_test/scripts/cli.py +++ b/python/fate_test/fate_test/scripts/cli.py @@ -22,7 +22,7 @@ from fate_test.scripts.data_cli import data_group # from fate_test.scripts.flow_test_cli import flow_group from fate_test.scripts.performance_cli import run_task -from fate_test.scripts.quick_test_cli import unittest_group +# from fate_test.scripts.quick_test_cli import unittest_group # from fate_test.scripts.secure_protocol_cli import secure_protocol_group from fate_test.scripts.testsuite_cli import run_suite @@ -32,7 +32,7 @@ "performance": run_task, "benchmark-quality": run_benchmark, "data": data_group, - "unittest": unittest_group + # "unittest": unittest_group } commands_alias = { diff --git a/python/fate_test/fate_test/scripts/quick_test_cli.py b/python/fate_test/fate_test/scripts/quick_test_cli.py deleted file mode 100644 index 08f95e9964..0000000000 --- a/python/fate_test/fate_test/scripts/quick_test_cli.py +++ /dev/null @@ -1,95 +0,0 @@ -import os -import subprocess - -import click -from 
fate_test._config import Config -from fate_test._io import echo -from fate_test.scripts._options import SharedOptions - - -@click.group(name="unittest") -def unittest_group(): - """ - unit test - """ - ... - - -@unittest_group.command("federatedml") -@click.option('-i', '--include', type=click.Path(exists=True), multiple=True, metavar="", - help="Specify federatedml test units for testing") -@SharedOptions.get_shared_options(hidden=True) -@click.pass_context -def unit_test(ctx, include, **kwargs): - """ - federatedml unit test - """ - ctx.obj.update(**kwargs) - ctx.obj.post_process() - namespace = ctx.obj["namespace"] - config_inst = ctx.obj["config"] - yes = ctx.obj["yes"] - echo.echo(f"testsuite namespace: {namespace}", fg='red') - - if not yes and not click.confirm("running?"): - return - - error_log_file = f"./logs/{namespace}/error_test.log" - os.makedirs(os.path.dirname(error_log_file), exist_ok=True) - run_test(includes=include, conf=config_inst, error_log_file=error_log_file) - - -def run_test(includes, conf: Config, error_log_file): - def error_log(stdout): - if stdout is None: - return os.path.abspath(error_log_file) - with open(error_log_file, "a") as f: - f.write(stdout) - - def run_test(file): - global failed_count - echo.echo("start to run test {}".format(file)) - try: - subp = subprocess.Popen(["python", file], - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - stdout, stderr = subp.communicate() - stdout = stdout.decode("utf-8") - echo.echo(stdout) - if "FAILED" in stdout: - failed_count += 1 - error_log(stdout=f"error sequence {failed_count}: {file}") - error_log(stdout=stdout) - except Exception: - return - - def traverse_folder(file_fullname): - if os.path.isfile(file_fullname): - if "_test.py" in file_fullname and "ftl" not in file_fullname: - run_test(file_fullname) - else: - for file in os.listdir(file_fullname): - file_fullname_new = os.path.join(file_fullname, file) - if os.path.isdir(file_fullname_new): - 
traverse_folder(file_fullname_new) - if "_test.py" in file and ("/test" in file_fullname or "tests" in file_fullname): - if "ftl" in file_fullname_new: - continue - else: - run_test(file_fullname_new) - - global failed_count - failed_count = 0 - fate_base = conf.fate_base - ml_dir = os.path.join(fate_base, "python/federatedml") - PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python") - os.environ['PYTHONPATH'] = PYTHONPATH - if len(includes) == 0: - traverse_folder(ml_dir) - else: - ml_dir = includes - for v in ml_dir: - traverse_folder(os.path.abspath(v)) - - echo.echo(f"there are {failed_count} failed test") - if failed_count > 0: - print('Please check the error content: {}'.format(error_log(None))) From 8dd7c033bf7f42305ba222a9450e5c2f86c620f6 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 11 Aug 2023 10:11:19 +0800 Subject: [PATCH 23/30] fix multi lr bq pipeline script(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/default_credit_config.yaml | 4 ++-- examples/benchmark_quality/lr/give_credit_config.yaml | 6 +++--- examples/benchmark_quality/lr/pipeline-lr-multi.py | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index b547c333b9..dacc80dcd2 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,7 +2,7 @@ data_guest: "default_credit_hetero_guest" data_host: "default_credit_hetero_host" idx: "id" label_name: "y" -epochs: 30 +epochs: 20 init_param: fit_intercept: True method: "zeros" @@ -17,6 +17,6 @@ optimizer: penalty: "L2" alpha: 0.001 optimizer_params: - lr: 0.21 + lr: 0.17 batch_size: 3200 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index 480077d4ec..f5e47fcc76 100644 --- 
a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -2,7 +2,7 @@ data_guest: "give_credit_hetero_guest" data_host: "give_credit_hetero_host" idx: "id" label_name: "y" -epochs: 12 +epochs: 16 init_param: fit_intercept: True method: "zeros" @@ -13,9 +13,9 @@ learning_rate_scheduler: total_iters: 1000 optimizer: method: "rmsprop" - penalty: "L2" + penalty: "L1" alpha: 0.01 optimizer_params: - lr: 0.29 + lr: 0.25 batch_size: 5500 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index 8088df2917..463d3cc91a 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -72,6 +72,7 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" evaluation_0 = Evaluation('evaluation_0', runtime_roles=['guest'], + label_column_name=param.get("label_name"), input_data=lr_0.outputs["train_output_data"], metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) pipeline.add_task(psi_0) From a57015133139294f3652a1853ffe200423b8f3bb Mon Sep 17 00:00:00 2001 From: mgqa34 Date: Fri, 11 Aug 2023 13:33:09 +0800 Subject: [PATCH 24/30] dataframe: fix where op, _sample_util bug Signed-off-by: mgqa34 --- .../arch/dataframe/ops/_dimension_scaling.py | 24 ++++++++++++------- python/fate/arch/dataframe/ops/_where.py | 18 ++++++++++---- python/fate/arch/dataframe/utils/_sample.py | 2 +- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/python/fate/arch/dataframe/ops/_dimension_scaling.py b/python/fate/arch/dataframe/ops/_dimension_scaling.py index 8f75901874..81822752aa 100644 --- a/python/fate/arch/dataframe/ops/_dimension_scaling.py +++ b/python/fate/arch/dataframe/ops/_dimension_scaling.py @@ -148,7 +148,7 @@ def _align_blocks(blocks, align_fields_loc=None, full_block_migrate_set=None, ds r_flatten = 
r_block_table.mapPartitions(r_flatten_func, use_previous_behavior=False) l_flatten = l_flatten.union(r_flatten) - partition_order_mappings = get_partition_order_by_raw_table(l_flatten) + partition_order_mappings = get_partition_order_by_raw_table(l_flatten, data_manager.block_row_size) _convert_to_block_func = functools.partial(to_blocks, dm=data_manager, partition_mappings=partition_order_mappings) block_table = l_flatten.mapPartitions(_convert_to_block_func, use_previous_behavior=False) block_table, data_manager = compress_blocks(block_table, data_manager) @@ -187,7 +187,9 @@ def drop(df: "DataFrame", index: "DataFrame" = None) -> "DataFrame": r_flatten_table = index.block_table.mapPartitions(r_flatten_func, use_previous_behavior=False) drop_flatten = l_flatten_table.subtractByKey(r_flatten_table) - partition_order_mappings = get_partition_order_by_raw_table(drop_flatten) if drop_flatten.count() else dict() + partition_order_mappings = get_partition_order_by_raw_table( + drop_flatten, data_manager.block_row_size + ) if drop_flatten.count() else dict() _convert_to_block_func = functools.partial(to_blocks, dm=data_manager, @@ -286,16 +288,20 @@ def _flatten_partition(kvs, block_num=0): def to_blocks(kvs, dm: DataManager = None, partition_mappings: dict = None): - ret_blocks = [[] for i in range(dm.block_num)] + ret_blocks = [[] for _ in range(dm.block_num)] - partition_id = None - for sample_id, value in kvs: - if partition_id is None: - partition_id = partition_mappings[sample_id]["block_id"] + block_id = None + for lid, (sample_id, value) in enumerate(kvs): + if block_id is None: + block_id = partition_mappings[sample_id]["start_block_id"] ret_blocks[0].append(sample_id) for bid, buf in enumerate(value): ret_blocks[bid + 1].append(buf) - ret_blocks = dm.convert_to_blocks(ret_blocks) + if (lid + 1) % dm.block_row_size == 0: + yield block_id, dm.convert_to_blocks(ret_blocks) + ret_blocks = [[] for i in range(dm.block_num)] + block_id += 1 - return [(partition_id, 
ret_blocks)] + if ret_blocks[0]: + yield block_id, dm.convert_to_blocks(ret_blocks) diff --git a/python/fate/arch/dataframe/ops/_where.py b/python/fate/arch/dataframe/ops/_where.py index 96a195bcbd..b04aaea9d2 100644 --- a/python/fate/arch/dataframe/ops/_where.py +++ b/python/fate/arch/dataframe/ops/_where.py @@ -22,6 +22,16 @@ def where(df: DataFrame, other: DataFrame): + """ + df[mask]触发该操作 + a. mask的列可能于df不一致,这个时候,df在mask中不出现的列均为nan + (1) columns完全对等 + (2) columns一致,但顺序不一致 + (3) mask columns数少于df columns数 + b. 当mask中某一列有false的时候,需要考虑类型问题:如果原类型为int/bool等,需要上升为float32,如果为float32,保持不变 + (1) mask 计算哪些列出现False,提前做列类型对齐 + c. 要求df与mask的key是一致的 + """ if df.shape[0] != other.shape[0]: raise ValueError("Row numbers should be identical.") @@ -106,7 +116,7 @@ def _where_float_type(l_block_table, r_block_table, def __convert_na(l_blocks, r_blocks): ret_blocks = [] - for block in ret_blocks: + for block in l_blocks: if isinstance(block, torch.Tensor): ret_blocks.append(block.clone()) elif isinstance(block, np.ndarray): @@ -115,10 +125,10 @@ def __convert_na(l_blocks, r_blocks): ret_blocks.append(block) for (l_bid, l_offset), (r_bid, r_offset) in zip(l_loc_info, r_loc_info): - if isinstance(ret_blocks[l_blocks], torch.Tensor): - ret_blocks[l_bid][:, l_offset][~r_blocks[r_bid][: r_offset]] = torch.nan + if isinstance(ret_blocks[l_bid], torch.Tensor): + ret_blocks[l_bid][:, l_offset][~r_blocks[r_bid][:, r_offset]] = torch.nan else: - ret_blocks[l_bid][:, l_offset][~r_blocks[r_bid][: r_offset]] = np.nan + ret_blocks[l_bid][:, l_offset][~r_blocks[r_bid][:, r_offset]] = np.nan return ret_blocks diff --git a/python/fate/arch/dataframe/utils/_sample.py b/python/fate/arch/dataframe/utils/_sample.py index 1887b45de6..9d4ca78d70 100644 --- a/python/fate/arch/dataframe/utils/_sample.py +++ b/python/fate/arch/dataframe/utils/_sample.py @@ -186,7 +186,7 @@ def _convert_raw_table_to_df( ): from ..ops._indexer import get_partition_order_by_raw_table from ..ops._dimension_scaling import 
to_blocks - partition_order_mapping = get_partition_order_by_raw_table(table) + partition_order_mapping = get_partition_order_by_raw_table(table, data_manager.block_row_size) to_block_func = functools.partial(to_blocks, dm=data_manager, partition_mappings=partition_order_mapping) block_table = table.mapPartitions(to_block_func, use_previous_behavior=False) From 5f6ee9d19668fffb15044462754a3d3102ce4de5 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 11 Aug 2023 14:10:41 +0800 Subject: [PATCH 25/30] make input models optional for hetero feature selection(#4661) edit pipeline examples(#5008) Signed-off-by: Yu Wu --- .../test_feature_binning_asymmetric.py | 10 +++---- .../test_feature_binning_bucket.py | 12 ++++---- .../test_feature_binning_quantile.py | 10 +++---- .../selection_testsuite.yaml | 4 +-- .../test_feature_selection_binning.py | 11 ++++--- .../test_feature_selection_manual.py | 12 ++++---- .../test_feature_selection_multi_model.py | 11 ++++--- .../test_feature_selection_statistics.py | 10 +++---- examples/pipeline/multi_model/test_multi.py | 6 ++-- examples/pipeline/scale/test_scale_min_max.py | 14 ++++----- .../pipeline/scale/test_scale_standard.py | 14 ++++----- examples/pipeline/scale/test_scale_w_lr.py | 12 ++++---- .../pipeline/statistics/test_statistics.py | 2 +- .../components/hetero_feature_selection.py | 30 +++++++++---------- 14 files changed, 78 insertions(+), 80 deletions(-) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py index 9b353527de..2662615184 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py @@ -36,7 +36,7 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) 
- psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) binning_0 = HeteroFeatureBinning("binning_0", @@ -70,10 +70,10 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py index f40c443070..fae56d4dc4 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py @@ -36,11 +36,11 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) psi_1 = PSI("psi_1") - psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + 
psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) @@ -74,10 +74,10 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py index 8a0b9819a8..727f622089 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py @@ -36,7 +36,7 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) binning_0 = HeteroFeatureBinning("binning_0", @@ -69,10 +69,10 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = 
FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml b/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml index 050dc39a14..5d0778dcb1 100644 --- a/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml +++ b/examples/pipeline/hetero_feature_selection/selection_testsuite.yaml @@ -38,7 +38,7 @@ tasks: script: test_feature_selection_binning.py selection-manual: script: test_feature_selection_manual.py - binning-statistics: + selection-statistics: script: test_feature_selection_statistics.py - binning-multi-model: + selection-multi-model: script: test_feature_selection_multi_model.py \ No newline at end of file diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py index 95b06406a4..0e969544f9 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py @@ -36,13 +36,12 @@ def main(config=".../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - 
psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) binning_0 = HeteroFeatureBinning("binning_0", method="quantile", n_bins=10, - bin_col=["x0"], transform_method="bin_idx", train_data=psi_0.outputs["output_data"] ) @@ -68,10 +67,10 @@ def main(config=".../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py index ab4a7729de..722bb36c18 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py @@ -36,11 +36,11 @@ def main(config=".../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) 
selection_0 = HeteroFeatureSelection("selection_0", - method=["statistics"], + method=["manual"], train_data=psi_0.outputs["output_data"]) selection_0.guest.component_setting(manual_param={"keep_col": ["x0", "x1"]}) selection_0.hosts[0].component_setting(manual_param={"filter_out_col": ["x0", "x1"]}) @@ -60,10 +60,10 @@ def main(config=".../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py index 551c1d81e7..b0dc8440ea 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py @@ -36,13 +36,12 @@ def main(config=".../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) binning_0 = HeteroFeatureBinning("binning_0", method="quantile", 
n_bins=10, - bin_col=["x0"], transform_method="bin_idx", train_data=psi_0.outputs["output_data"] ) @@ -74,10 +73,10 @@ def main(config=".../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py index c614a89e93..bb3a3c9839 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py @@ -36,7 +36,7 @@ def main(config=".../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"]) @@ -63,10 +63,10 @@ def main(config=".../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - 
psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/multi_model/test_multi.py b/examples/pipeline/multi_model/test_multi.py index 3ea0424ef7..25f2a4b9d2 100644 --- a/examples/pipeline/multi_model/test_multi.py +++ b/examples/pipeline/multi_model/test_multi.py @@ -16,7 +16,7 @@ from fate_client.pipeline import FateFlowPipeline from fate_client.pipeline.components.fate import PSI, HeteroFeatureSelection, HeteroFeatureBinning, \ - FeatureScale, Union, DataSplit, CoordinatedLR, CoordinatedLinR, Statistics, Sample, Evaluation + FeatureScale, Union, DataSplit, CoordinatedLR, Statistics, Sample, Evaluation from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -38,7 +38,7 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) data_split_0 = DataSplit("data_split_0", input_data=psi_0.outputs["output_data"], @@ -109,7 +109,7 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = 
pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) diff --git a/examples/pipeline/scale/test_scale_min_max.py b/examples/pipeline/scale/test_scale_min_max.py index 2ceb11bc70..71f12abab5 100644 --- a/examples/pipeline/scale/test_scale_min_max.py +++ b/examples/pipeline/scale/test_scale_min_max.py @@ -36,11 +36,11 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) psi_1 = PSI("psi_1") - psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) @@ -57,7 +57,7 @@ def main(config="../config.yaml", namespace=""): statistics_0 = Statistics("statistics_0", metrics=["max", "min", "mean", "std"], - input_data=feature_scale_1.outputs["train_output_data"]) + input_data=feature_scale_1.outputs["test_output_data"]) pipeline.add_task(psi_0) pipeline.add_task(psi_1) @@ -77,10 +77,10 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - 
psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/scale/test_scale_standard.py b/examples/pipeline/scale/test_scale_standard.py index 8bc7625334..008e7c2a75 100644 --- a/examples/pipeline/scale/test_scale_standard.py +++ b/examples/pipeline/scale/test_scale_standard.py @@ -15,7 +15,7 @@ import argparse from fate_client.pipeline import FateFlowPipeline -from fate_client.pipeline.components.fate import CoordinatedLR, PSI, FeatureScale, Evaluation +from fate_client.pipeline.components.fate import PSI, FeatureScale from fate_client.pipeline.interface import DataWarehouseChannel from fate_client.pipeline.utils import test_utils @@ -37,11 +37,11 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) psi_1 = PSI("psi_1") - psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) 
psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) @@ -72,10 +72,10 @@ def main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/scale/test_scale_w_lr.py b/examples/pipeline/scale/test_scale_w_lr.py index 03390a95d4..2a06ed5e00 100644 --- a/examples/pipeline/scale/test_scale_w_lr.py +++ b/examples/pipeline/scale/test_scale_w_lr.py @@ -37,11 +37,11 @@ def main(config="../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) psi_1 = PSI("psi_1") - psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) @@ -81,10 +81,10 @@ def 
main(config="../config.yaml", namespace=""): predict_pipeline = FateFlowPipeline() deployed_pipeline = pipeline.get_deployed_pipeline() - psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", - namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + deployed_pipeline.psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) predict_pipeline.add_task(deployed_pipeline) predict_pipeline.compile() diff --git a/examples/pipeline/statistics/test_statistics.py b/examples/pipeline/statistics/test_statistics.py index 9a17395f2e..e5e7605856 100644 --- a/examples/pipeline/statistics/test_statistics.py +++ b/examples/pipeline/statistics/test_statistics.py @@ -36,7 +36,7 @@ def main(config=".../config.yaml", namespace=""): psi_0 = PSI("psi_0") psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", namespace=f"experiment{namespace}")) - psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"], diff --git a/python/fate/components/components/hetero_feature_selection.py b/python/fate/components/components/hetero_feature_selection.py index 9291965205..8e23a91089 100644 --- a/python/fate/components/components/hetero_feature_selection.py +++ b/python/fate/components/components/hetero_feature_selection.py @@ -29,20 +29,20 @@ def hetero_feature_selection(ctx, role): @hetero_feature_selection.train() def train( - ctx: Context, - role: Role, - 
train_data: cpn.dataframe_input(roles=[GUEST, HOST]), - input_models: cpn.json_model_inputs(roles=[GUEST, HOST]), - method: cpn.parameter( - type=List[params.string_choice(["manual", "iv", "statistics"])], - default=["manual"], - optional=False, - desc="selection method, options: {manual, binning, statistics}", - ), - select_col: cpn.parameter( - type=List[str], - default=None, - desc="list of column names to be selected, if None, all columns will be considered", + ctx: Context, + role: Role, + train_data: cpn.dataframe_input(roles=[GUEST, HOST]), + input_models: cpn.json_model_inputs(roles=[GUEST, HOST], optional=True), + method: cpn.parameter( + type=List[params.string_choice(["manual", "iv", "statistics"])], + default=["manual"], + optional=False, + desc="selection method, options: {manual, binning, statistics}", + ), + select_col: cpn.parameter( + type=List[str], + default=None, + desc="list of column names to be selected, if None, all columns will be considered", ), iv_param: cpn.parameter( type=params.iv_filter_param(), @@ -105,7 +105,7 @@ def train( # temp code end # logger.info(f"input_models: {input_models}, len: {len(input_models)}") - input_iso_models = [model.read() for model in input_models] + input_iso_models = [model.read() for model in input_models] if input_models is not None else None # logger.info(f"read in input_models len: {len(input_iso_models)}; \n read in input models: {input_iso_models}") if role.is_guest: selection = HeteroSelectionModuleGuest( From be08926892985cb73d3eb881ed05d096d8714df7 Mon Sep 17 00:00:00 2001 From: mgqa34 Date: Fri, 11 Aug 2023 16:09:08 +0800 Subject: [PATCH 26/30] dataframe: fix retrieval row api by passing block_row_size Signed-off-by: mgqa34 --- python/fate/arch/dataframe/ops/_dimension_scaling.py | 2 +- python/fate/arch/dataframe/ops/_where.py | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/python/fate/arch/dataframe/ops/_dimension_scaling.py 
b/python/fate/arch/dataframe/ops/_dimension_scaling.py index 81822752aa..d4c4d39ea0 100644 --- a/python/fate/arch/dataframe/ops/_dimension_scaling.py +++ b/python/fate/arch/dataframe/ops/_dimension_scaling.py @@ -256,7 +256,7 @@ def _retrieval(blocks, t: torch.Tensor): if retrieval_raw_table.count() == 0: return df.empty_frame() - partition_order_mappings = get_partition_order_by_raw_table(retrieval_raw_table) + partition_order_mappings = get_partition_order_by_raw_table(retrieval_raw_table, df.data_manager.block_row_size) to_blocks_func = functools.partial(to_blocks, dm=df.data_manager, partition_mappings=partition_order_mappings) block_table = retrieval_raw_table.mapPartitions(to_blocks_func, use_previous_behavior=False) diff --git a/python/fate/arch/dataframe/ops/_where.py b/python/fate/arch/dataframe/ops/_where.py index b04aaea9d2..e3e3308c51 100644 --- a/python/fate/arch/dataframe/ops/_where.py +++ b/python/fate/arch/dataframe/ops/_where.py @@ -22,16 +22,6 @@ def where(df: DataFrame, other: DataFrame): - """ - df[mask]触发该操作 - a. mask的列可能于df不一致,这个时候,df在mask中不出现的列均为nan - (1) columns完全对等 - (2) columns一致,但顺序不一致 - (3) mask columns数少于df columns数 - b. 当mask中某一列有false的时候,需要考虑类型问题:如果原类型为int/bool等,需要上升为float32,如果为float32,保持不变 - (1) mask 计算哪些列出现False,提前做列类型对齐 - c. 
要求df与mask的key是一致的 - """ if df.shape[0] != other.shape[0]: raise ValueError("Row numbers should be identical.") From b643c22b9dfc2a501f6307197c2c60f2dfdd2fe1 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 11 Aug 2023 18:03:44 +0800 Subject: [PATCH 27/30] edit pipeline examples(#5008) Signed-off-by: Yu Wu --- .../lr/default_credit_config.yaml | 2 +- .../pipeline/coordinated_linr/test_linr_cv.py | 2 +- .../coordinated_linr/test_linr_warm_start.py | 6 +- .../pipeline/coordinated_lr/test_lr_cv.py | 2 +- .../coordinated_lr/test_lr_multi_class.py | 2 +- .../coordinated_lr/test_lr_multi_host.py | 2 +- .../coordinated_lr/test_lr_validate.py | 3 +- .../coordinated_lr/test_lr_warm_start.py | 6 +- .../pipeline/data_split/test_data_split.py | 2 +- .../data_split/test_data_split_stratified.py | 3 +- .../scale_testsuite.yaml | 0 .../test_scale_min_max.py | 0 .../test_scale_standard.py | 0 .../test_scale_w_lr.py | 0 .../test_feature_binning_asymmetric.py | 4 +- .../test_feature_binning_bucket.py | 4 +- .../test_feature_binning_quantile.py | 4 +- .../test_feature_selection_binning.py | 2 +- .../test_feature_selection_manual.py | 2 +- .../test_feature_selection_multi_model.py | 3 +- .../test_feature_selection_statistics.py | 2 +- examples/pipeline/multi_model/test_multi.py | 2 +- .../multi_model/test_multi_preprocessing.py | 113 ++++++++++++++++++ examples/pipeline/sample/test_sample.py | 2 +- .../pipeline/sample/test_sample_unilateral.py | 2 +- 25 files changed, 142 insertions(+), 28 deletions(-) rename examples/pipeline/{scale => feature_scale}/scale_testsuite.yaml (100%) rename examples/pipeline/{scale => feature_scale}/test_scale_min_max.py (100%) rename examples/pipeline/{scale => feature_scale}/test_scale_standard.py (100%) rename examples/pipeline/{scale => feature_scale}/test_scale_w_lr.py (100%) create mode 100644 examples/pipeline/multi_model/test_multi_preprocessing.py diff --git a/examples/benchmark_quality/lr/default_credit_config.yaml 
b/examples/benchmark_quality/lr/default_credit_config.yaml index dacc80dcd2..07144a2426 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,7 +2,7 @@ data_guest: "default_credit_hetero_guest" data_host: "default_credit_hetero_host" idx: "id" label_name: "y" -epochs: 20 +epochs: 30 init_param: fit_intercept: True method: "zeros" diff --git a/examples/pipeline/coordinated_linr/test_linr_cv.py b/examples/pipeline/coordinated_linr/test_linr_cv.py index ed33e0556a..082c516ab8 100644 --- a/examples/pipeline/coordinated_linr/test_linr_cv.py +++ b/examples/pipeline/coordinated_linr/test_linr_cv.py @@ -50,7 +50,7 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(psi_0) pipeline.add_task(linr_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() diff --git a/examples/pipeline/coordinated_linr/test_linr_warm_start.py b/examples/pipeline/coordinated_linr/test_linr_warm_start.py index 30f887254c..4caf3a2c20 100644 --- a/examples/pipeline/coordinated_linr/test_linr_warm_start.py +++ b/examples/pipeline/coordinated_linr/test_linr_warm_start.py @@ -76,12 +76,12 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(evaluation_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() - print(f"linr_1 model: {pipeline.get_task_info('linr_1').get_output_model()}") + # print(f"linr_1 model: {pipeline.get_task_info('linr_1').get_output_model()}") # print(f"train linr_1 data: {pipeline.get_task_info('linr_1').get_output_data()}") - print(f"linr_2 model: {pipeline.get_task_info('linr_2').get_output_model()}") + # print(f"linr_2 model: {pipeline.get_task_info('linr_2').get_output_model()}") # print(f"train linr_2 data: {pipeline.get_task_info('linr_2').get_output_data()}") diff --git a/examples/pipeline/coordinated_lr/test_lr_cv.py b/examples/pipeline/coordinated_lr/test_lr_cv.py index 
b981f005e1..bcd23a9b44 100644 --- a/examples/pipeline/coordinated_lr/test_lr_cv.py +++ b/examples/pipeline/coordinated_lr/test_lr_cv.py @@ -50,7 +50,7 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(psi_0) pipeline.add_task(lr_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() diff --git a/examples/pipeline/coordinated_lr/test_lr_multi_class.py b/examples/pipeline/coordinated_lr/test_lr_multi_class.py index 9ce85fe3d9..7709532ee1 100644 --- a/examples/pipeline/coordinated_lr/test_lr_multi_class.py +++ b/examples/pipeline/coordinated_lr/test_lr_multi_class.py @@ -61,7 +61,7 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(evaluation_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() pipeline.deploy([psi_0, lr_0]) diff --git a/examples/pipeline/coordinated_lr/test_lr_multi_host.py b/examples/pipeline/coordinated_lr/test_lr_multi_host.py index a94ff8afcf..1470fed40e 100644 --- a/examples/pipeline/coordinated_lr/test_lr_multi_host.py +++ b/examples/pipeline/coordinated_lr/test_lr_multi_host.py @@ -60,7 +60,7 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(evaluation_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() pipeline.deploy([psi_0, lr_0]) diff --git a/examples/pipeline/coordinated_lr/test_lr_validate.py b/examples/pipeline/coordinated_lr/test_lr_validate.py index 19c44e3903..a0d3b90179 100644 --- a/examples/pipeline/coordinated_lr/test_lr_validate.py +++ b/examples/pipeline/coordinated_lr/test_lr_validate.py @@ -62,11 +62,12 @@ def main(config="../config.yaml", namespace=""): input_data=lr_0.outputs["train_output_data"]) pipeline.add_task(psi_0) + pipeline.add_task(data_split_0) pipeline.add_task(lr_0) pipeline.add_task(evaluation_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() diff --git 
a/examples/pipeline/coordinated_lr/test_lr_warm_start.py b/examples/pipeline/coordinated_lr/test_lr_warm_start.py index 5e554e837f..8f12b5967a 100644 --- a/examples/pipeline/coordinated_lr/test_lr_warm_start.py +++ b/examples/pipeline/coordinated_lr/test_lr_warm_start.py @@ -76,12 +76,12 @@ def main(config="../config.yaml", namespace=""): pipeline.add_task(evaluation_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() - print(f"lr_1 model: {pipeline.get_task_info('lr_1').get_output_model()}") + # print(f"lr_1 model: {pipeline.get_task_info('lr_1').get_output_model()}") # print(f"train lr_1 data: {pipeline.get_task_info('lr_1').get_output_data()}") - print(f"lr_2 model: {pipeline.get_task_info('lr_2').get_output_model()}") + # print(f"lr_2 model: {pipeline.get_task_info('lr_2').get_output_model()}") # print(f"train lr_2 data: {pipeline.get_task_info('lr_2').get_output_data()}") diff --git a/examples/pipeline/data_split/test_data_split.py b/examples/pipeline/data_split/test_data_split.py index 484d26fa93..ee3357fb92 100644 --- a/examples/pipeline/data_split/test_data_split.py +++ b/examples/pipeline/data_split/test_data_split.py @@ -65,7 +65,7 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("data_split_0").get_output_data()) diff --git a/examples/pipeline/data_split/test_data_split_stratified.py b/examples/pipeline/data_split/test_data_split_stratified.py index 647d42ad63..f01aa488dc 100644 --- a/examples/pipeline/data_split/test_data_split_stratified.py +++ b/examples/pipeline/data_split/test_data_split_stratified.py @@ -46,7 +46,6 @@ def main(config="../config.yaml", namespace=""): psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", namespace=f"experiment{namespace}")) - Linear: 0.7 data_split_0 = 
DataSplit("data_split_0", train_size=0.6, validate_size=0.0, @@ -68,7 +67,7 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("data_split_0").get_output_data()) diff --git a/examples/pipeline/scale/scale_testsuite.yaml b/examples/pipeline/feature_scale/scale_testsuite.yaml similarity index 100% rename from examples/pipeline/scale/scale_testsuite.yaml rename to examples/pipeline/feature_scale/scale_testsuite.yaml diff --git a/examples/pipeline/scale/test_scale_min_max.py b/examples/pipeline/feature_scale/test_scale_min_max.py similarity index 100% rename from examples/pipeline/scale/test_scale_min_max.py rename to examples/pipeline/feature_scale/test_scale_min_max.py diff --git a/examples/pipeline/scale/test_scale_standard.py b/examples/pipeline/feature_scale/test_scale_standard.py similarity index 100% rename from examples/pipeline/scale/test_scale_standard.py rename to examples/pipeline/feature_scale/test_scale_standard.py diff --git a/examples/pipeline/scale/test_scale_w_lr.py b/examples/pipeline/feature_scale/test_scale_w_lr.py similarity index 100% rename from examples/pipeline/scale/test_scale_w_lr.py rename to examples/pipeline/feature_scale/test_scale_w_lr.py diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py index 2662615184..bd48a35729 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_asymmetric.py @@ -59,10 +59,10 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() - print(pipeline.get_task_info("binning_1").get_output_model()) + # 
print(pipeline.get_task_info("binning_1").get_output_model()) # print(pipeline.get_task_info("feature_scale_1").get_output_model()) pipeline.deploy([psi_0, binning_0]) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py index fae56d4dc4..34223dcc94 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_bucket.py @@ -63,10 +63,10 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() - print(pipeline.get_task_info("binning_0").get_output_model()) + # print(pipeline.get_task_info("binning_0").get_output_model()) # print(pipeline.get_task_info("feature_scale_1").get_output_model()) pipeline.deploy([psi_0, binning_0]) diff --git a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py index 727f622089..e1dc37525d 100644 --- a/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py +++ b/examples/pipeline/hetero_feature_binning/test_feature_binning_quantile.py @@ -58,10 +58,10 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() - print(pipeline.get_task_info("binning_1").get_output_model()) + # print(pipeline.get_task_info("binning_1").get_output_model()) # print(pipeline.get_task_info("feature_scale_1").get_output_model()) pipeline.deploy([psi_0, binning_0]) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py index 0e969544f9..d639fc63eb 100644 --- 
a/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_binning.py @@ -57,7 +57,7 @@ def main(config=".../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("feature_scale_1").get_output_model()) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py index 722bb36c18..a278387dca 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_manual.py @@ -50,7 +50,7 @@ def main(config=".../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("feature_scale_1").get_output_model()) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py index b0dc8440ea..48186c182e 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_multi_model.py @@ -59,11 +59,12 @@ def main(config=".../config.yaml", namespace=""): pipeline.add_task(psi_0) pipeline.add_task(binning_0) + pipeline.add_task(statistics_0) pipeline.add_task(selection_0) # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("feature_scale_1").get_output_model()) diff --git a/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py 
b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py index bb3a3c9839..48ffe32a5c 100644 --- a/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py +++ b/examples/pipeline/hetero_feature_selection/test_feature_selection_statistics.py @@ -53,7 +53,7 @@ def main(config=".../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("feature_scale_1").get_output_model()) diff --git a/examples/pipeline/multi_model/test_multi.py b/examples/pipeline/multi_model/test_multi.py index 25f2a4b9d2..c069212d17 100644 --- a/examples/pipeline/multi_model/test_multi.py +++ b/examples/pipeline/multi_model/test_multi.py @@ -99,7 +99,7 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() # print(pipeline.get_task_info("feature_scale_1").get_output_model()) diff --git a/examples/pipeline/multi_model/test_multi_preprocessing.py b/examples/pipeline/multi_model/test_multi_preprocessing.py new file mode 100644 index 0000000000..c7a9e77711 --- /dev/null +++ b/examples/pipeline/multi_model/test_multi_preprocessing.py @@ -0,0 +1,113 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +from fate_client.pipeline import FateFlowPipeline +from fate_client.pipeline.components.fate import DataSplit, PSI, Sample, FeatureScale +from fate_client.pipeline.interface import DataWarehouseChannel +from fate_client.pipeline.utils import test_utils + + +def main(config="../config.yaml", namespace=""): + if isinstance(config, str): + config = test_utils.load_job_config(config) + parties = config.parties + guest = parties.guest[0] + host = parties.host[0] + + pipeline = FateFlowPipeline().set_roles(guest=guest, host=host) + if config.task_cores: + pipeline.conf.set("task_cores", config.task_cores) + if config.timeout: + pipeline.conf.set("timeout", config.timeout) + + psi_0 = PSI("psi_0") + psi_0.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + psi_1 = PSI("psi_1") + psi_1.guest.component_setting(input_data=DataWarehouseChannel(name="breast_hetero_guest", + namespace=f"experiment{namespace}")) + psi_1.hosts[0].component_setting(input_data=DataWarehouseChannel(name="breast_hetero_host", + namespace=f"experiment{namespace}")) + + data_split_0 = DataSplit("data_split_0", + train_size=0.6, + validate_size=0.0, + test_size=0.4, + stratified=True, + input_data=psi_0.outputs["output_data"]) + + data_split_1 = DataSplit("data_split_1", + train_size=200, + test_size=50, + stratified=True, + input_data=psi_0.outputs["output_data"] + ) + + sample_0 = Sample("sample_0", + frac={0: 0.5}, + replace=False, + hetero_sync=True, + input_data=psi_0.outputs["output_data"]) + + sample_1 = Sample("sample_1", + n=100, + replace=False, + hetero_sync=True, + input_data=psi_0.outputs["output_data"] + ) + feature_scale_0 = FeatureScale("feature_scale_0", + method="min_max", + feature_range={"x0": [-1, 1]}, + scale_col=["x0", "x1", "x3"], + 
train_data=psi_0.outputs["output_data"]) + pipeline.add_task(psi_0) + pipeline.add_task(psi_1) + pipeline.add_task(data_split_0) + pipeline.add_task(data_split_1) + pipeline.add_task(sample_0) + pipeline.add_task(sample_1) + pipeline.add_task(feature_scale_0) + + # pipeline.add_task(hetero_feature_binning_0) + pipeline.compile() + # print(pipeline.get_dag()) + pipeline.fit() + + # print(pipeline.get_task_info("data_split_0").get_output_data()) + """output_data = pipeline.get_task_info("data_split_0").get_output_data() + import pandas as pd + + print(f"data split 0 train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}") + output_data = pipeline.get_task_info("data_split_1").get_output_data() + print(f"data split 1train size: {pd.DataFrame(output_data['train_output_data']).shape};" + f"validate size: {pd.DataFrame(output_data['validate_output_data']).shape}" + f"test size: {pd.DataFrame(output_data['test_output_data']).shape}")""" + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("PIPELINE DEMO") + parser.add_argument("--config", type=str, default="../config.yaml", + help="config file") + parser.add_argument("--namespace", type=str, default="", + help="namespace for data stored in FATE") + args = parser.parse_args() + main(config=args.config, namespace=args.namespace) diff --git a/examples/pipeline/sample/test_sample.py b/examples/pipeline/sample/test_sample.py index 86fbf04a97..0cbea77bbe 100644 --- a/examples/pipeline/sample/test_sample.py +++ b/examples/pipeline/sample/test_sample.py @@ -65,7 +65,7 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() diff --git a/examples/pipeline/sample/test_sample_unilateral.py 
b/examples/pipeline/sample/test_sample_unilateral.py index 8bdc9b3bef..643a14e60f 100644 --- a/examples/pipeline/sample/test_sample_unilateral.py +++ b/examples/pipeline/sample/test_sample_unilateral.py @@ -66,7 +66,7 @@ def main(config="../config.yaml", namespace=""): # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) + # print(pipeline.get_dag()) pipeline.fit() From 8afbae498f6d8c9d3bf467a234629bd5c41427cf Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 14 Aug 2023 15:52:10 +0800 Subject: [PATCH 28/30] fix median & allow quantile in statistics(#4663) edit lr bq examples(#5008) Signed-off-by: Yu Wu --- .../lr/default_credit_config.yaml | 8 ++--- .../lr/default_credit_lr_sklearn_config.yaml | 2 +- .../lr/give_credit_config.yaml | 2 +- .../benchmark_quality/lr/lr_benchmark.yaml | 36 +++++++++---------- .../lr/pipeline-lr-binary.py | 1 - .../benchmark_quality/lr/pipeline-lr-multi.py | 1 - .../benchmark_quality/lr/sklearn-lr-binary.py | 2 +- .../pipeline/statistics/test_statistics.py | 4 +-- .../fate/components/components/statistics.py | 7 ++-- .../fate/components/core/params/__init__.py | 2 +- .../fate/components/core/params/_metrics.py | 24 ++++++++++++- python/fate/ml/statistics/statistics.py | 23 +++++++++--- 12 files changed, 75 insertions(+), 37 deletions(-) diff --git a/examples/benchmark_quality/lr/default_credit_config.yaml b/examples/benchmark_quality/lr/default_credit_config.yaml index 07144a2426..97d2f7c563 100644 --- a/examples/benchmark_quality/lr/default_credit_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_config.yaml @@ -2,7 +2,7 @@ data_guest: "default_credit_hetero_guest" data_host: "default_credit_hetero_host" idx: "id" label_name: "y" -epochs: 30 +epochs: 16 init_param: fit_intercept: True method: "zeros" @@ -15,8 +15,8 @@ learning_rate_scheduler: optimizer: method: "rmsprop" penalty: "L2" - alpha: 0.001 + alpha: 0.01 optimizer_params: - lr: 0.17 -batch_size: 3200 + lr: 0.22 +batch_size: 
2000 early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml index e1dd4f6932..73ce767d18 100644 --- a/examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml +++ b/examples/benchmark_quality/lr/default_credit_lr_sklearn_config.yaml @@ -7,5 +7,5 @@ fit_intercept: True method: "rmsprop" penalty: "L2" eta0: 0.1 -alpha: 0.5 +alpha: 0.05 batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/give_credit_config.yaml b/examples/benchmark_quality/lr/give_credit_config.yaml index f5e47fcc76..6f8656132b 100644 --- a/examples/benchmark_quality/lr/give_credit_config.yaml +++ b/examples/benchmark_quality/lr/give_credit_config.yaml @@ -17,5 +17,5 @@ optimizer: alpha: 0.01 optimizer_params: lr: 0.25 -batch_size: 5500 +batch_size: null early_stop: "diff" \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index 63cb2603bd..1dc428bbdc 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -206,21 +206,21 @@ hetero_lr-binary-1-default-credit: # conf: "./epsilon_5k_config.yaml" # compare_setting: # relative_tol: 0.01 -hetero_lr-binary-3-give-credit: - local: - script: "./sklearn-lr-binary.py" - conf: "./give_credit_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-binary.py" - conf: "./give_credit_config.yaml" - compare_setting: - relative_tol: 0.01 -multi-vehicle: - local: - script: "./sklearn-lr-multi.py" - conf: "./vehicle_lr_sklearn_config.yaml" - FATE-hetero-lr: - script: "./pipeline-lr-multi.py" - conf: "./vehicle_config.yaml" - compare_setting: - relative_tol: 0.01 +#hetero_lr-binary-3-give-credit: +# local: +# script: "./sklearn-lr-binary.py" +# conf: "./give_credit_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: 
"./pipeline-lr-binary.py" +# conf: "./give_credit_config.yaml" +# compare_setting: +# relative_tol: 0.01 +#multi-vehicle: +# local: +# script: "./sklearn-lr-multi.py" +# conf: "./vehicle_lr_sklearn_config.yaml" +# FATE-hetero-lr: +# script: "./pipeline-lr-multi.py" +# conf: "./vehicle_config.yaml" +# compare_setting: +# relative_tol: 0.01 diff --git a/examples/benchmark_quality/lr/pipeline-lr-binary.py b/examples/benchmark_quality/lr/pipeline-lr-binary.py index 9b41bbe612..fceacd020f 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-binary.py +++ b/examples/benchmark_quality/lr/pipeline-lr-binary.py @@ -87,7 +87,6 @@ def main(config="../../config.yaml", param="./breast_config.yaml", namespace="") if config.timeout: pipeline.conf.set("timeout", config.timeout) pipeline.compile() - print(pipeline.get_dag()) pipeline.fit() lr_0_data = pipeline.get_task_info("lr_0").get_output_data()["train_output_data"] diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index 463d3cc91a..aff7c32a36 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -85,7 +85,6 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" pipeline.conf.set("timeout", config.timeout) pipeline.compile() - print(pipeline.get_dag()) pipeline.fit() lr_0_data = pipeline.get_component("lr_0").get_output_data()["train_output_data"] diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index 2a2710be2f..058b2d79fc 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -76,7 +76,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): fpr, tpr, thresholds = roc_curve(y_test, y_prob) ks = max(tpr - fpr) - result = {"auc": auc_score, "recall": recall, "binary_precision": pr, "accuracy": acc} + 
result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc} print(result) print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") return {}, result diff --git a/examples/pipeline/statistics/test_statistics.py b/examples/pipeline/statistics/test_statistics.py index e5e7605856..2f7bb18051 100644 --- a/examples/pipeline/statistics/test_statistics.py +++ b/examples/pipeline/statistics/test_statistics.py @@ -40,15 +40,15 @@ def main(config=".../config.yaml", namespace=""): namespace=f"experiment{namespace}")) statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"], - metrics=["mean", "std", "min", "max"]) + metrics=["mean", "std", "min", "max", "25%", "median", "75%"]) pipeline.add_task(psi_0) pipeline.add_task(statistics_0) # pipeline.add_task(hetero_feature_binning_0) pipeline.compile() - print(pipeline.get_dag()) pipeline.fit() + # print(f"statistics_0 output model: {pipeline.get_task_info('statistics_0').get_output_model()}") if __name__ == "__main__": diff --git a/python/fate/components/components/statistics.py b/python/fate/components/components/statistics.py index 2bf3661a75..5224b01609 100644 --- a/python/fate/components/components/statistics.py +++ b/python/fate/components/components/statistics.py @@ -25,7 +25,8 @@ def statistics( role: Role, input_data: cpn.dataframe_input(roles=[GUEST, HOST]), metrics: cpn.parameter( - type=Union[List[params.statistic_metrics_param()], params.statistic_metrics_param()], + type=Union[List[Union[params.statistic_metrics_param(), params.legal_percentile()]], + params.statistic_metrics_param(), params.legal_percentile()], default=["mean", "std", "min", "max"], desc="metrics to be computed, default ['count', 'mean', 'std', 'min', 'max']", ), @@ -37,6 +38,8 @@ def statistics( default=True, desc="If False, the calculations of skewness and kurtosis are corrected for statistical bias.", ), + relative_error: cpn.parameter(type=params.confloat(gt=0, le=1), 
default=1e-3, + desc="float, error rate for quantile"), skip_col: cpn.parameter( type=List[str], default=None, @@ -60,7 +63,7 @@ def statistics( for metric in metrics: if metric == "describe": raise ValueError(f"'describe' should not be combined with additional metric names.") - stat_computer = FeatureStatistics(list(set(metrics)), ddof, bias) + stat_computer = FeatureStatistics(list(set(metrics)), ddof, bias, relative_error) input_data = input_data[select_cols] stat_computer.fit(sub_ctx, input_data) diff --git a/python/fate/components/core/params/__init__.py b/python/fate/components/core/params/__init__.py index 40b0d629ad..4e9fdf8bfc 100644 --- a/python/fate/components/core/params/__init__.py +++ b/python/fate/components/core/params/__init__.py @@ -27,6 +27,6 @@ ) from ._init_param import InitParam, init_param from ._learning_rate import LRSchedulerParam, lr_scheduler_param -from ._metrics import metrics_param, statistic_metrics_param +from ._metrics import metrics_param, statistic_metrics_param, legal_percentile from ._optimizer import OptimizerParam, optimizer_param from ._penalty import penalty_param diff --git a/python/fate/components/core/params/_metrics.py b/python/fate/components/core/params/_metrics.py index c911fc0707..e336e1da2c 100644 --- a/python/fate/components/core/params/_metrics.py +++ b/python/fate/components/core/params/_metrics.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import re from typing import Type -from ._fields import StringChoice +from ._fields import StringChoice, Parameter class Metrics(StringChoice): @@ -68,3 +69,24 @@ def metrics_param(auc=True, ks=True, accuracy=True, mse=True) -> Type[str]: choice={k for k, v in choice.items() if v}, ) return type("Metrics", (Metrics,), namespace) + + +class LegalPercentile(str, Parameter): + legal_percentile = r"^(100)|(?:[05]|[0-9]?[05])0*%$" + + @classmethod + def __get_validators__(cls): + yield cls.percentile_validator + + @classmethod + def percentile_validator(cls, v): + if re.match(cls.legal_percentile, v): + return v + raise ValueError(f"provided `{v}` not in legal percentile format") + + +def legal_percentile() -> Type[str]: + namespace = dict( + legal_percentile=LegalPercentile.legal_percentile, + ) + return type("LegalPercentile", (LegalPercentile,), namespace) diff --git a/python/fate/ml/statistics/statistics.py b/python/fate/ml/statistics/statistics.py index 7015756dd6..232d8d6ba2 100644 --- a/python/fate/ml/statistics/statistics.py +++ b/python/fate/ml/statistics/statistics.py @@ -14,6 +14,7 @@ # limitations under the License. 
import logging +import re from typing import List import pandas as pd @@ -25,9 +26,9 @@ class FeatureStatistics(Module): - def __init__(self, metrics: List[str] = None, ddof=1, bias=True): + def __init__(self, metrics: List[str] = None, ddof=1, bias=True, relative_error=1e-3): self.metrics = metrics - self.summary = StatisticsSummary(ddof, bias) + self.summary = StatisticsSummary(ddof, bias, relative_error) def fit(self, ctx: Context, input_data, validate_data=None) -> None: self.summary.compute_metrics(input_data, self.metrics) @@ -49,7 +50,7 @@ def from_model(cls, model) -> "FeatureStatistics": class StatisticsSummary(Module): - def __init__(self, ddof=1, bias=True): + def __init__(self, ddof=1, bias=True, relative_error=1e-3): """if metrics is not None: if len(metrics) == 1 and metrics[0] == "describe": self.inner_metric_names = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] @@ -57,20 +58,31 @@ def __init__(self, ddof=1, bias=True): self.inner_metric_names = metrics""" self.ddof = ddof self.bias = bias + self.relative_error = relative_error self.inner_metric_names = [] self.metrics_summary = None self._count = None self._nan_count = None self._mean = None self._describe = None + self._quantile = None + self._q_pts = None def get_from_describe(self, data, metric): if self._describe is None: self._describe = data.describe(ddof=self.ddof, unbiased=~self.bias) return self._describe[metric] + def get_from_quantile_summary(self, data, metric): + query_q = int(metric[:-1]) / 100 + if self._quantile is None: + self._quantile = data.quantile(q=self._q_pts, relative_error=self.relative_error) + return self._quantile.loc[query_q] + def compute_metrics(self, data, metrics): res = pd.DataFrame(columns=data.schema.columns) + q_metrics = [metric for metric in metrics if re.match(r"^(100|\d{1,2})%$", metric)] + self._q_pts = [int(metric[:-1]) / 100 for metric in q_metrics] for metric in metrics: metric_val = None """if metric == "describe": @@ -80,12 +92,15 @@ def 
compute_metrics(self, data, metrics): return""" if metric in ["sum", "min", "max", "mean", "std", "var"]: metric_val = self.get_from_describe(data, metric) + if metric in q_metrics: + metric_val = self.get_from_quantile_summary(data, metric) elif metric == "count": if self._count is None: self._count = data.count() metric_val = self._count elif metric == "median": - metric_val = data.median() + metric_val = data.quantile(q=0.5, relative_error=self.relative_error) + metric_val = metric_val.loc[0.5] elif metric == "coefficient_of_variation": metric_val = self.get_from_describe(data, "variation") elif metric == "missing_count": From c1f4b498bec1bd7bd3b98abf0b5c5697c997f127 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 14 Aug 2023 17:55:20 +0800 Subject: [PATCH 29/30] batch loader use sorted indexer for default edit bq examples(#5008) Signed-off-by: Yu Wu --- .../lr/breast_lr_sklearn_config.yaml | 2 +- .../benchmark_quality/lr/lr_benchmark.yaml | 54 +++++++++---------- .../fate/arch/dataframe/utils/_dataloader.py | 2 +- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml index 2993795c78..e7fc0c17d4 100644 --- a/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml +++ b/examples/benchmark_quality/lr/breast_lr_sklearn_config.yaml @@ -7,5 +7,5 @@ fit_intercept: True method: "rmsprop" penalty: "L2" eta0: 0.1 -alpha: 0.5 +alpha: 0.05 batch_size: 5000 \ No newline at end of file diff --git a/examples/benchmark_quality/lr/lr_benchmark.yaml b/examples/benchmark_quality/lr/lr_benchmark.yaml index 1dc428bbdc..a26fa9a757 100644 --- a/examples/benchmark_quality/lr/lr_benchmark.yaml +++ b/examples/benchmark_quality/lr/lr_benchmark.yaml @@ -179,15 +179,15 @@ data: table_name: vehicle_scale_hetero_host namespace: experiment role: host_0 -#hetero_lr-binary-0-breast: -# local: -# script: "./sklearn-lr-binary.py" -# conf: 
"./breast_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./breast_config.yaml" -# compare_setting: -# relative_tol: 0.01 +hetero_lr-binary-0-breast: + local: + script: "./sklearn-lr-binary.py" + conf: "./breast_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./breast_config.yaml" + compare_setting: + relative_tol: 0.01 hetero_lr-binary-1-default-credit: local: script: "./sklearn-lr-binary.py" @@ -197,24 +197,24 @@ hetero_lr-binary-1-default-credit: conf: "./default_credit_config.yaml" compare_setting: relative_tol: 0.01 -#hetero_lr-binary-2-epsilon-5k: -# local: -# script: "./sklearn-lr-binary.py" -# conf: "./epsilon_5k_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./epsilon_5k_config.yaml" -# compare_setting: -# relative_tol: 0.01 -#hetero_lr-binary-3-give-credit: -# local: -# script: "./sklearn-lr-binary.py" -# conf: "./give_credit_lr_sklearn_config.yaml" -# FATE-hetero-lr: -# script: "./pipeline-lr-binary.py" -# conf: "./give_credit_config.yaml" -# compare_setting: -# relative_tol: 0.01 +hetero_lr-binary-2-epsilon-5k: + local: + script: "./sklearn-lr-binary.py" + conf: "./epsilon_5k_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./epsilon_5k_config.yaml" + compare_setting: + relative_tol: 0.01 +hetero_lr-binary-3-give-credit: + local: + script: "./sklearn-lr-binary.py" + conf: "./give_credit_lr_sklearn_config.yaml" + FATE-hetero-lr: + script: "./pipeline-lr-binary.py" + conf: "./give_credit_config.yaml" + compare_setting: + relative_tol: 0.01 #multi-vehicle: # local: # script: "./sklearn-lr-multi.py" diff --git a/python/fate/arch/dataframe/utils/_dataloader.py b/python/fate/arch/dataframe/utils/_dataloader.py index d984dcf92f..f22fd3893a 100644 --- a/python/fate/arch/dataframe/utils/_dataloader.py +++ b/python/fate/arch/dataframe/utils/_dataloader.py @@ -124,7 +124,7 @@ def _prepare(self): indexer = 
sorted(list(self._dataset.get_indexer(target="sample_id").collect())) if self._shuffle: random.seed = self._random_state - random.shuffle(indexer) + random.shuffle(indexer) for i, iter_ctx in self._ctx.sub_ctx("dataloader_batch").ctxs_range(self._batch_num): batch_indexer = indexer[self._batch_size * i: self._batch_size * (i + 1)] From 65e8a859ae22ec649bc088b58180eb3a189f5263 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 14 Aug 2023 18:25:05 +0800 Subject: [PATCH 30/30] edit bq examples(#5008) Signed-off-by: Yu Wu --- examples/benchmark_quality/lr/pipeline-lr-multi.py | 1 + examples/benchmark_quality/lr/sklearn-lr-binary.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/benchmark_quality/lr/pipeline-lr-multi.py b/examples/benchmark_quality/lr/pipeline-lr-multi.py index aff7c32a36..b5401c1122 100644 --- a/examples/benchmark_quality/lr/pipeline-lr-multi.py +++ b/examples/benchmark_quality/lr/pipeline-lr-multi.py @@ -74,6 +74,7 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace="" runtime_roles=['guest'], label_column_name=param.get("label_name"), input_data=lr_0.outputs["train_output_data"], + predict_column_name='predict_result', metrics=['multi_recall', 'multi_accuracy', 'multi_precision']) pipeline.add_task(psi_0) pipeline.add_task(lr_0) diff --git a/examples/benchmark_quality/lr/sklearn-lr-binary.py b/examples/benchmark_quality/lr/sklearn-lr-binary.py index 058b2d79fc..51e463df94 100644 --- a/examples/benchmark_quality/lr/sklearn-lr-binary.py +++ b/examples/benchmark_quality/lr/sklearn-lr-binary.py @@ -78,7 +78,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"): ks = max(tpr - fpr) result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc} print(result) - print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") + # print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}") return {}, 
result