FederatedAI · mgqa34 · Aug 14, 2023 · Jul 26, 2023 · Jul 26, 2023 · Jul 27, 2023
diff --git a/doc/api/fate_test.md b/doc/api/fate_test.md
diff --git a/doc/tutorial/fate_test_tutorial.md b/doc/tutorial/fate_test_tutorial.md
@@ -0,0 +1,91 @@
+# FATE Test Tutorial
+
+A collection of useful tools for running FATE tests and [:file_folder:examples](../../examples).
+
+## quick start
+
+1. install
+
+    ```bash
+    pip install -e python/fate_test
+    ```
+2. edit default fate\_test\_config.yaml
+
+   ```bash
+   # edit the priority config file with the system default editor
+   # fill in the fields as described in the comments
+   fate_test config edit
+   ```
+
+3. configure FATE-Flow Commandline server setting
+
+    ```bash
+    # configure FATE-Flow Commandline server setting
+    flow init --port 9380 --ip 127.0.0.1
+    ```
+
+4. run some fate\_test suite
+
+   ```bash
+   fate_test suite -i <path contains *testsuite.yaml>
+   ```
+
+5. run some fate\_test benchmark quality
+
+   ```bash
+   fate_test benchmark-quality -i <path contains *benchmark.yaml>
+   ```
+
+6. run some fate\_test benchmark performance
+
+   ```bash
+   fate_test performance -i <path contains *performance.yaml>
+   ```
+
+7. useful logs or exceptions will be saved to the logs dir, under the namespace
+shown in the last step
+
+## command types
+
+- [suite](../api/fate_test.md#testsuite): used for running [testsuites](../api/fate_test.md#testsuite-configuration),
+  collection of FATE jobs
+
+  ```bash
+  fate_test suite -i <path contains *testsuite.yaml>
+  ```
+
+- [data](../api/fate_test.md#data): used for upload, delete, and generate dataset
+
+    - [upload/delete data](../api/fate_test.md#data-command-options) command:
+
+      ```bash
+      fate_test data [upload|delete] -i <path1 contains *testsuite.yaml | *benchmark.yaml>
+      ```
+    - [upload example data of min_test/all_examples](../api/fate_test.md#data-command-options) command:
+
+      ```bash
+      fate_test data upload -t min_test
+      fate_test data upload -t all_examples
+      ```
+
+    - [generate data](../api/fate_test.md#generate-command-options) command:
+
+      ```bash
+      fate_test data generate -i <path1 contains *testsuite.yaml | *benchmark.yaml>
+      ```
+
+- [benchmark-quality](../api/fate_test.md#benchmark-quality): used for comparing modeling quality between FATE
+  and other machine learning systems, as specified
+  in [benchmark job configuration](../api/fate_test.md#benchmark-job-configuration)
+
+  ```bash
+  fate_test bq -i <path contains *benchmark.yaml>
+  ```
+
+- [benchmark-performance](../api/fate_test.md#benchmark-performance): used for checking FATE algorithm performance; user
+  should first generate and upload data before running performance testsuite
+
+  ```bash
+  fate_test data generate -i <path contains *performance.yaml> -ng 10000 -fg 10 -fh 10 -m 1.0 --upload-data
+  fate_test performance -i <path contains *performance.yaml> --skip-data
+  ```
diff --git a/examples/benchmark_performance/coordinated_lr/breast_config.yaml b/examples/benchmark_performance/coordinated_lr/breast_config.yaml
@@ -0,0 +1,24 @@
+data_guest: "breast_hetero_guest"
+data_host: "breast_hetero_host"
+idx: "id"
+label_name: "y"
+epochs: 20
+init_param:
+  fit_intercept: True
+  method: "random_uniform"
+  random_state: 42
+learning_rate_scheduler:
+  method: "constant"
+  scheduler_params:
+    factor: 1.0
+    total_iters: 100
+optimizer:
+  method: "rmsprop"
+  penalty: "L2"
+  optimizer_params:
+    lr: 0.05
+  alpha: 0.1
+batch_size: null
+early_stop: "diff"
+task_cores: 4
+timeout: 3600
diff --git a/examples/benchmark_performance/coordinated_lr/config.yaml b/examples/benchmark_performance/coordinated_lr/config.yaml
@@ -0,0 +1,11 @@
+parties: # parties default id
+  guest:
+    - 9999
+  host:
+    - 9998
+    - 9999
+  arbiter:
+    - 9998
+
+data_base_dir: "" # path to project base where data is located
+timeout: 3600
diff --git a/examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml b/examples/benchmark_performance/coordinated_lr/coordinated_lr_performance.yaml
@@ -0,0 +1,39 @@
+data:
+  - file: examples/data/breast_hetero_guest.csv
+    meta:
+      delimiter: ","
+      dtype: float64
+      input_format: dense
+      label_type: int64
+      label_name: y
+      match_id_name: id
+      match_id_range: 0
+      tag_value_delimiter: ":"
+      tag_with_value: false
+      weight_type: float64
+    partitions: 4
+    head: true
+    extend_sid: true
+    table_name: breast_hetero_guest
+    namespace: experiment
+    role: guest_0
+  - file: examples/data/breast_hetero_host.csv
+    meta:
+      delimiter: ","
+      dtype: float64
+      input_format: dense
+      match_id_name: id
+      match_id_range: 0
+      tag_value_delimiter: ":"
+      tag_with_value: false
+      weight_type: float64
+    partitions: 4
+    head: true
+    extend_sid: true
+    table_name: breast_hetero_host
+    namespace: experiment
+    role: host_0
+tasks:
+  normal-lr:
+    script: test_lr_sid.py
+    conf: "./breast_config.yaml"
diff --git a/examples/benchmark_performance/coordinated_lr/test_lr_sid.py b/examples/benchmark_performance/coordinated_lr/test_lr_sid.py
@@ -0,0 +1,103 @@
+#
+#  Copyright 2019 The FATE Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import argparse
+
+from fate_client.pipeline import FateFlowPipeline
+from fate_client.pipeline.components.fate import CoordinatedLR, PSI
+from fate_client.pipeline.components.fate import Evaluation
+from fate_client.pipeline.interface import DataWarehouseChannel
+from fate_client.pipeline.utils import test_utils
+
+
+def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""):
+    # obtain config
+    if isinstance(config, str):
+        config = test_utils.load_job_config(config)
+    parties = config.parties
+    guest = parties.guest[0]
+    host = parties.host[0]
+    arbiter = parties.arbiter[0]
+
+    if isinstance(param, str):
+        param = test_utils.JobConfig.load_from_file(param)
+
+    assert isinstance(param, dict)
+
+    guest_data_table = param.get("data_guest")
+    host_data_table = param.get("data_host")
+
+    guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"}
+    host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"}
+    pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter)
+    if config.task_cores:
+        pipeline.conf.set("task_cores", config.task_cores)
+    if config.timeout:
+        pipeline.conf.set("timeout", config.timeout)
+
+    psi_0 = PSI("psi_0")
+    psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"],
+                                                                  namespace=guest_train_data["namespace"]))
+    psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"],
+                                                                     namespace=host_train_data["namespace"]))
+
+    lr_param = {
+    }
+
+    config_param = {
+        "epochs": param["epochs"],
+        "learning_rate_scheduler": param["learning_rate_scheduler"],
+        "optimizer": param["optimizer"],
+        "batch_size": param["batch_size"],
+        "early_stop": param["early_stop"],
+        "init_param": param["init_param"],
+        "tol": 1e-5
+    }
+    lr_param.update(config_param)
+    lr_0 = CoordinatedLR("lr_0",
+                         train_data=psi_0.outputs["output_data"],
+                         **lr_param)
+    lr_1 = CoordinatedLR("lr_1",
+                         test_data=psi_0.outputs["output_data"],
+                         input_model=lr_0.outputs["output_model"])
+
+    evaluation_0 = Evaluation("evaluation_0",
+                              label_column_name="y",
+                              runtime_roles=["guest"],
+                              metrics=["auc", "binary_precision", "binary_accuracy", "binary_recall"],
+                              input_data=lr_0.outputs["train_output_data"])
+
+    pipeline.add_task(psi_0)
+    pipeline.add_task(lr_0)
+    pipeline.add_task(lr_1)
+    pipeline.add_task(evaluation_0)
+
+    pipeline.compile()
+    print(pipeline.get_dag())
+    pipeline.fit()
+
+    job_id = pipeline.model_info.job_id
+    return job_id
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB")
+    parser.add_argument("-c", "--config", type=str,
+                        help="config file", default="../../config.yaml")
+    parser.add_argument("-p", "--param", type=str,
+                        help="config file for params", default="./breast_config.yaml")
+    args = parser.parse_args()
+    main(args.config, args.param)
diff --git a/examples/benchmark_quality/__init__.py b/examples/benchmark_quality/__init__.py
diff --git a/examples/benchmark_quality/linr/fate-linr.py b/examples/benchmark_quality/linr/fate-linr.py
@@ -0,0 +1,120 @@
+#
+#  Copyright 2019 The FATE Authors. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+import argparse
+
+from fate_client.pipeline import FateFlowPipeline
+from fate_client.pipeline.components.fate import CoordinatedLinR, PSI
+from fate_client.pipeline.components.fate import Evaluation
+from fate_client.pipeline.interface import DataWarehouseChannel
+from fate_client.pipeline.utils import test_utils
+from fate_test.utils import parse_summary_result
+
+
+def main(config="../../config.yaml", param="./linr_config.yaml", namespace=""):
+    # obtain config
+    if isinstance(config, str):
+        config = test_utils.load_job_config(config)
+    parties = config.parties
+    guest = parties.guest[0]
+    host = parties.host[0]
+    arbiter = parties.arbiter[0]
+
+    if isinstance(param, str):
+        param = test_utils.JobConfig.load_from_file(param)
+
+    assert isinstance(param, dict)
+
+    guest_train_data = {"name": "motor_hetero_guest", "namespace": f"experiment{namespace}"}
+    host_train_data = {"name": "motor_hetero_host", "namespace": f"experiment{namespace}"}
+
+    pipeline = FateFlowPipeline().set_roles(guest=guest, host=host, arbiter=arbiter)
+
+    psi_0 = PSI("psi_0")
+    psi_0.guest.component_setting(input_data=DataWarehouseChannel(name=guest_train_data["name"],
+                                                                  namespace=guest_train_data["namespace"]))
+    psi_0.hosts[0].component_setting(input_data=DataWarehouseChannel(name=host_train_data["name"],
+                                                                     namespace=host_train_data["namespace"]))
+
+    linr_param = {
+    }
+
+    config_param = {
+        "epochs": param["epochs"],
+        "learning_rate_scheduler": param["learning_rate_scheduler"],
+        "optimizer": param["optimizer"],
+        "batch_size": param["batch_size"],
+        "early_stop": param["early_stop"],
+        "init_param": param["init_param"],
+        "tol": 1e-5
+    }
+    linr_param.update(config_param)
+    linr_0 = CoordinatedLinR("linr_0",
+                             train_data=psi_0.outputs["output_data"],
+                             **config_param)
+    """linr_1 = CoordinatedLinR("linr_1",
+                             test_data=psi_0.outputs["output_data"],
+                             input_model=linr_0.outputs["output_model"])"""
+
+    evaluation_0 = Evaluation("evaluation_0",
+                              label_column_name="motor_speed",
+                              runtime_roles=["guest"],
+                              metrics=["r2_score",
+                                       "mse",
+                                       "rmse"],
+                              input_data=linr_0.outputs["train_output_data"])
+
+    pipeline.add_task(psi_0)
+    pipeline.add_task(linr_0)
+    # pipeline.add_task(linr_1)
+    pipeline.add_task(evaluation_0)
+
+    if config.task_cores:
+        pipeline.conf.set("task_cores", config.task_cores)
+    if config.timeout:
+        pipeline.conf.set("timeout", config.timeout)
+
+    pipeline.compile()
+    print(pipeline.get_dag())
+    pipeline.fit()
+
+    """linr_0_data = pipeline.get_task_info("linr_0").get_output_data()["train_output_data"]
+    linr_1_data = pipeline.get_task_info("linr_1").get_output_data()["test_output_data"]
+    linr_0_score = extract_data(linr_0_data, "predict_result")
+    linr_0_label = extract_data(linr_0_data, "motor_speed")
+    linr_1_score = extract_data(linr_1_data, "predict_result")
+    linr_1_label = extract_data(linr_1_data, "motor_speed")
+    linr_0_score_label = extract_data(linr_0_data, "predict_result", keep_id=True)
+    linr_1_score_label = extract_data(linr_1_data, "predict_result", keep_id=True)"""
+
+    result_summary = parse_summary_result(pipeline.get_task_info("evaluation_0").get_output_metric()[0]["data"])
+    print(f"result_summary")
+
+    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
+                    "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]}
+                    }
+
+    return data_summary, result_summary
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("BENCHMARK-QUALITY PIPELINE JOB")
+    parser.add_argument("-c", "--config", type=str,
+                        help="config file", default="../../config.yaml")
+    parser.add_argument("-p", "--param", type=str,
+                        help="config file for params", default="./breast_config.yaml")
+    args = parser.parse_args()
+    main(args.config, args.param)