Commit
fix median & allow quantile in statistics (#4663)
edit lr bq examples (#5008)

Signed-off-by: Yu Wu <yolandawu131@gmail.com>
nemirorox committed Aug 14, 2023
1 parent b643c22 commit 8afbae4
Showing 12 changed files with 75 additions and 37 deletions.
8 changes: 4 additions & 4 deletions examples/benchmark_quality/lr/default_credit_config.yaml
@@ -2,7 +2,7 @@ data_guest: "default_credit_hetero_guest"
 data_host: "default_credit_hetero_host"
 idx: "id"
 label_name: "y"
-epochs: 30
+epochs: 16
 init_param:
   fit_intercept: True
   method: "zeros"
@@ -15,8 +15,8 @@ learning_rate_scheduler:
 optimizer:
   method: "rmsprop"
   penalty: "L2"
-  alpha: 0.001
+  alpha: 0.01
   optimizer_params:
-    lr: 0.17
-batch_size: 3200
+    lr: 0.22
+batch_size: 2000
 early_stop: "diff"
@@ -7,5 +7,5 @@ fit_intercept: True
 method: "rmsprop"
 penalty: "L2"
 eta0: 0.1
-alpha: 0.5
+alpha: 0.05
 batch_size: 5000
2 changes: 1 addition & 1 deletion examples/benchmark_quality/lr/give_credit_config.yaml
@@ -17,5 +17,5 @@ optimizer:
   alpha: 0.01
   optimizer_params:
     lr: 0.25
-batch_size: 5500
+batch_size: null
 early_stop: "diff"
36 changes: 18 additions & 18 deletions examples/benchmark_quality/lr/lr_benchmark.yaml
@@ -206,21 +206,21 @@ hetero_lr-binary-1-default-credit:
 #    conf: "./epsilon_5k_config.yaml"
 #  compare_setting:
 #    relative_tol: 0.01
-hetero_lr-binary-3-give-credit:
-  local:
-    script: "./sklearn-lr-binary.py"
-    conf: "./give_credit_lr_sklearn_config.yaml"
-  FATE-hetero-lr:
-    script: "./pipeline-lr-binary.py"
-    conf: "./give_credit_config.yaml"
-  compare_setting:
-    relative_tol: 0.01
-multi-vehicle:
-  local:
-    script: "./sklearn-lr-multi.py"
-    conf: "./vehicle_lr_sklearn_config.yaml"
-  FATE-hetero-lr:
-    script: "./pipeline-lr-multi.py"
-    conf: "./vehicle_config.yaml"
-  compare_setting:
-    relative_tol: 0.01
+#hetero_lr-binary-3-give-credit:
+#  local:
+#    script: "./sklearn-lr-binary.py"
+#    conf: "./give_credit_lr_sklearn_config.yaml"
+#  FATE-hetero-lr:
+#    script: "./pipeline-lr-binary.py"
+#    conf: "./give_credit_config.yaml"
+#  compare_setting:
+#    relative_tol: 0.01
+#multi-vehicle:
+#  local:
+#    script: "./sklearn-lr-multi.py"
+#    conf: "./vehicle_lr_sklearn_config.yaml"
+#  FATE-hetero-lr:
+#    script: "./pipeline-lr-multi.py"
+#    conf: "./vehicle_config.yaml"
+#  compare_setting:
+#    relative_tol: 0.01
1 change: 0 additions & 1 deletion examples/benchmark_quality/lr/pipeline-lr-binary.py
@@ -87,7 +87,6 @@ def main(config="../../config.yaml", param="./breast_config.yaml", namespace="")
     if config.timeout:
         pipeline.conf.set("timeout", config.timeout)
     pipeline.compile()
-    print(pipeline.get_dag())
     pipeline.fit()

     lr_0_data = pipeline.get_task_info("lr_0").get_output_data()["train_output_data"]
1 change: 0 additions & 1 deletion examples/benchmark_quality/lr/pipeline-lr-multi.py
@@ -85,7 +85,6 @@ def main(config="../../config.yaml", param="./vehicle_config.yaml", namespace=""
         pipeline.conf.set("timeout", config.timeout)

     pipeline.compile()
-    print(pipeline.get_dag())
     pipeline.fit()

     lr_0_data = pipeline.get_component("lr_0").get_output_data()["train_output_data"]
2 changes: 1 addition & 1 deletion examples/benchmark_quality/lr/sklearn-lr-binary.py
@@ -76,7 +76,7 @@ def main(config="../../config.yaml", param="./breast_lr_sklearn_config.yaml"):
     fpr, tpr, thresholds = roc_curve(y_test, y_prob)

     ks = max(tpr - fpr)
-    result = {"auc": auc_score, "recall": recall, "binary_precision": pr, "accuracy": acc}
+    result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc}
     print(result)
     print(f"coef_: {lm_fit.coef_}, intercept_: {lm_fit.intercept_}, n_iter: {lm_fit.n_iter_}")
     return {}, result
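For reference, a runnable miniature of the block above with the renamed key (toy labels and scores invented for illustration; the sklearn calls are the ones the script already uses). The rename from "binary_precision" to "precision" presumably keeps the local result keys aligned with the FATE side of the benchmark comparison.

from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             roc_auc_score, roc_curve)

# toy data, for illustration only
y_test = [0, 0, 1, 1, 1, 0]
y_prob = [0.1, 0.4, 0.8, 0.7, 0.6, 0.3]
y_pred = [int(p >= 0.5) for p in y_prob]

auc_score = roc_auc_score(y_test, y_prob)
recall = recall_score(y_test, y_pred)
pr = precision_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
ks = max(tpr - fpr)

# "binary_precision" is now plain "precision"
result = {"auc": auc_score, "recall": recall, "precision": pr, "accuracy": acc}
print(result, ks)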
4 changes: 2 additions & 2 deletions examples/pipeline/statistics/test_statistics.py
@@ -40,15 +40,15 @@ def main(config="../../config.yaml", namespace=""):
                                       namespace=f"experiment{namespace}"))

     statistics_0 = Statistics("statistics_0", input_data=psi_0.outputs["output_data"],
-                              metrics=["mean", "std", "min", "max"])
+                              metrics=["mean", "std", "min", "max", "25%", "median", "75%"])

     pipeline.add_task(psi_0)
     pipeline.add_task(statistics_0)

     # pipeline.add_task(hetero_feature_binning_0)
     pipeline.compile()
-    print(pipeline.get_dag())
     pipeline.fit()
+    # print(f"statistics_0 output model: {pipeline.get_task_info('statistics_0').get_output_model()}")


 if __name__ == "__main__":
7 changes: 5 additions & 2 deletions python/fate/components/components/statistics.py
@@ -25,7 +25,8 @@ def statistics(
     role: Role,
     input_data: cpn.dataframe_input(roles=[GUEST, HOST]),
     metrics: cpn.parameter(
-        type=Union[List[params.statistic_metrics_param()], params.statistic_metrics_param()],
+        type=Union[List[Union[params.statistic_metrics_param(), params.legal_percentile()]],
+                   params.statistic_metrics_param(), params.legal_percentile()],
         default=["mean", "std", "min", "max"],
         desc="metrics to be computed, default ['count', 'mean', 'std', 'min', 'max']",
     ),
@@ -37,6 +38,8 @@
         default=True,
         desc="If False, the calculations of skewness and kurtosis are corrected for statistical bias.",
     ),
+    relative_error: cpn.parameter(type=params.confloat(gt=0, le=1), default=1e-3,
+                                  desc="float, error rate for quantile"),
     skip_col: cpn.parameter(
         type=List[str],
         default=None,
@@ -60,7 +63,7 @@ def statistics(
     for metric in metrics:
         if metric == "describe":
             raise ValueError(f"'describe' should not be combined with additional metric names.")
-    stat_computer = FeatureStatistics(list(set(metrics)), ddof, bias)
+    stat_computer = FeatureStatistics(list(set(metrics)), ddof, bias, relative_error)
     input_data = input_data[select_cols]
     stat_computer.fit(sub_ctx, input_data)
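With the widened metrics type and the new relative_error parameter, a task definition can mix named statistics with percentile strings. A minimal sketch, assuming the fate_client import path below and the psi_0 wiring from the test script above; that relative_error is settable from the pipeline layer follows from the cpn.parameter declaration but is not shown in this commit:

# sketch only: psi_0 is assumed to be defined as in test_statistics.py above
from fate_client.pipeline.components.fate import Statistics  # import path assumed

statistics_0 = Statistics(
    "statistics_0",
    input_data=psi_0.outputs["output_data"],
    metrics=["mean", "std", "min", "max", "25%", "median", "75%"],
    relative_error=1e-3,  # default; bounds the rank error of the quantile sketch
)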
2 changes: 1 addition & 1 deletion python/fate/components/core/params/__init__.py
@@ -27,6 +27,6 @@
 )
 from ._init_param import InitParam, init_param
 from ._learning_rate import LRSchedulerParam, lr_scheduler_param
-from ._metrics import metrics_param, statistic_metrics_param
+from ._metrics import metrics_param, statistic_metrics_param, legal_percentile
 from ._optimizer import OptimizerParam, optimizer_param
 from ._penalty import penalty_param
24 changes: 23 additions & 1 deletion python/fate/components/core/params/_metrics.py
@@ -13,9 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import re
 from typing import Type

-from ._fields import StringChoice
+from ._fields import StringChoice, Parameter


 class Metrics(StringChoice):
@@ -68,3 +69,24 @@ def metrics_param(auc=True, ks=True, accuracy=True, mse=True) -> Type[str]:
         choice={k for k, v in choice.items() if v},
     )
     return type("Metrics", (Metrics,), namespace)
+
+
+class LegalPercentile(str, Parameter):
+    legal_percentile = r"^(100|\d{1,2})%$"
+
+    @classmethod
+    def __get_validators__(cls):
+        yield cls.percentile_validator
+
+    @classmethod
+    def percentile_validator(cls, v):
+        if re.match(cls.legal_percentile, v):
+            return v
+        raise ValueError(f"provided `{v}` not in legal percentile format")
+
+
+def legal_percentile() -> Type[str]:
+    namespace = dict(
+        legal_percentile=LegalPercentile.legal_percentile,
+    )
+    return type("LegalPercentile", (LegalPercentile,), namespace)
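A quick standalone check of what the validator accepts, using only the regex (the Parameter/pydantic plumbing is FATE-internal):

import re

legal_percentile = r"^(100|\d{1,2})%$"  # same pattern as LegalPercentile above

for v in ["0%", "25%", "50%", "100%", "101%", "2.5%", "median"]:
    print(v, "->", "ok" if re.match(legal_percentile, v) else "rejected")
# 0%, 25%, 50%, 100% pass; 101%, 2.5%, and plain metric names are rejected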
23 changes: 19 additions & 4 deletions python/fate/ml/statistics/statistics.py
@@ -14,6 +14,7 @@
 # limitations under the License.

 import logging
+import re
 from typing import List

 import pandas as pd
@@ -25,9 +26,9 @@


 class FeatureStatistics(Module):
-    def __init__(self, metrics: List[str] = None, ddof=1, bias=True):
+    def __init__(self, metrics: List[str] = None, ddof=1, bias=True, relative_error=1e-3):
         self.metrics = metrics
-        self.summary = StatisticsSummary(ddof, bias)
+        self.summary = StatisticsSummary(ddof, bias, relative_error)

     def fit(self, ctx: Context, input_data, validate_data=None) -> None:
         self.summary.compute_metrics(input_data, self.metrics)
@@ -49,28 +50,39 @@ def from_model(cls, model) -> "FeatureStatistics":


 class StatisticsSummary(Module):
-    def __init__(self, ddof=1, bias=True):
+    def __init__(self, ddof=1, bias=True, relative_error=1e-3):
         """if metrics is not None:
             if len(metrics) == 1 and metrics[0] == "describe":
                 self.inner_metric_names = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
             else:
                 self.inner_metric_names = metrics"""
         self.ddof = ddof
         self.bias = bias
+        self.relative_error = relative_error
         self.inner_metric_names = []
         self.metrics_summary = None
         self._count = None
         self._nan_count = None
         self._mean = None
         self._describe = None
+        self._quantile = None
+        self._q_pts = None

     def get_from_describe(self, data, metric):
         if self._describe is None:
             self._describe = data.describe(ddof=self.ddof, unbiased=~self.bias)
         return self._describe[metric]

+    def get_from_quantile_summary(self, data, metric):
+        query_q = int(metric[:-1]) / 100
+        if self._quantile is None:
+            self._quantile = data.quantile(q=self._q_pts, relative_error=self.relative_error)
+        return self._quantile.loc[query_q]
+
     def compute_metrics(self, data, metrics):
         res = pd.DataFrame(columns=data.schema.columns)
+        q_metrics = [metric for metric in metrics if re.match(r"^(100|\d{1,2})%$", metric)]
+        self._q_pts = [int(metric[:-1]) / 100 for metric in q_metrics]
         for metric in metrics:
             metric_val = None
             """if metric == "describe":
@@ -80,12 +92,15 @@ def compute_metrics(self, data, metrics):
                 return"""
             if metric in ["sum", "min", "max", "mean", "std", "var"]:
                 metric_val = self.get_from_describe(data, metric)
+            if metric in q_metrics:
+                metric_val = self.get_from_quantile_summary(data, metric)
             elif metric == "count":
                 if self._count is None:
                     self._count = data.count()
                 metric_val = self._count
             elif metric == "median":
-                metric_val = data.median()
+                metric_val = data.quantile(q=0.5, relative_error=self.relative_error)
+                metric_val = metric_val.loc[0.5]
             elif metric == "coefficient_of_variation":
                 metric_val = self.get_from_describe(data, "variation")
             elif metric == "missing_count":
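The median fix and the new percentile metrics share one access pattern: compute quantiles at the requested fractions in a single pass, then index the result by fraction. A minimal pandas sketch of that pattern (toy data; FATE's distributed DataFrame adds the relative_error argument because its quantile comes from an approximate sketch, whereas pandas computes exactly):

import pandas as pd

df = pd.DataFrame({"x1": range(1, 101), "x2": range(100, 0, -1)})  # toy columns

# "25%", "median", "75%" all reduce to quantile points computed in one pass
q_pts = [0.25, 0.5, 0.75]
quantiles = df.quantile(q=q_pts)   # DataFrame indexed by the quantile fraction

median = quantiles.loc[0.5]        # same .loc[0.5] lookup as the fixed "median" branch
print(quantiles)
print(median)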
