Skip to content

Commit

Permalink
dataframe: fix loc in-place dm bug, add quantile interface
Browse files Browse the repository at this point in the history
Signed-off-by: mgqa34 <mgq3374541@163.com>
  • Loading branch information
mgqa34 committed Jul 13, 2023
1 parent 41ccbc8 commit 3909244
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 4 deletions.
8 changes: 4 additions & 4 deletions python/fate/arch/dataframe/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,10 @@ def describe(self, ddof=1, unbiased=False):
# Compute quantiles of this frame by delegating to ops._quantile.quantile.
# Only `self`, `q` and `relative_error` are forwarded to the helper; `axis`
# and `method` are not read anywhere in this body.
# NOTE(review): this span is a rendered diff hunk with old and new lines
# interleaved. `axis`, `method` and the bare `...` below look like the removed
# lines of the change (the hunk reports equal additions and deletions) -
# confirm against the committed file before relying on this signature.
def quantile(
self,
q,
axis=0,
method="quantile",
relative_error: float = 1e-4
):
# No-op placeholder expression (presumably the old stub body - TODO confirm).
...
# Imported inside the method rather than at module top; presumably to avoid a
# circular import, since ops/_quantile.py imports DataFrame from this module.
from .ops._quantile import quantile
# Inside this body `quantile` now names the imported helper, not this method.
return quantile(self, q, relative_error)

def __add__(self, other: Union[int, float, list, "np.ndarray", "DataFrame", "pd.Series"]) -> "DataFrame":
    """Implement ``self + other``.

    The work is delegated to the class's private ``__arithmetic_operate``
    helper, which receives ``operator.add`` as the binary operation and
    ``other`` as the right-hand operand.
    """
    add_op = operator.add
    return self.__arithmetic_operate(add_op, other)
Expand Down Expand Up @@ -519,7 +519,7 @@ def _merge_list(lhs, rhs):
block_table = transform_list_block_to_frame_block(block_table, self._data_manager)

partition_order_mappings = get_partition_order_mappings(block_table)
return DataFrame(self._ctx, block_table, partition_order_mappings, self._data_manager)
return DataFrame(self._ctx, block_table, partition_order_mappings, self._data_manager.duplicate())

def iloc(self, indexes):
...
Expand Down
58 changes: 58 additions & 0 deletions python/fate/arch/dataframe/ops/_quantile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import functools
import pandas as pd
from .._dataframe import DataFrame
from fate.arch.tensor.inside import GKSummary


def quantile(df: DataFrame, q, relative_error: float):
    """Query per-column quantiles of ``df`` through ``GKSummary`` objects.

    For every block of ``df.block_table`` one ``GKSummary`` per operable
    column is created and fed that column's values. The per-block summaries
    are then merged pairwise with ``+``, and each merged summary is queried
    for the requested quantiles.

    Args:
        df: Frame whose ``data_manager`` and ``block_table`` are read.
        q: A single quantile given as ``int`` or ``float`` (for example ``0``,
            ``0.5`` or ``1``), or an iterable of quantiles.
        relative_error: Value passed to every ``GKSummary`` constructor.

    Returns:
        A ``pandas.DataFrame`` indexed by the requested quantiles, with one
        column per operable field name.
    """
    # Wrap a bare scalar in a list. ``int`` is accepted alongside ``float`` so
    # that ``q=0`` / ``q=1`` do not fall through to ``list(q)`` and raise
    # ``TypeError: 'int' object is not iterable``.
    if isinstance(q, (int, float)):
        q = [q]
    elif not isinstance(q, list):
        q = list(q)

    data_manager = df.data_manager
    column_names = data_manager.infer_operable_field_names()
    # One (block id, offset inside the block) pair per operable column.
    blocks_loc = [data_manager.loc_block(name) for name in column_names]

    def _mapper(blocks, columns_loc=None, error=None):
        """Build one summary per column from a single partition's blocks."""
        summaries = [GKSummary(error) for _ in columns_loc]

        for idx, (bid, offset) in enumerate(columns_loc):
            # Feed the whole column slice of this block into its summary.
            summaries[idx] += blocks[bid][:, offset]

        return summaries

    def _reducer(l_gk_summary_obj_list, r_gk_summary_obj_list):
        """Merge two per-column summary lists element-wise."""
        return [
            l_summary + r_summary
            for l_summary, r_summary in zip(l_gk_summary_obj_list, r_gk_summary_obj_list)
        ]

    # functools.partial binds the column locations and the error value, so the
    # function handed to mapValues takes a single argument.
    gk_summary_func = functools.partial(_mapper, columns_loc=blocks_loc, error=relative_error)
    ret_gk_summary_obj_list = df.block_table.mapValues(gk_summary_func).reduce(_reducer)

    quantile_rets = {
        column_name: gk_summary_obj.queries(q)
        for column_name, gk_summary_obj in zip(column_names, ret_gk_summary_obj_list)
    }

    return pd.DataFrame(quantile_rets, index=q)

0 comments on commit 3909244

Please sign in to comment.