From bf2bd9175ade12c42f2686305865002e455203f6 Mon Sep 17 00:00:00 2001
From: chrislisbon
Date: Wed, 18 Oct 2023 19:46:42 +0300
Subject: [PATCH] add InputData from pd and numpy

---
 fedot/core/data/data.py     | 61 +++++++++++++++++++++++++++++++++++++
 test/unit/data/test_data.py |  7 +++--
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py
index c75c0e9483..1bfdf7c553 100644
--- a/fedot/core/data/data.py
+++ b/fedot/core/data/data.py
@@ -53,6 +53,67 @@ class Data:
     # Object with supplementary info
     supplementary_data: SupplementaryData = field(default_factory=SupplementaryData)
 
+    @classmethod
+    def from_numpy(cls,
+                   features_array: np.ndarray,
+                   target_array: np.ndarray,
+                   idx: Optional[np.ndarray] = None,
+                   task: Union[Task, str] = 'classification',
+                   data_type: Optional[DataTypesEnum] = None) -> InputData:
+        """Import data from numpy array.
+
+        Args:
+            features_array: numpy array with features.
+            target_array: numpy array with target.
+            idx: index array for the samples, forwarded unchanged to ``array_to_input_data``.
+            task: the :obj:`Task` to solve with the data, or the name of its task type.
+            data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
+
+        Returns:
+            data
+        """
+        # A string default is resolved per call, so calls never share one default ``Task`` object.
+        if isinstance(task, str):
+            task = Task(TaskTypesEnum(task))
+
+        return array_to_input_data(features_array, target_array, idx, task, data_type)
+
+    @classmethod
+    def from_dataframe(cls,
+                       df: pd.DataFrame,
+                       task: Union[Task, str] = 'classification',
+                       data_type: DataTypesEnum = DataTypesEnum.table,
+                       target_columns: Optional[Union[str, List[Union[str, int]]]] = '') -> InputData:
+        """Import data from pandas DataFrame.
+
+        Args:
+            df: loaded pandas DataFrame.
+            task: the :obj:`Task` to solve with the data, or the name of its task type.
+            data_type: the type of the data. Possible values are listed at :class:`DataTypesEnum`.
+            target_columns: name of the target column (the last column if empty and no target if ``None``).
+
+        Returns:
+            data
+        """
+
+        if isinstance(task, str):
+            task = Task(TaskTypesEnum(task))
+
+        idx = df.index.to_numpy()
+        if target_columns is None:
+            # No target: every column is a feature, so every column name is kept.
+            # NOTE(review): assumes process_target_and_features() also keeps every column for ``None`` - confirm.
+            features_names = df.columns.to_numpy()
+        elif not target_columns:
+            # Empty value: the last column is taken as the target, so its name is dropped.
+            features_names = df.columns.to_numpy()[:-1]
+        else:
+            features_names = df.drop(target_columns, axis=1).columns.to_numpy()
+        features, target = process_target_and_features(df, target_columns)
+
+        return InputData(idx=idx, features=features, target=target, task=task, data_type=data_type,
+                         features_names=features_names)
+
     @classmethod
     def from_csv(cls,
                  file_path: PathType,
diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py
index a3eb55bfaa..20e86ae448 100644
--- a/test/unit/data/test_data.py
+++ b/test/unit/data/test_data.py
@@ -59,9 +59,12 @@ def test_data_from_csv():
                                   idx=idx,
                                   task=task,
                                   data_type=DataTypesEnum.table).features
-    actual_features = InputData.from_csv(
+    actual_features_from_csv = InputData.from_csv(
         os.path.join(test_file_path, file)).features
-    assert np.array_equal(expected_features, actual_features)
+    assert np.array_equal(expected_features, actual_features_from_csv)
+    df.set_index('ID', drop=True, inplace=True)
+    actual_features_from_df = InputData.from_dataframe(df).features
+    assert np.array_equal(expected_features, actual_features_from_df)
 
 
 def test_with_custom_target():