diff --git a/RELEASE.md b/RELEASE.md index 0a37fc6..b508a5c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,10 @@ +## Release 2.1.0 +### Major improvements +* Pipeline: add support for fate-llm 2.0 + * newly added LLMModelLoader, LLMDatasetLoader, LLMDataFuncLoader + * newly added configuration parsing of seq2seq_runner and ot_runner +* Pipeline: unified input interface of components + ## Release 2.0.0 ### Feature Highlights > FATE-Client 2.0: Building Scalable Federated DSL for Application Layer Interconnection diff --git a/doc/pipeline.md b/doc/pipeline.md index 3196bca..1dec69e 100644 --- a/doc/pipeline.md +++ b/doc/pipeline.md @@ -100,11 +100,11 @@ Below lists data input and output of all components: | Coordinated-LR | CoordinatedLR | train_data, validate_data, test_data, cv_data | train_output_data, validate_output_data, test_output_data, cv_output_datas | | Coordinated-LinR | CoordinatedLinR | train_data, validate_data, test_data, cv_data | train_output_data, validate_output_data, test_output_data, cv_output_datas | | Homo-LR | HomoLR | train_data, validate_data, test_data, cv_data | train_output_data, validate_output_data, test_output_data, cv_output_datas | -| Homo-NN | HomoNN | train_data, validate_data, test_data | train_data_output, predict_data_output| -| Hetero-NN | HeteroNN | train_data, validate_data, test_data | train_data_output, predict_data_output| -| Hetero Secure Boosting | HeteroSecureBoost | train_data, validate_data, test_data, cv_data | train_data_output, test_output_data, cv_output_datas | -| Evaluation | Evaluation | input_data | | -| Union | Union | input_data_list | output_data | +| Homo-NN | HomoNN | train_data, validate_data, test_data | train_output_data, test_output_data | +| Hetero-NN | HeteroNN | train_data, validate_data, test_data | train_output_data, test_output_data | +| Hetero Secure Boosting | HeteroSecureBoost | train_data, validate_data, test_data, cv_data | train_output_data, test_output_data, cv_output_datas | +| 
Evaluation | Evaluation | input_datas | | +| Union | Union | input_datas | output_data | ### Model @@ -114,23 +114,23 @@ Model training components also may take `warm_start_model`, but note that only o Below lists model input and output of all components: -| Algorithm | Component Name | Model Input | Model Output | -|--------------------------|------------------------|-------------------------------|--------------| -| PSI | PSI | | | -| Sampling | Sample | | | -| Data Split | DataSplit | | | -| Feature Scale | FeatureScale | input_model | output_model | -| Data Statistics | Statistics | | output_model | -| Hetero Feature Binning | HeteroFeatureBinning | input_model | output_model | -| Hetero Feature Selection | HeteroFeatureSelection | input_models, input_model | output_model | -| Coordinated-LR | CoordinatedLR | input_model, warm_start_model | output_model | -| Coordinated-LinR | CoordinatedLinR | input_model, warm_start_model | output_model | -| Homo-LR | HomoLR | input_model, warm_start_model | output_model | -| Homo-NN | HomoNN | train_model_input, predict_model_input, train_model_output | train_model_output | -| Hetero-NN|HeteroNN|train_model_input, predict_model_input, train_model_output| train_model_output| -| Hetero Secure Boosting | HeteroSecureBoost | train_model_input, predict_model_input, train_model_output | train_model_output | -| Evaluation | Evaluation | | | -| Union | Union | | | +| Algorithm | Component Name | Model Input | Model Output | +|--------------------------|------------------------|--------------------------------|--------------| +| PSI | PSI | | | +| Sampling | Sample | | | +| Data Split | DataSplit | | | +| Feature Scale | FeatureScale | input_model | output_model | +| Data Statistics | Statistics | | output_model | +| Hetero Feature Binning | HeteroFeatureBinning | input_model | output_model | +| Hetero Feature Selection | HeteroFeatureSelection | input_models, input_model | output_model | +| Coordinated-LR | CoordinatedLR | 
input_model, warm_start_model | output_model | +| Coordinated-LinR | CoordinatedLinR | input_model, warm_start_model | output_model | +| Homo-LR | HomoLR | input_model, warm_start_model | output_model | +| Homo-NN | HomoNN | input_model, warm_start_model | output_model | +| Hetero-NN | HeteroNN | input_model, warm_start_model | output_model | +| Hetero Secure Boosting | HeteroSecureBoost | input_model, warm_start_model | output_model | +| Evaluation | Evaluation | | | +| Union | Union | | | ## Build A Pipeline diff --git a/python/MANIFEST.in b/python/MANIFEST.in index 24735fe..facfd91 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -1,3 +1,4 @@ include fate_client/pipeline/component_define/fate/*.yaml include fate_client/pipeline/*.yaml include fate_client/*.yaml +include fate_client/pipeline/components/fate/nn/*.yaml diff --git a/python/fate_client/__init__.py b/python/fate_client/__init__.py index ee8611f..6d0462f 100644 --- a/python/fate_client/__init__.py +++ b/python/fate_client/__init__.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.0.0" +__version__ = "2.1.0" diff --git a/python/fate_client/flow_sdk/api/data.py b/python/fate_client/flow_sdk/api/data.py index bb17b52..e5ce4e1 100644 --- a/python/fate_client/flow_sdk/api/data.py +++ b/python/fate_client/flow_sdk/api/data.py @@ -41,8 +41,8 @@ def upload(self, file: str, head: bool, partitions: int, meta: dict, namespace: {'code': 0, 'message': 'success','data':{...}]} """ kwargs = locals() - if not os.path.exists(file): - raise Exception(f"{file} is not exist, please check the file path") + # if not os.path.exists(file): + # raise Exception(f"{file} is not exist, please check the file path") params = filter_invalid_params(**kwargs) return self._post(url='/data/component/upload', json=params) diff --git a/python/fate_client/pipeline/component_define/fate/coordinated_linr.yaml b/python/fate_client/pipeline/component_define/fate/coordinated_linr.yaml index 0df76fd..65a6e3c 100644 --- a/python/fate_client/pipeline/component_define/fate/coordinated_linr.yaml +++ b/python/fate_client/pipeline/component_define/fate/coordinated_linr.yaml @@ -2,8 +2,8 @@ component: name: coordinated_linr description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -373,8 +373,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/coordinated_lr.yaml b/python/fate_client/pipeline/component_define/fate/coordinated_lr.yaml index 567449c..837a899 100644 --- a/python/fate_client/pipeline/component_define/fate/coordinated_lr.yaml +++ b/python/fate_client/pipeline/component_define/fate/coordinated_lr.yaml @@ -2,8 +2,8 @@ component: name: coordinated_lr description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -383,8 +383,9 @@ component: types: - json_metric 
optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/data_split.yaml b/python/fate_client/pipeline/component_define/fate/data_split.yaml index 5509592..3668d07 100644 --- a/python/fate_client/pipeline/component_define/fate/data_split.yaml +++ b/python/fate_client/pipeline/component_define/fate/data_split.yaml @@ -2,8 +2,8 @@ component: name: data_split description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -111,7 +111,7 @@ component: - host description: '' is_multi: false - model: { } + model: {} output_artifacts: data: train_output_data: @@ -147,7 +147,7 @@ component: - host description: '' is_multi: false - model: { } + model: {} metric: metric: types: @@ -161,3 +161,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/dataframe_transformer.yaml b/python/fate_client/pipeline/component_define/fate/dataframe_transformer.yaml index e26ee88..67b7b90 100644 --- a/python/fate_client/pipeline/component_define/fate/dataframe_transformer.yaml +++ b/python/fate_client/pipeline/component_define/fate/dataframe_transformer.yaml @@ -2,7 +2,7 @@ component: name: dataframe_transformer description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - local diff --git a/python/fate_client/pipeline/component_define/fate/evaluation.yaml b/python/fate_client/pipeline/component_define/fate/evaluation.yaml index 740bbb2..3ae060b 100644 --- a/python/fate_client/pipeline/component_define/fate/evaluation.yaml +++ b/python/fate_client/pipeline/component_define/fate/evaluation.yaml @@ -2,7 +2,7 @@ component: name: evaluation description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -54,7 +54,7 @@ component: use 'label' in 
the input dataframe input_artifacts: data: - input_data: + input_datas: types: - dataframe optional: false @@ -82,3 +82,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/feature_correlation.yaml b/python/fate_client/pipeline/component_define/fate/feature_correlation.yaml index 13dd687..dbf01b0 100644 --- a/python/fate_client/pipeline/component_define/fate/feature_correlation.yaml +++ b/python/fate_client/pipeline/component_define/fate/feature_correlation.yaml @@ -2,8 +2,8 @@ component: name: feature_correlation description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -65,9 +65,9 @@ component: - host description: '' is_multi: false - model: { } + model: {} output_artifacts: - data: { } + data: {} model: output_model: types: @@ -93,3 +93,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/feature_scale.yaml b/python/fate_client/pipeline/component_define/fate/feature_scale.yaml index 4a8ef5a..cad3726 100644 --- a/python/fate_client/pipeline/component_define/fate/feature_scale.yaml +++ b/python/fate_client/pipeline/component_define/fate/feature_scale.yaml @@ -2,7 +2,7 @@ component: name: feature_scale description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -29,7 +29,7 @@ component: title: typing.Union[list, dict] anyOf: - type: array - items: { } + items: {} - type: object default: - 0 @@ -164,8 +164,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/hetero_feature_binning.yaml b/python/fate_client/pipeline/component_define/fate/hetero_feature_binning.yaml index 
bc7fb81..60e968a 100644 --- a/python/fate_client/pipeline/component_define/fate/hetero_feature_binning.yaml +++ b/python/fate_client/pipeline/component_define/fate/hetero_feature_binning.yaml @@ -2,8 +2,8 @@ component: name: hetero_feature_binning description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -266,8 +266,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/hetero_feature_selection.yaml b/python/fate_client/pipeline/component_define/fate/hetero_feature_selection.yaml index 42d0181..bbf5078 100644 --- a/python/fate_client/pipeline/component_define/fate/hetero_feature_selection.yaml +++ b/python/fate_client/pipeline/component_define/fate/hetero_feature_selection.yaml @@ -2,8 +2,8 @@ component: name: hetero_feature_selection description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -178,8 +178,8 @@ component: manual_param: type: ModelMetaclass default: - keep_col: [ ] - filter_out_col: [ ] + keep_col: [] + filter_out_col: [] optional: true description: manual filter param type_meta: @@ -188,13 +188,13 @@ component: properties: keep_col: title: Keep Col - default: [ ] + default: [] type: array items: type: string filter_out_col: title: Filter Out Col - default: [ ] + default: [] type: array items: type: string @@ -309,8 +309,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/hetero_nn.yaml b/python/fate_client/pipeline/component_define/fate/hetero_nn.yaml index 634baa2..6ad7f80 100644 --- a/python/fate_client/pipeline/component_define/fate/hetero_nn.yaml +++ 
b/python/fate_client/pipeline/component_define/fate/hetero_nn.yaml @@ -2,7 +2,7 @@ component: name: hetero_nn description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -87,7 +87,7 @@ component: description: '' is_multi: false model: - train_model_input: + warm_start_model: types: - model_directory optional: true @@ -98,7 +98,7 @@ component: - host description: '' is_multi: false - predict_model_input: + input_model: types: - model_directory optional: false @@ -111,7 +111,7 @@ component: is_multi: false output_artifacts: data: - train_data_output: + train_output_data: types: - dataframe optional: true @@ -122,7 +122,7 @@ component: - host description: '' is_multi: false - predict_data_output: + test_output_data: types: - dataframe optional: true @@ -134,7 +134,7 @@ component: description: '' is_multi: false model: - train_model_output: + output_model: types: - model_directory optional: true @@ -155,3 +155,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/hetero_sbt.yaml b/python/fate_client/pipeline/component_define/fate/hetero_secureboost.yaml similarity index 98% rename from python/fate_client/pipeline/component_define/fate/hetero_sbt.yaml rename to python/fate_client/pipeline/component_define/fate/hetero_secureboost.yaml index c371a8e..df0ee95 100644 --- a/python/fate_client/pipeline/component_define/fate/hetero_sbt.yaml +++ b/python/fate_client/pipeline/component_define/fate/hetero_secureboost.yaml @@ -2,7 +2,7 @@ component: name: hetero_secureboost description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -316,7 +316,7 @@ component: description: '' is_multi: false model: - train_model_input: + warm_start_model: types: - json_model optional: true @@ -327,7 +327,7 @@ component: - host description: '' is_multi: false - predict_model_input: + input_model: types: - json_model optional: false 
@@ -340,7 +340,7 @@ component: is_multi: false output_artifacts: data: - train_data_output: + train_output_data: types: - dataframe optional: true @@ -374,7 +374,7 @@ component: description: '' is_multi: true model: - train_model_output: + output_model: types: - json_model optional: true @@ -395,3 +395,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/homo_lr.yaml b/python/fate_client/pipeline/component_define/fate/homo_lr.yaml index 8840935..d9a362e 100644 --- a/python/fate_client/pipeline/component_define/fate/homo_lr.yaml +++ b/python/fate_client/pipeline/component_define/fate/homo_lr.yaml @@ -2,7 +2,7 @@ component: name: homo_lr description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -206,7 +206,7 @@ component: description: '' is_multi: false model: - train_input_model: + warm_start_model: types: - json_model optional: true @@ -217,7 +217,7 @@ component: - host description: '' is_multi: false - predict_input_model: + input_model: types: - json_model optional: false @@ -253,7 +253,7 @@ component: description: '' is_multi: false model: - train_output_model: + output_model: types: - json_model optional: false diff --git a/python/fate_client/pipeline/component_define/fate/homo_nn.yaml b/python/fate_client/pipeline/component_define/fate/homo_nn.yaml index d68745f..23ce15d 100644 --- a/python/fate_client/pipeline/component_define/fate/homo_nn.yaml +++ b/python/fate_client/pipeline/component_define/fate/homo_nn.yaml @@ -2,7 +2,7 @@ component: name: homo_nn description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -88,7 +88,7 @@ component: description: '' is_multi: false model: - train_model_input: + warm_start_model: types: - model_directory optional: true @@ -99,7 +99,7 @@ component: - host description: '' is_multi: false - predict_model_input: + input_model: types: - model_directory 
optional: false @@ -112,7 +112,7 @@ component: is_multi: false output_artifacts: data: - train_data_output: + train_output_data: types: - dataframe optional: true @@ -123,7 +123,7 @@ component: - host description: '' is_multi: false - predict_data_output: + test_output_data: types: - dataframe optional: true @@ -135,7 +135,7 @@ component: description: '' is_multi: false model: - train_model_output: + output_model: types: - model_directory optional: true @@ -156,3 +156,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/psi.yaml b/python/fate_client/pipeline/component_define/fate/psi.yaml index a0a9c70..d70d215 100644 --- a/python/fate_client/pipeline/component_define/fate/psi.yaml +++ b/python/fate_client/pipeline/component_define/fate/psi.yaml @@ -2,7 +2,7 @@ component: name: psi description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -69,3 +69,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/reader.yaml b/python/fate_client/pipeline/component_define/fate/reader.yaml index 101c031..3ca780b 100644 --- a/python/fate_client/pipeline/component_define/fate/reader.yaml +++ b/python/fate_client/pipeline/component_define/fate/reader.yaml @@ -2,7 +2,7 @@ component: name: reader description: '' provider: fate - version: 2.0.0 + version: 2.1.0 labels: [] roles: - guest @@ -61,3 +61,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/sample.yaml b/python/fate_client/pipeline/component_define/fate/sample.yaml index 5ee2743..e06e723 100644 --- a/python/fate_client/pipeline/component_define/fate/sample.yaml +++ b/python/fate_client/pipeline/component_define/fate/sample.yaml @@ -2,8 +2,8 @@ component: name: sample description: '' 
provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -89,7 +89,7 @@ component: - host description: '' is_multi: false - model: { } + model: {} output_artifacts: data: output_data: @@ -103,7 +103,7 @@ component: - host description: '' is_multi: false - model: { } + model: {} metric: metric: types: @@ -117,3 +117,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/sshe_linr.yaml b/python/fate_client/pipeline/component_define/fate/sshe_linr.yaml index bcdfba9..14995ab 100644 --- a/python/fate_client/pipeline/component_define/fate/sshe_linr.yaml +++ b/python/fate_client/pipeline/component_define/fate/sshe_linr.yaml @@ -2,8 +2,8 @@ component: name: sshe_linr description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -297,8 +297,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/sshe_lr.yaml b/python/fate_client/pipeline/component_define/fate/sshe_lr.yaml index 8aa25fc..f608a4c 100644 --- a/python/fate_client/pipeline/component_define/fate/sshe_lr.yaml +++ b/python/fate_client/pipeline/component_define/fate/sshe_lr.yaml @@ -2,8 +2,8 @@ component: name: sshe_lr description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -297,8 +297,9 @@ component: types: - json_metric optional: false - stages: [ ] - roles: [ ] + stages: [] + roles: [] description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/statistics.yaml b/python/fate_client/pipeline/component_define/fate/statistics.yaml index 8d7fafc..6a64518 100644 --- 
a/python/fate_client/pipeline/component_define/fate/statistics.yaml +++ b/python/fate_client/pipeline/component_define/fate/statistics.yaml @@ -2,8 +2,8 @@ component: name: statistics description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host @@ -106,9 +106,9 @@ component: - host description: '' is_multi: false - model: { } + model: {} output_artifacts: - data: { } + data: {} model: output_model: types: @@ -134,3 +134,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/component_define/fate/union.yaml b/python/fate_client/pipeline/component_define/fate/union.yaml index f1ff1d4..0a17f33 100644 --- a/python/fate_client/pipeline/component_define/fate/union.yaml +++ b/python/fate_client/pipeline/component_define/fate/union.yaml @@ -2,15 +2,15 @@ component: name: union description: '' provider: fate - version: 2.0.0 - labels: [ ] + version: 2.1.0 + labels: [] roles: - guest - host - parameters: { } + parameters: {} input_artifacts: data: - input_data_list: + input_datas: types: - dataframe optional: false @@ -21,7 +21,7 @@ component: - host description: '' is_multi: true - model: { } + model: {} output_artifacts: data: output_data: @@ -35,7 +35,7 @@ component: - host description: '' is_multi: false - model: { } + model: {} metric: metric: types: @@ -49,3 +49,4 @@ component: description: metric, invisible for user is_multi: false schema_version: v1 + diff --git a/python/fate_client/pipeline/components/fate/evaluation.py b/python/fate_client/pipeline/components/fate/evaluation.py index c49446b..16e1adf 100644 --- a/python/fate_client/pipeline/components/fate/evaluation.py +++ b/python/fate_client/pipeline/components/fate/evaluation.py @@ -29,14 +29,14 @@ def __init__( metrics: List[str] = None, predict_column_name: str = None, label_column_name: str = None, - input_data: ArtifactType = PlaceHolder(), + input_datas: ArtifactType = 
PlaceHolder(), ): inputs = locals() self._process_init_inputs(inputs) super(Evaluation, self).__init__() self._name = _name self.runtime_parties = runtime_parties - self.input_data = input_data + self.input_datas = input_datas self.default_eval_setting = default_eval_setting self.metrics = metrics self.predict_column_name = predict_column_name diff --git a/python/fate_client/pipeline/components/fate/hetero_nn.py b/python/fate_client/pipeline/components/fate/hetero_nn.py index 1baa447..7e82eb6 100644 --- a/python/fate_client/pipeline/components/fate/hetero_nn.py +++ b/python/fate_client/pipeline/components/fate/hetero_nn.py @@ -96,8 +96,8 @@ def __init__( train_data: ArtifactType = PlaceHolder(), validate_data: ArtifactType = PlaceHolder(), test_data: ArtifactType = PlaceHolder(), - train_model_input: ArtifactType = PlaceHolder(), - predict_model_input: ArtifactType = PlaceHolder(), + warm_start_model: ArtifactType = PlaceHolder(), + input_model: ArtifactType = PlaceHolder(), ): inputs = locals() @@ -112,5 +112,5 @@ def __init__( self.train_data = train_data self.validate_data = validate_data self.test_data = test_data - self.train_model_input = train_model_input - self.predict_model_input = predict_model_input \ No newline at end of file + self.warm_start_model = warm_start_model + self.input_model = input_model \ No newline at end of file diff --git a/python/fate_client/pipeline/components/fate/hetero_secureboost.py b/python/fate_client/pipeline/components/fate/hetero_secureboost.py index 115ba31..d407895 100644 --- a/python/fate_client/pipeline/components/fate/hetero_secureboost.py +++ b/python/fate_client/pipeline/components/fate/hetero_secureboost.py @@ -13,14 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from typing import List - from ..component_base import Component from ...conf.types import PlaceHolder from ...interface import ArtifactType class HeteroSecureBoost(Component): - yaml_define_path = "./component_define/fate/hetero_sbt.yaml" + yaml_define_path = "./component_define/fate/hetero_secureboost.yaml" def __init__( self, @@ -49,11 +48,11 @@ def __init__( hist_sub: bool = True, he_param: dict = PlaceHolder(), cv_param: dict = PlaceHolder(), - train_data_output: ArtifactType = PlaceHolder(), - train_model_output: ArtifactType = PlaceHolder(), - train_model_input: ArtifactType = PlaceHolder(), + train_output_data: ArtifactType = PlaceHolder(), + output_model: ArtifactType = PlaceHolder(), + warm_start_model: ArtifactType = PlaceHolder(), test_data: ArtifactType = PlaceHolder(), - predict_model_input: ArtifactType = PlaceHolder(), + input_model: ArtifactType = PlaceHolder(), cv_data: ArtifactType = PlaceHolder() ): inputs = locals() @@ -63,7 +62,7 @@ def __init__( self.runtime_parties = runtime_parties self.train_data = train_data self.validate_data = validate_data - self.train_model_input = train_model_input + self.warm_start_model = warm_start_model self.num_trees = num_trees self.learning_rate = learning_rate self.max_depth = max_depth @@ -86,9 +85,9 @@ def __init__( self.split_info_pack = split_info_pack self.hist_sub = hist_sub self.he_param = he_param - self.train_data_output = train_data_output - self.train_model_output = train_model_output + self.train_output_data = train_output_data + self.output_model = output_model self.test_data = test_data - self.predict_model_input = predict_model_input + self.input_model = input_model self.cv_param = cv_param self.cv_data = cv_data diff --git a/python/fate_client/pipeline/components/fate/homo_lr.py b/python/fate_client/pipeline/components/fate/homo_lr.py index 3eefebd..188c568 100644 --- a/python/fate_client/pipeline/components/fate/homo_lr.py +++ b/python/fate_client/pipeline/components/fate/homo_lr.py @@ -39,8 
+39,8 @@ def __init__( train_data: ArtifactType = PlaceHolder(), validate_data: ArtifactType = PlaceHolder(), test_data: ArtifactType = PlaceHolder(), - train_input_model: ArtifactType = PlaceHolder(), - predict_input_model: ArtifactType = PlaceHolder(), + warm_start_model: ArtifactType = PlaceHolder(), + input_model: ArtifactType = PlaceHolder(), ): inputs = locals() self._process_init_inputs(inputs) @@ -58,7 +58,7 @@ def __init__( self.train_data = train_data self.validate_data = validate_data self.test_data = test_data - self.train_input_model = train_input_model - self.predict_input_model = predict_input_model + self.warm_start_model = warm_start_model + self.input_model = input_model self.ovr = ovr self.label_num = label_num diff --git a/python/fate_client/pipeline/components/fate/homo_nn.py b/python/fate_client/pipeline/components/fate/homo_nn.py index 2b88e78..0b020ac 100644 --- a/python/fate_client/pipeline/components/fate/homo_nn.py +++ b/python/fate_client/pipeline/components/fate/homo_nn.py @@ -47,7 +47,7 @@ def get_config_of_default_runner( dataset: DatasetLoader = None, data_collator: CustFuncLoader = None, tokenizer: CustFuncLoader = None, - task_type: Literal["binary", "multi", "regression", "others"] = "binary", + task_type: Literal["binary", "multi", "regression", "causal_lm", "others"] = "binary", ): if model is not None and not isinstance( @@ -57,7 +57,6 @@ def get_config_of_default_runner( f"The model is of type {type(model)}, not TorchModule, Sequential, or ModelLoader. Remember to use patched_torch_hook for passing NN Modules or Optimizers." ) - if fed_args is not None and not isinstance(fed_args, FedArguments): raise ValueError( f"Federation arguments are of type {type(fed_args)}, not FedArguments." 
@@ -73,6 +72,65 @@ def get_config_of_default_runner( return runner_conf +def get_config_of_seq2seq_runner( + algo: str = "fedavg", + model: Union[TorchModule, Sequential, ModelLoader] = None, + optimizer: Union[TorchOptimizer, Loader] = None, + training_args: TrainingArguments = None, + fed_args: FedArguments = None, + dataset: DatasetLoader = None, + data_collator: CustFuncLoader = None, + tokenizer: CustFuncLoader = None, + task_type: Literal["causal_lm", "others"] = "causal_lm", + save_trainable_weights_only: bool = False, +): + runner_conf = get_config_of_default_runner( + algo=algo, + model=model, + optimizer=optimizer, + training_args=training_args, + fed_args=fed_args, + dataset=dataset, + data_collator=data_collator, + tokenizer=tokenizer, + task_type=task_type + ) + runner_conf.pop("loss_conf") + runner_conf["save_trainable_weights_only"] = save_trainable_weights_only + + return runner_conf + + +def get_conf_of_ot_runner( + model: Union[TorchModule, Sequential, ModelLoader] = None, + optimizer: Union[TorchOptimizer, Loader] = None, + training_args: TrainingArguments = None, + fed_args: FedArguments = None, + dataset: DatasetLoader = None, + data_collator: CustFuncLoader = None, + tokenizer: CustFuncLoader = None, + task_type: Literal["causal_lm", "others"] = "causal_lm", + save_trainable_weights_only: bool = False, + aggregate_model: bool = False +): + runner_conf = get_config_of_default_runner( + algo='ot', # offsite-tuning + model=model, + optimizer=optimizer, + training_args=training_args, + fed_args=fed_args, + dataset=dataset, + data_collator=data_collator, + tokenizer=tokenizer, + task_type=task_type + ) + runner_conf.pop("loss_conf") + runner_conf["save_trainable_weights_only"] = save_trainable_weights_only + runner_conf["aggregate_model"] = aggregate_model + + return runner_conf + + class HomoNN(Component): yaml_define_path = "./component_define/fate/homo_nn.yaml" @@ -87,8 +145,8 @@ def __init__( train_data: ArtifactType = PlaceHolder(), 
validate_data: ArtifactType = PlaceHolder(), test_data: ArtifactType = PlaceHolder(), - train_model_input: ArtifactType = PlaceHolder(), - predict_model_input: ArtifactType = PlaceHolder(), + warm_start_model: ArtifactType = PlaceHolder(), + input_model: ArtifactType = PlaceHolder(), ): inputs = locals() self._process_init_inputs(inputs) @@ -102,5 +160,5 @@ def __init__( self.train_data = train_data self.validate_data = validate_data self.test_data = test_data - self.train_model_input = train_model_input - self.predict_model_input = predict_model_input + self.warm_start_model = warm_start_model + self.input_model = input_model diff --git a/python/fate_client/pipeline/components/fate/nn/algo_params.py b/python/fate_client/pipeline/components/fate/nn/algo_params.py index 99af15b..a92f4ef 100644 --- a/python/fate_client/pipeline/components/fate/nn/algo_params.py +++ b/python/fate_client/pipeline/components/fate/nn/algo_params.py @@ -1,4 +1,5 @@ from transformers import TrainingArguments as _hf_TrainingArguments +from transformers import Seq2SeqTrainingArguments as _hf_Seq2SeqTrainingArguments from dataclasses import dataclass, field, fields from typing import Union, Literal from enum import Enum @@ -70,7 +71,6 @@ class _TrainingArguments(_hf_TrainingArguments): save_safetensors: bool = field(default=False) use_cpu: bool = field(default=True) - def __post_init__(self): self.push_to_hub = False self.hub_model_id = None @@ -84,7 +84,6 @@ def __post_init__(self): super().__post_init__() - @dataclass class TrainingArguments(_TrainingArguments): @@ -99,6 +98,54 @@ def to_dict(self): set_args = {name: value for name, value in all_args.items() if value != default_args.get(name)} return set_args + +@dataclass +class _S2STrainingArguments(_hf_Seq2SeqTrainingArguments): + # in fate-2.0, we will control the output dir when using pipeline + output_dir: str = field(default="./") + disable_tqdm: bool = field(default=True) + save_strategy: str = field(default="no") + 
logging_strategy: str = field(default="epoch") + logging_steps: int = field(default=1) + evaluation_strategy: str = field(default="no") + logging_dir: str = field(default=None) + checkpoint_idx: int = field(default=None) + # by default, we use constant learning rate, the same as FATE-1.X + lr_scheduler_type: str = field(default="constant") + log_level: str = field(default="info") + deepspeed: Optional[str] = field(default=None) + save_safetensors: bool = field(default=False) + use_cpu: bool = field(default=True) + remove_unused_columns: bool = field(default=True) + + def __post_init__(self): + self.push_to_hub = False + self.hub_model_id = None + self.hub_strategy = "every_save" + self.hub_token = None + self.hub_private_repo = False + self.push_to_hub_model_id = None + self.push_to_hub_organization = None + self.push_to_hub_token = None + + super().__post_init__() + + +@dataclass +class Seq2SeqTrainingArguments(_S2STrainingArguments): + # To simplify the to dict result(to_dict only return non-default args) + + def to_dict(self): + # Call the superclass's to_dict method + all_args = super().to_dict() + # Get a dict with default values for all fields + default_args = _S2STrainingArguments().to_dict() + # Filter out args that are equal to their default values + set_args = {name: value for name, value in all_args.items() if value != default_args.get(name)} + return set_args + + + @dataclass class FedAVGArguments(FedArguments): pass @@ -165,6 +212,7 @@ def to_dict(self): d['agg_type'] = 'hess' return d + def parse_agglayer_conf(agglayer_arg_conf): import copy @@ -184,6 +232,7 @@ def parse_agglayer_conf(agglayer_arg_conf): Top & Bottom Model Strategy """ + @dataclass class TopModelStrategyArguments(Args): diff --git a/python/fate_client/pipeline/components/fate/nn/common_utils.py b/python/fate_client/pipeline/components/fate/nn/common_utils.py index aeda89f..6e5ca17 100644 --- a/python/fate_client/pipeline/components/fate/nn/common_utils.py +++ 
b/python/fate_client/pipeline/components/fate/nn/common_utils.py @@ -12,6 +12,7 @@ from typing import Union from fate_client.pipeline.components.fate.nn.algo_params import ( TrainingArguments, + Seq2SeqTrainingArguments ) from typing import Literal @@ -23,7 +24,7 @@ def get_config_of_default_runner( dataset: DatasetLoader = None, data_collator: CustFuncLoader = None, tokenizer: CustFuncLoader = None, - task_type: Literal["binary", "multi", "regression", "others"] = "binary", + task_type: Literal["binary", "multi", "regression", "causal_lm", "others"] = "binary", ): if optimizer is not None and not isinstance(optimizer, (TorchOptimizer, Loader)): @@ -36,9 +37,9 @@ def get_config_of_default_runner( f"The loss function is of type {type(loss)}, not TorchModule or CustFuncLoader." ) - if training_args is not None and not isinstance(training_args, TrainingArguments): + if training_args is not None and not isinstance(training_args, (TrainingArguments, Seq2SeqTrainingArguments)): raise ValueError( - f"Training arguments are of type {type(training_args)}, not TrainingArguments." + f"Training arguments are of type {type(training_args)}, not TrainingArguments/Seq2SeqTrainingArguments." ) if dataset is not None and not isinstance(dataset, DatasetLoader): @@ -54,9 +55,9 @@ def get_config_of_default_runner( f"The tokenizer is of type {type(tokenizer)}, not CustFuncLoader." ) - if task_type not in ["binary", "multi", "regression", "others"]: + if task_type not in ["binary", "multi", "regression", "causal_lm", "others"]: raise ValueError( - f"The task type is {task_type}, not 'binary', 'multi', 'regression', 'others'." + f"The task type is {task_type}, not 'binary', 'multi', 'regression', 'causal_lm', 'others'." 
) runner_conf = { diff --git a/python/fate_client/pipeline/components/fate/nn/loader.py b/python/fate_client/pipeline/components/fate/nn/loader.py index 5e72772..21aa1b5 100644 --- a/python/fate_client/pipeline/components/fate/nn/loader.py +++ b/python/fate_client/pipeline/components/fate/nn/loader.py @@ -1,3 +1,18 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import sys import importlib.util @@ -5,6 +20,7 @@ import json import yaml import difflib +import torch as t class _Source(object): @@ -13,6 +29,12 @@ class _Source(object): CUST_FUNC = "fate.ml.nn.cust_func" +class _LLMSource(object): + MODEL_ZOO = "fate_llm.model_zoo" + DATASET = "fate_llm.dataset" + CUST_FUNC = "fate_llm.data" + + SOURCE_FILE = "source.yaml" @@ -97,24 +119,16 @@ def _load_item(self): suggestion = self._find_similar_module_names() if suggestion: raise ValueError( - "Module: {} not found in the import path. Do you mean {}?".format( - self.module_name, suggestion - ) + "Module: {} not found in the import path. 
Do you mean {}?".format(self.module_name, suggestion) ) else: - raise ValueError( - "Module: {} not found in the import path.".format(self.module_name) - ) + raise ValueError("Module: {} not found in the import path.".format(self.module_name)) module = importlib.import_module(self.module_name) item = getattr(module, self.item_name, None) if item is None: - raise ValueError( - "Item: {} not found in module: {}.".format( - self.item_name, self.module_name - ) - ) + raise ValueError("Item: {} not found in module: {}.".format(self.item_name, self.module_name)) if self.source_path is not None: sys.path.remove(self.source_path) @@ -125,6 +139,7 @@ def _find_similar_module_names(self): if self.source_path is None: return None files = os.listdir(self.source_path) + print("source matches are", files) similar_names = difflib.get_close_matches(self.module_name, files) return similar_names[0] if similar_names else None @@ -154,28 +169,54 @@ def from_dict(data_dict): ) -class ModelLoader(Loader): +class ModelLoader(Loader, t.nn.Module): + + source_class = _Source + def __init__(self, module_name, item_name, source=None, **kwargs): if source is None: - module_name = ( - f"{_Source.MODEL_ZOO}.{module_name}" # add prefix for moduele loader - ) + # add prefix for moduele loader + module_name = f"{self.source_class.MODEL_ZOO}.{module_name}" + super(t.nn.Module, self).__init__() super(ModelLoader, self).__init__(module_name, item_name, source, **kwargs) + def __repr__(self): + return '{}(module_name={}, item_name={}, source={}, kwargs={})'.format( \ + self.__class__.__name__, self.module_name, self.item_name, self.source, self.kwargs) + class DatasetLoader(Loader): + + source_class = _Source + def __init__(self, module_name, item_name, source=None, **kwargs): if source is None: - module_name = ( - f"{_Source.DATASET}.{module_name}" # add prefix for moduele loader - ) + # add prefix for moduele loader + module_name = f"{self.source_class.DATASET}.{module_name}" super(DatasetLoader, 
self).__init__(module_name, item_name, source, **kwargs) class CustFuncLoader(Loader): + + source_class = _Source + def __init__(self, module_name, item_name, source=None, **kwargs): if source is None: - module_name = ( - f"{_Source.CUST_FUNC}.{module_name}" # add prefix for moduele loader - ) + # add prefix for moduele loader + module_name = f"{self.source_class.CUST_FUNC}.{module_name}" super(CustFuncLoader, self).__init__(module_name, item_name, source, **kwargs) + + +class LLMModelLoader(ModelLoader): + + source_class = _LLMSource + + +class LLMDatasetLoader(DatasetLoader): + + source_class = _LLMSource + + +class LLMDataFuncLoader(CustFuncLoader): + + source_class = _LLMSource \ No newline at end of file diff --git a/python/fate_client/pipeline/components/fate/union.py b/python/fate_client/pipeline/components/fate/union.py index f772cd2..17e0a68 100644 --- a/python/fate_client/pipeline/components/fate/union.py +++ b/python/fate_client/pipeline/components/fate/union.py @@ -26,11 +26,11 @@ def __init__( self, _name: str, runtime_parties: dict = None, - input_data_list: List[ArtifactType] = PlaceHolder(), + input_datas: List[ArtifactType] = PlaceHolder(), ): inputs = locals() self._process_init_inputs(inputs) super(Union, self).__init__() self._name = _name self.runtime_parties = runtime_parties - self.input_data_list = input_data_list + self.input_datas = input_datas diff --git a/python/fate_client/pipeline/entity/dag.py b/python/fate_client/pipeline/entity/dag.py index 5079ec4..fb89e4f 100644 --- a/python/fate_client/pipeline/entity/dag.py +++ b/python/fate_client/pipeline/entity/dag.py @@ -19,7 +19,7 @@ TaskSpec, PartyTaskRefSpec, PartyTaskSpec, JobConfSpec from ..scheduler.component_stage import ComponentStageSchedule -SCHEMA_VERSION = "2.0.0" +SCHEMA_VERSION = "2.1.0" class DAG(object): diff --git a/python/fate_client/pipeline/utils/test_utils.py b/python/fate_client/pipeline/utils/test_utils.py index cbbefd8..3c4290e 100644 --- 
a/python/fate_client/pipeline/utils/test_utils.py +++ b/python/fate_client/pipeline/utils/test_utils.py @@ -64,12 +64,11 @@ def __init__(self, parties): class JobConfig(object): def __init__(self, config): self.parties = Parties(config.get("parties", {})) - # self.backend = config.get("backend", 0) - # self.work_mode = config.get("work_mode", 0) self.data_base_dir = config.get("data_base_dir", "") self.system_setting = config.get("system_setting", {}) self.task_cores = config.get("task_cores", None) self.timeout = config.get("timeout", None) + self.engine_run = config.get("engine_run", None) @staticmethod def load(path: typing.Union[str, Path]): diff --git a/python/setup.py b/python/setup.py index 9bbc19b..e3fed83 100644 --- a/python/setup.py +++ b/python/setup.py @@ -34,8 +34,8 @@ ] extras_require = { - "fate": ["pyfate==2.0.0"], - "fate_flow": ["fate_flow==2.0.0"] + "fate": ["pyfate==2.1.0"], + "fate_flow": ["fate_flow==2.1.0"] } entry_points = {"console_scripts": ["flow = fate_client.flow_cli.flow:flow_cli", "pipeline = fate_client.pipeline.pipeline_cli:pipeline_group"]}