From c2f4d6e741a3692b212e28a605ec9e7d543c268f Mon Sep 17 00:00:00 2001 From: Jarvis Date: Wed, 9 Aug 2023 10:19:01 +0800 Subject: [PATCH] [Improve] add compress_codec parameter to file related connector --- docs/en/connector-v2/source/CosFile.md | 11 +++++++++++ docs/en/connector-v2/source/FtpFile.md | 11 +++++++++++ docs/en/connector-v2/source/HdfsFile.md | 11 +++++++++++ docs/en/connector-v2/source/Hive.md | 11 +++++++++++ docs/en/connector-v2/source/OssFile.md | 11 +++++++++++ docs/en/connector-v2/source/OssJindoFile.md | 11 +++++++++++ docs/en/connector-v2/source/S3File.md | 11 +++++++++++ docs/en/connector-v2/source/SftpFile.md | 11 +++++++++++ .../file/cos/source/CosFileSourceFactory.java | 1 + .../file/ftp/source/FtpFileSourceFactory.java | 1 + .../file/hdfs/source/HdfsFileSourceFactory.java | 1 + .../file/oss/source/OssFileSourceFactory.java | 1 + .../file/oss/source/OssFileSourceFactory.java | 1 + .../seatunnel/file/s3/source/S3FileSourceFactory.java | 1 + .../file/sftp/source/SftpFileSourceFactory.java | 1 + 15 files changed, 95 insertions(+) diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md index dd1e77ebcfd..236c4b8ca09 100644 --- a/docs/en/connector-v2/source/CosFile.md +++ b/docs/en/connector-v2/source/CosFile.md @@ -56,6 +56,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### path [string] @@ -252,6 +253,16 @@ Reader the sheet of the workbook,Only used when file_format is excel. Filter pattern, which used for filtering files. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. 
+ ## Example ```hocon diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md index 6737511e63d..3c29b859639 100644 --- a/docs/en/connector-v2/source/FtpFile.md +++ b/docs/en/connector-v2/source/FtpFile.md @@ -49,6 +49,7 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### host [string] @@ -228,6 +229,16 @@ Source plugin common parameters, please refer to [Source Common Options](common- Reader the sheet of the workbook,Only used when file_format is excel. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. + ## Example ```hocon diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md index 1d285c539a3..5ec8f4f76ff 100644 --- a/docs/en/connector-v2/source/HdfsFile.md +++ b/docs/en/connector-v2/source/HdfsFile.md @@ -54,6 +54,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### path [string] @@ -250,6 +251,16 @@ Reader the sheet of the workbook,Only used when file_format is excel. Filter pattern, which used for filtering files. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. 
+ ## Example ```hocon diff --git a/docs/en/connector-v2/source/Hive.md b/docs/en/connector-v2/source/Hive.md index f9f35aaf733..dbe87936f59 100644 --- a/docs/en/connector-v2/source/Hive.md +++ b/docs/en/connector-v2/source/Hive.md @@ -44,6 +44,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | read_partitions | list | no | - | | read_columns | list | no | - | | common-options | | no | - | +| compress_codec | string | no | none | ### table_name [string] @@ -84,6 +85,16 @@ The read column list of the data source, user can use it to implement field proj Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. + ## Example ```bash diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md index 12f2141cd6e..c6d4fbb733c 100644 --- a/docs/en/connector-v2/source/OssFile.md +++ b/docs/en/connector-v2/source/OssFile.md @@ -57,6 +57,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### path [string] @@ -249,6 +250,16 @@ Source plugin common parameters, please refer to [Source Common Options](common- Reader the sheet of the workbook,Only used when file_format is excel. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. 
+ ## Example ```hocon diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md index 913d277683e..de91d661593 100644 --- a/docs/en/connector-v2/source/OssJindoFile.md +++ b/docs/en/connector-v2/source/OssJindoFile.md @@ -57,6 +57,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### path [string] @@ -253,6 +254,16 @@ Reader the sheet of the workbook,Only used when file_format is excel. Filter pattern, which used for filtering files. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. + ## Example ```hocon diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md index 79a89be1c27..f41c19321b8 100644 --- a/docs/en/connector-v2/source/S3File.md +++ b/docs/en/connector-v2/source/S3File.md @@ -58,6 +58,7 @@ Read all the data in a split in a pollNext call. What splits are read will be sa | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### path [string] @@ -304,6 +305,16 @@ Reader the sheet of the workbook,Only used when file_format is excel. Filter pattern, which used for filtering files. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. 
+ ## Changelog ### 2.3.0-beta 2022-10-20 diff --git a/docs/en/connector-v2/source/SftpFile.md b/docs/en/connector-v2/source/SftpFile.md index 22047d481ed..da432ad0fd8 100644 --- a/docs/en/connector-v2/source/SftpFile.md +++ b/docs/en/connector-v2/source/SftpFile.md @@ -48,6 +48,7 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you | common-options | | no | - | | sheet_name | string | no | - | | file_filter_pattern | string | no | - | +| compress_codec | string | no | none | ### host [string] @@ -231,6 +232,16 @@ Reader the sheet of the workbook,Only used when file_format is excel. Filter pattern, which used for filtering files. +### compress_codec [string] + +The compress codec of the files. The supported codecs for each file format are listed below: + +- txt: `lzo` `none` +- json: `lzo` `none` +- csv: `lzo` `none` +- orc/parquet: + automatically recognizes the compression type, no additional settings required. + ## Example ```hocon diff --git a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java index 496e9277f4e..e2b0285efba 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-cos/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/cos/source/CosFileSourceFactory.java @@ -61,6 +61,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java index 4ab637c4348..a29bb8228f2 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-ftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/ftp/source/FtpFileSourceFactory.java @@ -61,6 +61,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java index c3d406d62c7..2e4832da8ac 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-hadoop/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/hdfs/source/HdfsFileSourceFactory.java @@ -58,6 +58,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java index eaea7bccb61..74f88bb17a0 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-jindo-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java @@ -61,6 +61,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java index e7d862bd44a..ca22d62ef6d 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/source/OssFileSourceFactory.java @@ -61,6 +61,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java index a3b48088650..2d2f3d749a8 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java @@ -66,6 +66,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java index e9efe1cdf9b..7b6fc8d52c2 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-sftp/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sftp/source/SftpFileSourceFactory.java @@ -61,6 +61,7 @@ public OptionRule optionRule() { .optional(BaseSourceConfig.DATETIME_FORMAT) .optional(BaseSourceConfig.TIME_FORMAT) .optional(BaseSourceConfig.FILE_FILTER_PATTERN) + .optional(BaseSourceConfig.COMPRESS_CODEC) .build(); }