diff --git a/.github/workflows/code-analysys.yml b/.github/workflows/code-analysys.yml deleted file mode 100644 index d4a0227da163..000000000000 --- a/.github/workflows/code-analysys.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: SonarCloud -on: - push: - pull_request: - branches: [dev] - paths-ignore: - - 'docs/**' - - '**/*.md' - - 'seatunnel-ui/**' -jobs: - build: - runs-on: ubuntu-latest - timeout-minutes: 120 - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Set up JDK 11 - uses: actions/setup-java@v2 - with: - java-version: 11 - distribution: 'adopt' - - name: Run SonarCloud Analysis - run: bash ./tools/sonarcheck/check.sh - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index e7f898bd6594..0bce6778f0f1 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ Please follow this [document](docs/en/contribution/setup.md). * Mail list: **dev@seatunnel.apache.org**. Mail to `dev-subscribe@seatunnel.apache.org`, follow the reply to subscribe the mail list. -* Slack: https://the-asf.slack.com/archives/C053HND1D6X +* Slack: https://s.apache.org/seatunnel-slack * Twitter: https://twitter.com/ASFSeaTunnel * [Bilibili](https://space.bilibili.com/1542095008) (for Chinese users) diff --git a/bin/install-plugin.sh b/bin/install-plugin.sh index cddf4ebf68a2..b8a1cca71492 100755 --- a/bin/install-plugin.sh +++ b/bin/install-plugin.sh @@ -23,8 +23,8 @@ # get seatunnel home SEATUNNEL_HOME=$(cd $(dirname $0);cd ../;pwd) -# connector default version is 2.3.1, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2 -version=2.3.1 +# connector default version is 2.3.3, you can also choose a custom version. 
eg: 2.1.2: sh install-plugin.sh 2.1.2 +version=2.3.3 if [ -n "$1" ]; then version="$1" diff --git a/config/plugin_config b/config/plugin_config index 95b952b31bf1..0c7e119ecfc4 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -36,7 +36,7 @@ connector-file-ftp connector-file-hadoop connector-file-local connector-file-oss -connector-file-oss-jindo +connector-file-jindo-oss connector-file-s3 connector-file-sftp connector-google-sheets diff --git a/docs/en/about.md b/docs/en/about.md index d2e28693915a..57a800343b02 100644 --- a/docs/en/about.md +++ b/docs/en/about.md @@ -2,7 +2,7 @@ seatunnel logo -[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://the-asf.slack.com/archives/C053HND1D6X) +[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://s.apache.org/seatunnel-slack) [![Twitter Follow](https://img.shields.io/twitter/follow/ASFSeaTunnel.svg?label=Follow&logo=twitter)](https://twitter.com/ASFSeaTunnel) SeaTunnel is a very easy-to-use, ultra-high-performance, distributed data integration platform that supports real-time diff --git a/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md b/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md index 002bd0c3bec4..e0751a249272 100644 --- a/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md +++ b/docs/en/connector-v2/formats/cdc-compatible-debezium-json.md @@ -19,7 +19,6 @@ source { MySQL-CDC { result_table_name = "table1" - hostname = localhost base-url="jdbc:mysql://localhost:3306/test" "startup.mode"=INITIAL catalog { diff --git a/docs/en/connector-v2/sink/AmazonDynamoDB.md b/docs/en/connector-v2/sink/AmazonDynamoDB.md index e8fe0b23afbe..6e880fb4af42 100644 --- a/docs/en/connector-v2/sink/AmazonDynamoDB.md +++ b/docs/en/connector-v2/sink/AmazonDynamoDB.md @@ -20,7 +20,6 @@ Write data to Amazon DynamoDB | secret_access_key | string | yes | - | | table | string | yes | - | | batch_size | string | no | 25 | -| batch_interval_ms | string | no | 1000 | | common-options | | no | - | ### url [string] diff --git a/docs/en/connector-v2/sink/Console.md b/docs/en/connector-v2/sink/Console.md index fd7623d7d389..55df281b2752 100644 --- a/docs/en/connector-v2/sink/Console.md +++ b/docs/en/connector-v2/sink/Console.md @@ -14,14 +14,24 @@ Used to send data to Console. Both support streaming and batch mode. ## Options -| name | type | required | default value | -|----------------|------|----------|---------------| -| common-options | | no | - | +| name | type | required | default value | +|--------------------|---------|----------|---------------| +| common-options | | no | - | +| log.print.data | boolean | no | yes | +| log.print.delay.ms | int | no | 0 | ### common options Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +### log.print.data + +Flag to determine whether data should be printed in the logs. The default value is `true`. + +### log.print.delay.ms + +Delay in milliseconds between printing each data item to the logs. The default value is `0`. + ## Example simple: diff --git a/docs/en/connector-v2/sink/Feishu.md b/docs/en/connector-v2/sink/Feishu.md index bd45977ce809..5573086db3e4 100644 --- a/docs/en/connector-v2/sink/Feishu.md +++ b/docs/en/connector-v2/sink/Feishu.md @@ -2,41 +2,55 @@ > Feishu sink connector -## Description - -Used to launch Feishu web hooks using data. 
- -> For example, if the data from upstream is [`age: 12, name: tyrantlucifer`], the body content is the following: `{"age": 12, "name": "tyrantlucifer"}` +## Support Those Engines -**Tips: Feishu sink only support `post json` webhook and the data from source will be treated as body content in web hook.** +> Spark
+> Flink
+> SeaTunnel Zeta
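Whichever of the engines above runs the job, the sink is configured the same way. A minimal configuration sketch, using only the `url` and `headers` options described in the Sink Options table below (the webhook address and header value are placeholders, not real endpoints), looks like this:

```hocon
sink {
  Feishu {
    # Placeholder webhook address - replace it with your own Feishu webhook
    url = "https://www.feishu.cn/flow/api/trigger-webhook/your-webhook-id"
    # Optional extra HTTP request headers
    headers {
      "Content-Type" = "application/json"
    }
  }
}
```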
-## Key features +## Key Features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------|--------|----------|---------------| -| url | String | Yes | - | -| headers | Map | No | - | -| common-options | | no | - | - -### url [string] - -Feishu webhook url - -### headers [Map] - -Http request headers +## Description -### common options +Used to launch Feishu web hooks using data. -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +> For example, if the data from upstream is [`age: 12, name: tyrantlucifer`], the body content is the following: `{"age": 12, "name": "tyrantlucifer"}` -## Example +**Tips: Feishu sink only supports `post json` webhooks, and the data from the source will be treated as the body content of the webhook.** -simple: +## Data Type Mapping + +| SeaTunnel Data type | Feishu Data type | +|-----------------------------|------------------| +| ROW&#10;
MAP | Json | +| NULL | null | +| BOOLEAN | boolean | +| TINYINT | byte | +| SMALLINT | short | +| INT | int | +| BIGINT | long | +| FLOAT | float | +| DOUBLE | double | +| DECIMAL | BigDecimal | +| BYTES | byte[] | +| STRING | String | +| TIMESTAMP&#10;
TIME | String | +| ARRAY | JsonArray | + +## Sink Options + +| Name | Type | Required | Default | Description | +|----------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------| +| url | String | Yes | - | Feishu webhook url | +| headers | Map | No | - | Http request headers | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | + +## Task Example + +### Simple: ```hocon Feishu { diff --git a/docs/en/connector-v2/sink/InfluxDB.md b/docs/en/connector-v2/sink/InfluxDB.md index e824a41fe686..1dba1fbe4dc8 100644 --- a/docs/en/connector-v2/sink/InfluxDB.md +++ b/docs/en/connector-v2/sink/InfluxDB.md @@ -22,7 +22,6 @@ Write data to InfluxDB. | key_time | string | no | processing time | | key_tags | array | no | exclude `field` & `key_time` | | batch_size | int | no | 1024 | -| batch_interval_ms | int | no | - | | max_retries | int | no | - | | retry_backoff_multiplier_ms | int | no | - | | connect_timeout_ms | long | no | 15000 | @@ -63,11 +62,7 @@ If not specified, include all fields with `influxDB` measurement field ### batch_size [int] -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the influxDB - -### batch_interval_ms [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the influxDB +For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `checkpoint.interval`, the data will be flushed into the influxDB ### max_retries [int] diff --git a/docs/en/connector-v2/sink/IoTDB.md b/docs/en/connector-v2/sink/IoTDB.md index d60021719e80..554d0bfd06ed 100644 --- a/docs/en/connector-v2/sink/IoTDB.md +++ b/docs/en/connector-v2/sink/IoTDB.md @@ -2,193 +2,190 @@ > IoTDB sink connector -## Description +## Support Those Engines -Used to write data to IoTDB. +> Spark
+> Flink
+> SeaTunnel Zeta
-:::tip - -There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. - -::: - -## Key features +## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) IoTDB supports the `exactly-once` feature through idempotent writing. If two pieces of data have the same `key` and `timestamp`, the new data will overwrite the old one. -## Options - -| name | type | required | default value | -|-----------------------------|---------|----------|--------------------------------| -| node_urls | list | yes | - | -| username | string | yes | - | -| password | string | yes | - | -| key_device | string | yes | - | -| key_timestamp | string | no | processing time | -| key_measurement_fields | array | no | exclude `device` & `timestamp` | -| storage_group | string | no | - | -| batch_size | int | no | 1024 | -| batch_interval_ms | int | no | - | -| max_retries | int | no | - | -| retry_backoff_multiplier_ms | int | no | - | -| max_retry_backoff_ms | int | no | - | -| default_thrift_buffer_size | int | no | - | -| max_thrift_frame_size | int | no | - | -| zone_id | string | no | - | -| enable_rpc_compression | boolean | no | - | -| connection_timeout_in_ms | int | no | - | -| common-options | | no | - | - -### node_urls [list] - -`IoTDB` cluster address, the format is `["host:port", ...]` - -### username [string] - -`IoTDB` user username - -### password [string] - -`IoTDB` user password - -### key_device [string] - -Specify field name of the `IoTDB` deviceId in SeaTunnelRow - -### key_timestamp [string] - -Specify field-name of the `IoTDB` timestamp in SeaTunnelRow. If not specified, use processing-time as timestamp - -### key_measurement_fields [array] - -Specify field-name of the `IoTDB` measurement list in SeaTunnelRow. If not specified, include all fields but exclude `device` & `timestamp` - -### storage_group [string] - -Specify device storage group(path prefix) - -example: deviceId = ${storage_group} + "." + ${key_device} - -### batch_size [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the IoTDB - -### batch_interval_ms [int] - -For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the IoTDB - -### max_retries [int] - -The number of retries to flush failed - -### retry_backoff_multiplier_ms [int] - -Using as a multiplier for generating the next delay for backoff - -### max_retry_backoff_ms [int] - -The amount of time to wait before attempting to retry a request to `IoTDB` - -### default_thrift_buffer_size [int] +## Description -Thrift init buffer size in `IoTDB` client +Used to write data to IoTDB. -### max_thrift_frame_size [int] +:::tip -Thrift max frame size in `IoTDB` client +There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. 
-### zone_id [string] +::: -java.time.ZoneId in `IoTDB` client +## Supported DataSource Info + +| Datasource | Supported Versions | Url | +|------------|--------------------|----------------| +| IoTDB | `>= 0.13.0` | localhost:6667 | + +## Database Dependency + +## Data Type Mapping + +| IotDB Data type | SeaTunnel Data type | +|-----------------|---------------------| +| BOOLEAN | BOOLEAN | +| INT32 | TINYINT | +| INT32 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| TEXT | STRING | + +## Sink Options + +| Name | Type | Required | Default | Description | +|-----------------------------|---------|----------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| node_urls | String | Yes | - | `IoTDB` cluster address, the format is `"host1:port"` or `"host1:port,host2:port"` | +| username | String | Yes | - | `IoTDB` user username | +| password | String | Yes | - | `IoTDB` user password | +| key_device | String | Yes | - | Specify field name of the `IoTDB` deviceId in SeaTunnelRow | +| key_timestamp | String | No | processing time | Specify field-name of the `IoTDB` timestamp in SeaTunnelRow. If not specified, use processing-time as timestamp | +| key_measurement_fields | Array | No | exclude `device` & `timestamp` | Specify field-name of the `IoTDB` measurement list in SeaTunnelRow. If not specified, include all fields but exclude `device` & `timestamp` | +| storage_group | Array | No | - | Specify device storage group(path prefix)
example: deviceId = ${storage_group} + "." + ${key_device} | +| batch_size | Integer | No | 1024 | For batch writing, when the number of buffers reaches the number of `batch_size` or the time reaches `batch_interval_ms`, the data will be flushed into the IoTDB | +| max_retries | Integer | No | - | The number of retries to flush failed | +| retry_backoff_multiplier_ms | Integer | No | - | Using as a multiplier for generating the next delay for backoff | +| max_retry_backoff_ms | Integer | No | - | The amount of time to wait before attempting to retry a request to `IoTDB` | +| default_thrift_buffer_size | Integer | No | - | Thrift init buffer size in `IoTDB` client | +| max_thrift_frame_size | Integer | No | - | Thrift max frame size in `IoTDB` client | +| zone_id | string | No | - | java.time.ZoneId in `IoTDB` client | +| enable_rpc_compression | Boolean | No | - | Enable rpc compression in `IoTDB` client | +| connection_timeout_in_ms | Integer | No | - | The maximum time (in ms) to wait when connecting to `IoTDB` | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | -### enable_rpc_compression [boolean] +## Examples -Enable rpc compression in `IoTDB` client +```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" +} -### connection_timeout_in_ms [int] +source { + FakeSource { + row.num = 16 + bigint.template = [1664035200001] + schema = { + fields { + device_name = "string" + temperature = "float" + moisture = "int" + event_ts = "bigint" + c_string = "string" + c_boolean = "boolean" + c_tinyint = "tinyint" + c_smallint = "smallint" + c_int = "int" + c_bigint = "bigint" + c_float = "float" + c_double = "double" + } + } + } +} -The maximum time (in ms) to wait when connecting to `IoTDB` +... -### common options +``` -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +Upstream SeaTunnelRow data format is the following: -## Examples +| device_name | temperature | moisture | event_ts | c_string | c_boolean | c_tinyint | c_smallint | c_int | c_bigint | c_float | c_double | +|--------------------------|-------------|----------|---------------|----------|-----------|-----------|------------|-------|------------|---------|----------| +| root.test_group.device_a | 36.1 | 100 | 1664035200001 | abc1 | true | 1 | 1 | 1 | 2147483648 | 1.0 | 1.0 | +| root.test_group.device_b | 36.2 | 101 | 1664035200001 | abc2 | false | 2 | 2 | 2 | 2147483649 | 2.0 | 2.0 | +| root.test_group.device_c | 36.3 | 102 | 1664035200001 | abc3 | false | 3 | 3 | 3 | 2147483649 | 3.0 | 3.0 | ### Case1 -Common options: +only fill required config. +use current processing time as timestamp. 
and include all fields but exclude `device` & `timestamp` as measurement fields ```hocon sink { IoTDB { - node_urls = ["localhost:6667"] + node_urls = "localhost:6667" username = "root" password = "root" - batch_size = 1024 - batch_interval_ms = 1000 + key_device = "device_name" # specify the `deviceId` use device_name field } } ``` -When you assign `key_device` is `device_name`, for example: +Output to `IoTDB` data format is the following: + +```shell +IoTDB> SELECT * FROM root.test_group.* align by device; ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +| Time| Device| temperature| moisture| event_ts| c_string| c_boolean| c_tinyint| c_smallint| c_int| c_bigint| c_float| c_double| ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +|2023-09-01T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 1664035200001| abc1| true| 1| 1| 1| 2147483648| 1.0| 1.0| +|2023-09-01T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 1664035200001| abc2| false| 2| 2| 2| 2147483649| 2.0| 2.0| +|2023-09-01T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 1664035200001| abc2| false| 3| 3| 3| 2147483649| 3.0| 3.0| ++------------------------+------------------------+--------------+-----------+--------------+---------+---------+-----------+-----------+------+-----------+--------+---------+ +``` + +### Case2 + +use source event's time ```hocon sink { IoTDB { - ... - key_device = "device_name" + node_urls = "localhost:6667" + username = "root" + password = "root" + key_device = "device_name" # specify the `deviceId` use device_name field + key_timestamp = "event_ts" # specify the `timestamp` use event_ts field } } ``` -Upstream SeaTunnelRow data format is the following: - -| device_name | field_1 | field_2 | -|--------------------------|---------|---------| -| root.test_group.device_a | 1001 | 1002 | -| root.test_group.device_b | 2001 | 2002 | -| root.test_group.device_c | 3001 | 3002 | - Output to `IoTDB` data format is the following: ```shell IoTDB> SELECT * FROM root.test_group.* align by device; -+------------------------+------------------------+-----------+----------+ -| Time| Device| field_1| field_2| -+------------------------+------------------------+----------+-----------+ -|2022-09-26T17:50:01.201Z|root.test_group.device_a| 1001| 1002| -|2022-09-26T17:50:01.202Z|root.test_group.device_b| 2001| 2002| -|2022-09-26T17:50:01.203Z|root.test_group.device_c| 3001| 3002| -+------------------------+------------------------+----------+-----------+ ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +| Time| Device| temperature| moisture| event_ts| c_string| c_boolean| c_tinyint| c_smallint| c_int| c_bigint| c_float| c_double| ++------------------------+------------------------+--------------+-----------+--------------+---------+----------+----------+-----------+------+-----------+--------+---------+ +|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 1664035200001| abc1| true| 1| 1| 1| 2147483648| 1.0| 1.0| +|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 1664035200001| abc2| false| 2| 2| 2| 2147483649| 2.0| 2.0| +|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 1664035200001| abc2| false| 3| 3| 3| 
2147483649| 3.0| 3.0| ++------------------------+------------------------+--------------+-----------+--------------+---------+---------+-----------+-----------+------+-----------+--------+---------+ ``` -### Case2 +### Case3 -When you assign `key_device`、`key_timestamp`、`key_measurement_fields`, for example: +use source event's time and limit measurement fields ```hocon sink { IoTDB { - ... + node_urls = "localhost:6667" + username = "root" + password = "root" key_device = "device_name" - key_timestamp = "ts" + key_timestamp = "event_ts" key_measurement_fields = ["temperature", "moisture"] } } ``` -Upstream SeaTunnelRow data format is the following: - -| ts | device_name | field_1 | field_2 | temperature | moisture | -|---------------|--------------------------|---------|---------|-------------|----------| -| 1664035200001 | root.test_group.device_a | 1001 | 1002 | 36.1 | 100 | -| 1664035200001 | root.test_group.device_b | 2001 | 2002 | 36.2 | 101 | -| 1664035200001 | root.test_group.device_c | 3001 | 3002 | 36.3 | 102 | - Output to `IoTDB` data format is the following: ```shell diff --git a/docs/en/connector-v2/sink/Mysql.md b/docs/en/connector-v2/sink/Mysql.md index 55c825ed168e..6c01c35ee8cb 100644 --- a/docs/en/connector-v2/sink/Mysql.md +++ b/docs/en/connector-v2/sink/Mysql.md @@ -2,6 +2,10 @@ > JDBC Mysql Sink Connector +## Support Mysql Version + +- 5.5/5.6/5.7/8.0 + ## Support Those Engines > Spark
diff --git a/docs/en/connector-v2/sink/Oracle.md b/docs/en/connector-v2/sink/Oracle.md index feda00b8159d..151243f318fb 100644 --- a/docs/en/connector-v2/sink/Oracle.md +++ b/docs/en/connector-v2/sink/Oracle.md @@ -35,7 +35,7 @@ semantics (using XA transaction guarantee). ## Data Type Mapping -| PostgreSQL Data type | SeaTunnel Data type | +| Oracle Data type | SeaTunnel Data type | |--------------------------------------------------------------------------------------|---------------------| | INTEGER | INT | | FLOAT | DECIMAL(38, 18) | @@ -54,7 +54,7 @@ semantics (using XA transaction guarantee). | Name | Type | Required | Default | Description | |-------------------------------------------|---------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:oracle:thin:@datasource01:1523:xe | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use Oracle the value is `oracle.jdbc.OracleDriver`. | | user | String | No | - | Connection instance user name | | password | String | No | - | Connection instance password | @@ -83,7 +83,7 @@ semantics (using XA transaction guarantee). ### Simple: -> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends it to JDBC Sink. FakeSource generates a total of 16 rows of data (row.num=16), with each row having two fields, name (string type) and age (int type). The final target table is test_table will also be 16 rows of data in the table. Before run this job, you need create database test and table test_table in your PostgreSQL. And if you have not yet installed and deployed SeaTunnel, you need to follow the instructions in [Install SeaTunnel](../../start-v2/locally/deployment.md) to install and deploy SeaTunnel. And then follow the instructions in [Quick Start With SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) to run this job. +> This example defines a SeaTunnel synchronization task that automatically generates data through FakeSource and sends it to JDBC Sink. FakeSource generates a total of 16 rows of data (row.num=16), with each row having two fields, name (string type) and age (int type). The final target table is test_table will also be 16 rows of data in the table. Before run this job, you need create database test and table test_table in your Oracle. And if you have not yet installed and deployed SeaTunnel, you need to follow the instructions in [Install SeaTunnel](../../start-v2/locally/deployment.md) to install and deploy SeaTunnel. And then follow the instructions in [Quick Start With SeaTunnel Engine](../../start-v2/locally/quick-start-seatunnel-engine.md) to run this job. ``` # Defining the runtime environment diff --git a/docs/en/connector-v2/sink/Slack.md b/docs/en/connector-v2/sink/Slack.md index 27ba01c32b0f..7ed87d2022c3 100644 --- a/docs/en/connector-v2/sink/Slack.md +++ b/docs/en/connector-v2/sink/Slack.md @@ -2,42 +2,39 @@ > Slack sink connector -## Description - -Used to send data to Slack Channel. Both support streaming and batch mode. +## Support Those Engines -> For example, if the data from upstream is [`age: 12, name: huan`], the content send to socket server is the following: `{"name":"huan","age":17}` +> Spark
+> Flink
+> SeaTunnel Zeta
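Before the option details below, here is a minimal configuration sketch built from the three required options in the Options table (the webhook URL, OAuth token, and channel name are placeholders):

```hocon
sink {
  Slack {
    # All values below are placeholders - substitute your own workspace settings
    webhooks_url = "https://hooks.slack.com/services/xxxxxxxxx"
    oauth_token = "xoxp-xxxxxxxxxxxxx"
    slack_channel = "seatunnel-sink"
  }
}
```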
## Key features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [cdc](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------|--------|----------|---------------| -| webhooks_url | String | Yes | - | -| oauth_token | String | Yes | - | -| slack_channel | String | Yes | - | -| common-options | | no | - | - -### webhooks_url [string] +## Description -Slack webhook url +Used to send data to Slack Channel. Both support streaming and batch mode. -### oauth_token [string] +> For example, if the data from upstream is [`age: 12, name: huan`], the content send to socket server is the following: `{"name":"huan","age":17}` -Slack oauth token used for the actual authentication +## Data Type Mapping -### slack_channel [string] +All data types are mapped to string. -slack channel for data write +## Options -### common options +| Name | Type | Required | Default | Description | +|----------------|--------|----------|---------|-----------------------------------------------------------------------------------------------------| +| webhooks_url | String | Yes | - | Slack webhook url | +| oauth_token | String | Yes | - | Slack oauth token used for the actual authentication | +| slack_channel | String | Yes | - | slack channel for data write | +| common-options | | no | - | Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details | -Sink plugin common parameters, please refer to [Sink Common Options](common-options.md) for details +## Task Example -## Example +### Simple: ```hocon sink { diff --git a/docs/en/connector-v2/sink/StarRocks.md b/docs/en/connector-v2/sink/StarRocks.md index 763743ce9677..38893a429ef7 100644 --- a/docs/en/connector-v2/sink/StarRocks.md +++ b/docs/en/connector-v2/sink/StarRocks.md @@ -20,24 +20,23 @@ The internal implementation of StarRocks sink connector is cached and imported b ## Sink Options -| Name | Type | Required | Default | Description | -|-----------------------------|---------|----------|-----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| nodeUrls | list | yes | - | `StarRocks` cluster address, the format is `["fe_ip:fe_http_port", ...]` | -| base-url | string | yes | - | The JDBC URL like `jdbc:mysql://localhost:9030/` or `jdbc:mysql://localhost:9030` or `jdbc:mysql://localhost:9030/db` | -| username | string | yes | - | `StarRocks` user username | -| password | string | yes | - | `StarRocks` user password | -| database | string | yes | - | The name of StarRocks database | -| table | string | no | - | The name of StarRocks table, If not set, the table name will be the name of the upstream table | -| labelPrefix | string | no | - | The prefix of StarRocks stream load label | -| batch_max_rows | long | no | 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks | -| batch_max_bytes | int | no | 5 * 1024 * 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks | -| batch_interval_ms | int | no | - | For batch writing, when the number of buffers reaches the 
number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `batch_interval_ms`, the data will be flushed into the StarRocks | -| max_retries | int | no | - | The number of retries to flush failed | -| retry_backoff_multiplier_ms | int | no | - | Using as a multiplier for generating the next delay for backoff | -| max_retry_backoff_ms | int | no | - | The amount of time to wait before attempting to retry a request to `StarRocks` | -| enable_upsert_delete | boolean | no | false | Whether to enable upsert/delete, only supports PrimaryKey model. | -| save_mode_create_template | string | no | see below | see below | -| starrocks.config | map | no | - | The parameter of the stream load `data_desc` | +| Name | Type | Required | Default | Description | +|-----------------------------|---------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| nodeUrls | list | yes | - | `StarRocks` cluster address, the format is `["fe_ip:fe_http_port", ...]` | +| base-url | string | yes | - | The JDBC URL like `jdbc:mysql://localhost:9030/` or `jdbc:mysql://localhost:9030` or `jdbc:mysql://localhost:9030/db` | +| username | string | yes | - | `StarRocks` user username | +| password | string | yes | - | `StarRocks` user password | +| database | string | yes | - | The name of StarRocks database | +| table | string | no | - | The name of StarRocks table, If not set, the table name will be the name of the upstream table | +| labelPrefix | string | no | - | The prefix of StarRocks stream load label | +| batch_max_rows | long | no | 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `checkpoint.interval`, the data will be flushed into the StarRocks | +| batch_max_bytes | int | no | 5 * 1024 * 1024 | For batch writing, when the number of buffers reaches the number of `batch_max_rows` or the byte size of `batch_max_bytes` or the time reaches `checkpoint.interval`, the data will be flushed into the StarRocks | +| max_retries | int | no | - | The number of retries to flush failed | +| retry_backoff_multiplier_ms | int | no | - | Using as a multiplier for generating the next delay for backoff | +| max_retry_backoff_ms | int | no | - | The amount of time to wait before attempting to retry a request to `StarRocks` | +| enable_upsert_delete | boolean | no | false | Whether to enable upsert/delete, only supports PrimaryKey model. 
| +| save_mode_create_template | string | no | see below | see below | +| starrocks.config | map | no | - | The parameter of the stream load `data_desc` | ### save_mode_create_template diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md index c692a7483a6d..c9fb8e70cdb8 100644 --- a/docs/en/connector-v2/source/FtpFile.md +++ b/docs/en/connector-v2/source/FtpFile.md @@ -58,9 +58,9 @@ The target ftp host is required The target ftp port is required -### username [string] +### user [string] -The target ftp username is required +The target ftp user name is required ### password [string] diff --git a/docs/en/connector-v2/source/Http.md b/docs/en/connector-v2/source/Http.md index 291835b93ed6..aa4067fe438e 100644 --- a/docs/en/connector-v2/source/Http.md +++ b/docs/en/connector-v2/source/Http.md @@ -52,8 +52,8 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor | format | String | No | json | The format of upstream data, now only support `json` `text`, default `json`. | | method | String | No | get | Http request method, only supports GET, POST method. | | headers | Map | No | - | Http headers. | -| params | Map | No | - | Http params. | -| body | String | No | - | Http body. | +| params | Map | No | - | Http params,the program will automatically add http header application/x-www-form-urlencoded. | +| body | String | No | - | Http body,the program will automatically add http header application/json,body is jsonbody. | | poll_interval_ms | Int | No | - | Request http api interval(millis) in stream mode. | | retry | Int | No | - | The max retry times if request http return to `IOException`. | | retry_backoff_multiplier_ms | Int | No | 100 | The retry-backoff times(millis) multiplier if request http failed. | diff --git a/docs/en/connector-v2/source/Iceberg.md b/docs/en/connector-v2/source/Iceberg.md index 6a42ee0ddd30..b6d3924b95f1 100644 --- a/docs/en/connector-v2/source/Iceberg.md +++ b/docs/en/connector-v2/source/Iceberg.md @@ -2,9 +2,15 @@ > Apache Iceberg source connector -## Description +## Support Iceberg Version -Source connector for Apache Iceberg. It can support batch and stream mode. +- 0.14.0 + +## Support Those Engines + +> Spark
+> Flink
+> SeaTunnel Zeta
## Key features @@ -22,126 +28,120 @@ Source connector for Apache Iceberg. It can support batch and stream mode. - [x] hadoop(2.7.1 , 2.7.5 , 3.1.3) - [x] hive(2.3.9 , 3.1.2) -## Options - -| name | type | required | default value | -|--------------------------|---------|----------|----------------------| -| catalog_name | string | yes | - | -| catalog_type | string | yes | - | -| uri | string | no | - | -| warehouse | string | yes | - | -| namespace | string | yes | - | -| table | string | yes | - | -| schema | config | no | - | -| case_sensitive | boolean | no | false | -| start_snapshot_timestamp | long | no | - | -| start_snapshot_id | long | no | - | -| end_snapshot_id | long | no | - | -| use_snapshot_id | long | no | - | -| use_snapshot_timestamp | long | no | - | -| stream_scan_strategy | enum | no | FROM_LATEST_SNAPSHOT | -| common-options | | no | - | - -### catalog_name [string] - -User-specified catalog name. - -### catalog_type [string] - -The optional values are: -- hive: The hive metastore catalog. -- hadoop: The hadoop catalog. - -### uri [string] - -The Hive metastore’s thrift URI. - -### warehouse [string] - -The location to store metadata files and data files. - -### namespace [string] - -The iceberg database name in the backend catalog. - -### table [string] - -The iceberg table name in the backend catalog. - -### case_sensitive [boolean] +## Description -If data columns where selected via schema [config], controls whether the match to the schema will be done with case sensitivity. +Source connector for Apache Iceberg. It can support batch and stream mode. -### schema [config] +## Supported DataSource Info -#### fields [Config] +| Datasource | Dependent | Maven | +|------------|---------------------|---------------------------------------------------------------------------| +| Iceberg | flink-shaded-hadoop | [Download](https://mvnrepository.com/search?q=flink-shaded-hadoop-) | +| Iceberg | hive-exec | [Download](https://mvnrepository.com/artifact/org.apache.hive/hive-exec) | +| Iceberg | libfb303 | [Download](https://mvnrepository.com/artifact/org.apache.thrift/libfb303) | -Use projection to select data columns and columns order. +## Database Dependency -e.g. +> In order to be compatible with different versions of Hadoop and Hive, the scope of hive-exec and flink-shaded-hadoop-2 in the project pom file are provided, so if you use the Flink engine, first you may need to add the following Jar packages to /lib directory, if you are using the Spark engine and integrated with Hadoop, then you do not need to add the following Jar packages. ``` -schema { - fields { - f2 = "boolean" - f1 = "bigint" - f3 = "int" - f4 = "bigint" - } -} +flink-shaded-hadoop-x-xxx.jar +hive-exec-xxx.jar +libfb303-xxx.jar ``` -### start_snapshot_id [long] - -Instructs this scan to look for changes starting from a particular snapshot (exclusive). - -### start_snapshot_timestamp [long] - -Instructs this scan to look for changes starting from the most recent snapshot for the table as of the timestamp. timestamp – the timestamp in millis since the Unix epoch - -### end_snapshot_id [long] - -Instructs this scan to look for changes up to a particular snapshot (inclusive). - -### use_snapshot_id [long] - -Instructs this scan to look for use the given snapshot ID. - -### use_snapshot_timestamp [long] - -Instructs this scan to look for use the most recent snapshot as of the given time in milliseconds. 
timestamp – the timestamp in millis since the Unix epoch - -### stream_scan_strategy [enum] - -Starting strategy for stream mode execution, Default to use `FROM_LATEST_SNAPSHOT` if don’t specify any value. -The optional values are: -- TABLE_SCAN_THEN_INCREMENTAL: Do a regular table scan then switch to the incremental mode. -- FROM_LATEST_SNAPSHOT: Start incremental mode from the latest snapshot inclusive. -- FROM_EARLIEST_SNAPSHOT: Start incremental mode from the earliest snapshot inclusive. -- FROM_SNAPSHOT_ID: Start incremental mode from a snapshot with a specific id inclusive. -- FROM_SNAPSHOT_TIMESTAMP: Start incremental mode from a snapshot with a specific timestamp inclusive. - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. - -## Example - -simple +> Some versions of the hive-exec package do not have libfb303-xxx.jar, so you also need to manually import the Jar package. + +## Data Type Mapping + +| Iceberg Data type | SeaTunnel Data type | +|-------------------|---------------------| +| BOOLEAN | BOOLEAN | +| INTEGER | INT | +| LONG | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DATE | DATE | +| TIME | TIME | +| TIMESTAMP | TIMESTAMP | +| STRING | STRING | +| FIXED
BINARY | BYTES | +| DECIMAL | DECIMAL | +| STRUCT | ROW | +| LIST | ARRAY | +| MAP | MAP | + +## Source Options + +| Name | Type | Required | Default | Description | +|--------------------------|---------|----------|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| catalog_name | string | yes | - | User-specified catalog name. | +| catalog_type | string | yes | - | The optional values are: hive(The hive metastore catalog),hadoop(The hadoop catalog) | +| uri | string | no | - | The Hive metastore’s thrift URI. | +| warehouse | string | yes | - | The location to store metadata files and data files. | +| namespace | string | yes | - | The iceberg database name in the backend catalog. | +| table | string | yes | - | The iceberg table name in the backend catalog. | +| schema | config | no | - | Use projection to select data columns and columns order. | +| case_sensitive | boolean | no | false | If data columns where selected via schema [config], controls whether the match to the schema will be done with case sensitivity. | +| start_snapshot_timestamp | long | no | - | Instructs this scan to look for changes starting from the most recent snapshot for the table as of the timestamp.
timestamp – the timestamp in millis since the Unix epoch | +| start_snapshot_id | long | no | - | Instructs this scan to look for changes starting from a particular snapshot (exclusive). | +| end_snapshot_id | long | no | - | Instructs this scan to look for changes up to a particular snapshot (inclusive). | +| use_snapshot_id | long | no | - | Instructs this scan to look for use the given snapshot ID. | +| use_snapshot_timestamp | long | no | - | Instructs this scan to look for use the most recent snapshot as of the given time in milliseconds. timestamp – the timestamp in millis since the Unix epoch | +| stream_scan_strategy | enum | no | FROM_LATEST_SNAPSHOT | Starting strategy for stream mode execution, Default to use `FROM_LATEST_SNAPSHOT` if don’t specify any value,The optional values are:
TABLE_SCAN_THEN_INCREMENTAL: Do a regular table scan then switch to the incremental mode.
FROM_LATEST_SNAPSHOT: Start incremental mode from the latest snapshot inclusive.
FROM_EARLIEST_SNAPSHOT: Start incremental mode from the earliest snapshot inclusive.
FROM_SNAPSHOT_ID: Start incremental mode from a snapshot with a specific id inclusive.
FROM_SNAPSHOT_TIMESTAMP: Start incremental mode from a snapshot with a specific timestamp inclusive. | +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | + +## Task Example + +### Simple: ```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" +} + source { Iceberg { + schema { + fields { + f2 = "boolean" + f1 = "bigint" + f3 = "int" + f4 = "bigint" + f5 = "float" + f6 = "double" + f7 = "date" + f9 = "timestamp" + f10 = "timestamp" + f11 = "string" + f12 = "bytes" + f13 = "bytes" + f14 = "decimal(19,9)" + f15 = "array" + f16 = "map" + } + } catalog_name = "seatunnel" catalog_type = "hadoop" - warehouse = "hdfs://your_cluster//tmp/seatunnel/iceberg/" - namespace = "your_iceberg_database" - table = "your_iceberg_table" + warehouse = "file:///tmp/seatunnel/iceberg/hadoop/" + namespace = "database1" + table = "source" + result_table_name = "iceberg" + } +} + +transform { +} + +sink { + Console { + source_table_name = "iceberg" } } ``` -Or +### Hive Catalog: ```hocon source { @@ -156,7 +156,7 @@ source { } ``` -column projection +### Column Projection: ```hocon source { @@ -179,20 +179,6 @@ source { } ``` -:::tip - -In order to be compatible with different versions of Hadoop and Hive, the scope of hive-exec and flink-shaded-hadoop-2 in the project pom file are provided, so if you use the Flink engine, first you may need to add the following Jar packages to /lib directory, if you are using the Spark engine and integrated with Hadoop, then you do not need to add the following Jar packages. - -::: - -``` -flink-shaded-hadoop-x-xxx.jar -hive-exec-xxx.jar -libfb303-xxx.jar -``` - -Some versions of the hive-exec package do not have libfb303-xxx.jar, so you also need to manually import the Jar package. - ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/source/IoTDB.md b/docs/en/connector-v2/source/IoTDB.md index a20680ce638f..da0f198d3e1b 100644 --- a/docs/en/connector-v2/source/IoTDB.md +++ b/docs/en/connector-v2/source/IoTDB.md @@ -2,14 +2,16 @@ > IoTDB source connector -## Description +## Support Those Engines -Read external data source data through IoTDB. +> Spark
+> Flink
+> SeaTunnel Zeta
## Key features - [x] [batch](../../concept/connector-v2-features.md) -- [ ] [stream](../../concept/connector-v2-features.md) +- [x] [stream](../../concept/connector-v2-features.md) - [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [column projection](../../concept/connector-v2-features.md) @@ -18,106 +20,53 @@ supports query SQL and can achieve projection effect. - [x] [parallelism](../../concept/connector-v2-features.md) - [ ] [support user-defined split](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|----------------------------|---------|----------|---------------| -| host | string | no | - | -| port | int | no | - | -| node_urls | string | no | - | -| username | string | yes | - | -| password | string | yes | - | -| sql | string | yes | - | -| schema | config | yes | - | -| fetch_size | int | no | - | -| lower_bound | long | no | - | -| upper_bound | long | no | - | -| num_partitions | int | no | - | -| thrift_default_buffer_size | int | no | - | -| enable_cache_leader | boolean | no | - | -| version | string | no | - | -| common-options | | no | - | - -### single node, you need to set host and port to connect to the remote data source. - -**host** [string] the host of the IoTDB when you select host of the IoTDB - -**port** [int] the port of the IoTDB when you select - -### multi node, you need to set node_urls to connect to the remote data source. - -**node_urls** [string] the node_urls of the IoTDB when you select - -e.g. - -```text -127.0.0.1:8080,127.0.0.2:8080 -``` - -### other parameters - -**sql** [string] -execute sql statement e.g. - -``` -select name,age from test -``` - -### schema [config] - -#### fields [Config] - -The schema of the IoTDB that you want to generate - -e.g. - -``` -schema { - fields { - name = string - age = int - } - } -``` - -### option parameters - -### fetch_size [int] - -the fetch_size of the IoTDB when you select - -### username [string] - -the username of the IoTDB when you select - -### password [string] - -the password of the IoTDB when you select - -### lower_bound [long] - -the lower_bound of the IoTDB when you select - -### upper_bound [long] - -the upper_bound of the IoTDB when you select - -### num_partitions [int] - -the num_partitions of the IoTDB when you select - -### thrift_default_buffer_size [int] - -the thrift_default_buffer_size of the IoTDB when you select - -### enable_cache_leader [boolean] - -enable_cache_leader of the IoTDB when you select +## Description -### version [string] +Read external data source data through IoTDB. -Version represents the SQL semantic version used by the client, which is used to be compatible with the SQL semantics of -0.12 when upgrading 0.13. The possible values are: V_0_12, V_0_13. +:::tip + +There is a conflict of thrift version between IoTDB and Spark.Therefore, you need to execute `rm -f $SPARK_HOME/jars/libthrift*` and `cp $IOTDB_HOME/lib/libthrift* $SPARK_HOME/jars/` to resolve it. 
+ +::: + +## Supported DataSource Info + +| Datasource | Supported Versions | Url | +|------------|--------------------|----------------| +| IoTDB | `>= 0.13.0` | localhost:6667 | + +## Data Type Mapping + +| IotDB Data type | SeaTunnel Data type | +|-----------------|---------------------| +| BOOLEAN | BOOLEAN | +| INT32 | TINYINT | +| INT32 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| TEXT | STRING | + +## Source Options + +| Name | Type | Required | Default Value | Description | +|----------------------------|---------|----------|---------------|------------------------------------------------------------------------------------| +| node_urls | string | yes | - | `IoTDB` cluster address, the format is `"host1:port"` or `"host1:port,host2:port"` | +| username | string | yes | - | `IoTDB` user username | +| password | string | yes | - | `IoTDB` user password | +| sql | string | yes | - | execute sql statement | +| schema | config | yes | - | the data schema | +| fetch_size | int | no | - | the fetch_size of the IoTDB when you select | +| lower_bound | long | no | - | the lower_bound of the IoTDB when you select | +| upper_bound | long | no | - | the upper_bound of the IoTDB when you select | +| num_partitions | int | no | - | the num_partitions of the IoTDB when you select | +| thrift_default_buffer_size | int | no | - | the thrift_default_buffer_size of the IoTDB when you select | +| thrift_max_frame_size | int | no | - | the thrift max frame size | +| enable_cache_leader | boolean | no | - | enable_cache_leader of the IoTDB when you select | +| version | string | no | - | SQL semantic version used by the client, The possible values are: V_0_12, V_0_13 | +| common-options | | no | - | | ### split partitions @@ -157,37 +106,37 @@ Source plugin common parameters, please refer to [Source Common Options](common- ## Examples -### Case1 - -Common options: - ```hocon +env { + execution.parallelism = 2 + job.mode = "BATCH" +} + source { IoTDB { node_urls = "localhost:6667" username = "root" password = "root" + sql = "SELECT temperature, moisture, c_int, c_bigint, c_float, c_double, c_string, c_boolean FROM root.test_group.* WHERE time < 4102329600000 align by device" + schema { + fields { + ts = timestamp + device_name = string + temperature = float + moisture = bigint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_string = string + c_boolean = boolean + } + } } } -``` - -When you assign `sql`、`fields`、`partition`, for example: -```hocon sink { - IoTDB { - ... 
- sql = "SELECT temperature, moisture FROM root.test_group.* WHERE time < 4102329600000 align by device" - lower_bound = 1 - upper_bound = 4102329600000 - num_partitions = 10 - fields { - ts = bigint - device_name = string - - temperature = float - moisture = bigint - } + Console { } } ``` @@ -195,23 +144,23 @@ sink { Upstream `IoTDB` data format is the following: ```shell -IoTDB> SELECT temperature, moisture FROM root.test_group.* WHERE time < 4102329600000 align by device; -+------------------------+------------------------+--------------+-----------+ -| Time| Device| temperature| moisture| -+------------------------+------------------------+--------------+-----------+ -|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| -|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| -|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| -+------------------------+------------------------+--------------+-----------+ +IoTDB> SELECT temperature, moisture, c_int, c_bigint, c_float, c_double, c_string, c_boolean FROM root.test_group.* WHERE time < 4102329600000 align by device; ++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ +| Time| Device| temperature| moisture| c_int| c_bigint| c_float| c_double| c_string| c_boolean| ++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ +|2022-09-25T00:00:00.001Z|root.test_group.device_a| 36.1| 100| 1| 21474836470| 1.0f| 1.0d| abc| true| +|2022-09-25T00:00:00.001Z|root.test_group.device_b| 36.2| 101| 2| 21474836470| 2.0f| 2.0d| abc| true| +|2022-09-25T00:00:00.001Z|root.test_group.device_c| 36.3| 102| 3| 21474836470| 3.0f| 3.0d| abc| true| ++------------------------+------------------------+--------------+-----------+--------+--------------+----------+---------+---------+----------+ ``` Loaded to SeaTunnelRow data format is the following: -| ts | device_name | temperature | moisture | -|---------------|--------------------------|-------------|----------| -| 1664035200001 | root.test_group.device_a | 36.1 | 100 | -| 1664035200001 | root.test_group.device_b | 36.2 | 101 | -| 1664035200001 | root.test_group.device_c | 36.3 | 102 | +| ts | device_name | temperature | moisture | c_int | c_bigint | c_float | c_double | c_string | c_boolean | +|---------------|--------------------------|-------------|----------|-------|-------------|---------|----------|----------|-----------| +| 1664035200001 | root.test_group.device_a | 36.1 | 100 | 1 | 21474836470 | 1.0f | 1.0d | abc | true | +| 1664035200001 | root.test_group.device_b | 36.2 | 101 | 2 | 21474836470 | 2.0f | 2.0d | abc | true | +| 1664035200001 | root.test_group.device_c | 36.3 | 102 | 3 | 21474836470 | 3.0f | 3.0d | abc | true | ## Changelog diff --git a/docs/en/connector-v2/source/MySQL-CDC.md b/docs/en/connector-v2/source/MySQL-CDC.md index caeeca062836..6740fd4b8b2e 100644 --- a/docs/en/connector-v2/source/MySQL-CDC.md +++ b/docs/en/connector-v2/source/MySQL-CDC.md @@ -2,10 +2,9 @@ > MySQL CDC source connector -## Description +## Support Those Engines -The MySQL CDC connector allows for reading snapshot data and incremental data from MySQL database. This document -describes how to set up the MySQL CDC connector to run SQL queries against MySQL databases. +> SeaTunnel Zeta
## Key features @@ -16,207 +15,202 @@ describes how to set up the MySQL CDC connector to run SQL queries against MySQL - [x] [parallelism](../../concept/connector-v2-features.md) - [x] [support user-defined split](../../concept/connector-v2-features.md) -## Options - -| name | type | required | default value | -|------------------------------------------------|----------|----------|---------------| -| username | String | Yes | - | -| password | String | Yes | - | -| database-names | List | No | - | -| table-names | List | Yes | - | -| base-url | String | Yes | - | -| startup.mode | Enum | No | INITIAL | -| startup.timestamp | Long | No | - | -| startup.specific-offset.file | String | No | - | -| startup.specific-offset.pos | Long | No | - | -| stop.mode | Enum | No | NEVER | -| stop.timestamp | Long | No | - | -| stop.specific-offset.file | String | No | - | -| stop.specific-offset.pos | Long | No | - | -| incremental.parallelism | Integer | No | 1 | -| snapshot.split.size | Integer | No | 8096 | -| snapshot.fetch.size | Integer | No | 1024 | -| server-id | String | No | - | -| server-time-zone | String | No | UTC | -| connect.timeout.ms | Duration | No | 30000 | -| connect.max-retries | Integer | No | 3 | -| connection.pool.size | Integer | No | 20 | -| chunk-key.even-distribution.factor.upper-bound | Double | No | 100 | -| chunk-key.even-distribution.factor.lower-bound | Double | No | 0.05 | -| sample-sharding.threshold | int | No | 1000 | -| inverse-sampling.rate | int | No | 1000 | -| exactly_once | Boolean | No | true | -| debezium.* | config | No | - | -| format | Enum | No | DEFAULT | -| common-options | | no | - | - -### username [String] - -Name of the database to use when connecting to the database server. - -### password [String] - -Password to use when connecting to the database server. - -### database-names [List] - -Database name of the database to monitor. - -### table-names [List] - -Table name of the database to monitor. The table name needs to include the database name, for example: database_name.table_name - -### base-url [String] - -URL has to be with database, like "jdbc:mysql://localhost:5432/db" or "jdbc:mysql://localhost:5432/db?useSSL=true". - -### startup.mode [Enum] - -Optional startup mode for MySQL CDC consumer, valid enumerations are "initial", "earliest", "latest" and "specific". - -### startup.timestamp [Long] - -Start from the specified epoch timestamp (in milliseconds). - -**Note, This option is required when the "startup.mode" option used `'timestamp'`.** - -### startup.specific-offset.file [String] - -Start from the specified binlog file name. - -**Note, This option is required when the "startup.mode" option used `'specific'`.** - -### startup.specific-offset.pos [Long] - -Start from the specified binlog file position. - -**Note, This option is required when the "startup.mode" option used `'specific'`.** - -### stop.mode [Enum] - -Optional stop mode for MySQL CDC consumer, valid enumerations are "never". - -### stop.timestamp [Long] - -Stop from the specified epoch timestamp (in milliseconds). - -**Note, This option is required when the "stop.mode" option used `'timestamp'`.** - -### stop.specific-offset.file [String] - -Stop from the specified binlog file name. - -**Note, This option is required when the "stop.mode" option used `'specific'`.** - -### stop.specific-offset.pos [Long] - -Stop from the specified binlog file position. 
- -**Note, This option is required when the "stop.mode" option used `'specific'`.** - -### incremental.parallelism [Integer] - -The number of parallel readers in the incremental phase. - -### snapshot.split.size [Integer] - -The split size (number of rows) of table snapshot, captured tables are split into multiple splits when read the snapshot -of table. - -### snapshot.fetch.size [Integer] - -The maximum fetch size for per poll when read table snapshot. - -### chunk-key.even-distribution.factor.upper-bound [Double] - -The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. - -### chunk-key.even-distribution.factor.lower-bound [Double] +## Description -The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. +The MySQL CDC connector allows for reading snapshot data and incremental data from MySQL database. This document +describes how to set up the MySQL CDC connector to run SQL queries against MySQL databases. -### sample-sharding.threshold [Integer] +## Supported DataSource Info -This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. +| Datasource | Supported versions | Driver | Url | Maven | +|------------|-------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|----------------------------------|----------------------------------------------------------------------| +| MySQL |
  • [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x
  • [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x
  • | com.mysql.cj.jdbc.Driver | jdbc:mysql://localhost:3306/test | https://mvnrepository.com/artifact/mysql/mysql-connector-java/8.0.28 | -### inverse-sampling.rate [Integer] +## Database Dependency -The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. +### Install Jdbc Driver -### server-id [String] +Please download and put the MySQL driver in the `${SEATUNNEL_HOME}/lib/` dir. For example: cp mysql-connector-java-xxx.jar `$SEATUNNEL_HOME/lib/` -A numeric ID or a numeric ID range of this database client, The numeric ID syntax is like '5400', the numeric ID range -syntax is like '5400-5408'. +### Creating MySQL user -Every ID must be unique across all currently-running database processes in the MySQL cluster. This connector joins the -MySQL cluster as another server (with this unique ID) so it can read the binlog. +You have to define a MySQL user with appropriate permissions on all databases that the Debezium MySQL connector monitors. -By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value. +1. Create the MySQL user: -### server-time-zone [String] +```sql +mysql> CREATE USER 'user'@'localhost' IDENTIFIED BY 'password'; +``` -The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone. +2. Grant the required permissions to the user: -### connect.timeout.ms [long] +```sql +mysql> GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'user' IDENTIFIED BY 'password'; +``` -The maximum time that the connector should wait after trying to connect to the database server before timing out. +3. Finalize the user’s permissions: -### connect.max-retries [Integer] +```sql +mysql> FLUSH PRIVILEGES; +``` -The max retry times that the connector should retry to build database server connection. +### Enabling the MySQL binlog -### connection.pool.size [Integer] +You must enable binary logging for MySQL replication. The binary logs record transaction updates for replication tools to propagate changes. -The connection pool size. +1. Check whether the `log-bin` option is already on: -### exactly_once [Boolean] +```sql +mysql> show variables where variable_name in ('log_bin', 'binlog_format', 'binlog_row_image', 'gtid_mode', 'enforce_gtid_consistency'); ++--------------------------+----------------+ +| Variable_name | Value | ++--------------------------+----------------+ +| binlog_format | ROW | +| binlog_row_image | FULL | +| enforce_gtid_consistency | ON | +| gtid_mode | ON | +| log_bin | ON | ++--------------------------+----------------+ +5 rows in set (0.00 sec) +``` -Enable exactly once semantic. +2. If inconsistent with the above results, configure your MySQL server configuration file (`$MYSQL_HOME/mysql.cnf`) with the following properties, which are described in the table below: -### debezium [Config] +``` +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. 
+# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +expire_logs_days = 10 +binlog_format = row +binlog_row_image = FULL + +# enable gtid mode +gtid_mode = on +enforce_gtid_consistency = on +``` -Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from MySQL server. +3. Restart MySQL Server -See more about -the [Debezium's MySQL Connector properties](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#mysql-connector-properties) +```shell +/etc/init.d/mysqld restart +``` -### format [Enum] +4. Confirm your changes by checking the binlog status once more: + +```sql +mysql> show variables where variable_name in ('log_bin', 'binlog_format', 'binlog_row_image', 'gtid_mode', 'enforce_gtid_consistency'); ++--------------------------+----------------+ +| Variable_name | Value | ++--------------------------+----------------+ +| binlog_format | ROW | +| binlog_row_image | FULL | +| enforce_gtid_consistency | ON | +| gtid_mode | ON | +| log_bin | ON | ++--------------------------+----------------+ +5 rows in set (0.00 sec) +``` -Optional output format for MySQL CDC, valid enumerations are "DEFAULT"、"COMPATIBLE_DEBEZIUM_JSON". +### Notes + +#### Setting up MySQL session timeouts + +When an initial consistent snapshot is made for large databases, your established connection could time out while the tables are being read. You can prevent this behavior by configuring `interactive_timeout` and `wait_timeout` in your MySQL configuration file. +- `interactive_timeout`: The number of seconds the server waits for activity on an interactive connection before closing it. See [MySQL’s documentation](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_interactive_timeout) for more details. +- `wait_timeout`: The number of seconds the server waits for activity on a non-interactive connection before closing it. See [MySQL’s documentation](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_wait_timeout) for more details. + +*For more database settings see [Debezium MySQL Connector](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#setting-up-mysql)* + +## Data Type Mapping + +| Mysql Data type | SeaTunnel Data type | +|------------------------------------------------------------------------------------------|---------------------| +| BIT(1)
    TINYINT(1) | BOOLEAN | +| TINYINT | TINYINT | +| TINYINT UNSIGNED
    SMALLINT | SMALLINT | +| SMALLINT UNSIGNED
    MEDIUMINT
    MEDIUMINT UNSIGNED
    INT
    INTEGER
    YEAR | INT | +| INT UNSIGNED
    INTEGER UNSIGNED
    BIGINT | BIGINT | +| BIGINT UNSIGNED | DECIMAL(20,0) | +| DECIMAL(p, s)
    DECIMAL(p, s) UNSIGNED
    NUMERIC(p, s)
    NUMERIC(p, s) UNSIGNED | DECIMAL(p,s) | +| FLOAT
    FLOAT UNSIGNED | FLOAT | +| DOUBLE
    DOUBLE UNSIGNED
    REAL
    REAL UNSIGNED | DOUBLE | +| CHAR
    VARCHAR
    TINYTEXT
    MEDIUMTEXT
    TEXT
    LONGTEXT
    ENUM
    JSON | STRING | +| DATE | DATE | +| TIME | TIME | +| DATETIME
    TIMESTAMP | TIMESTAMP | +| BINARY
    VARBINARY
    BIT(p)
    TINYBLOB
    MEDIUMBLOB
    BLOB
    LONGBLOB | BYTES | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------------------------------------------|----------|----------|---------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| base-url | String | Yes | - | The URL of the JDBC connection. For example: `jdbc:mysql://localhost:3306/test`. | +| username | String | Yes | - | Name of the database user to use when connecting to the database server. | +| password | String | Yes | - | Password to use when connecting to the database server. | +| database-names | List | No | - | Database name of the database to monitor. | +| table-names | List | Yes | - | Table name of the database to monitor. The table name needs to include the database name, for example: `database_name.table_name` | +| startup.mode | Enum | No | INITIAL | Optional startup mode for MySQL CDC consumer, valid enumerations are `initial`, `earliest`, `latest` and `specific`.
    `initial`: Synchronize historical data at startup, and then synchronize incremental data.
    `earliest`: Start from the earliest offset possible.
    `latest`: Start from the latest offset.
    `specific`: Start from user-supplied specific offsets. | +| startup.specific-offset.file | String | No | - | Start from the specified binlog file name. **Note: this option is required when the `startup.mode` option is set to `specific`.** | +| startup.specific-offset.pos | Long | No | - | Start from the specified binlog file position. **Note: this option is required when the `startup.mode` option is set to `specific`.** | +| stop.mode | Enum | No | NEVER | Optional stop mode for MySQL CDC consumer, valid enumerations are `never`, `latest` or `specific`.
    `never`: Real-time job doesn't stop the source.
    `latest`: Stop from the latest offset.
    `specific`: Stop from user-supplied specific offset. | +| stop.specific-offset.file | String | No | - | Stop from the specified binlog file name. **Note: this option is required when the `stop.mode` option is set to `specific`.** | +| stop.specific-offset.pos | Long | No | - | Stop from the specified binlog file position. **Note: this option is required when the `stop.mode` option is set to `specific`.** | +| snapshot.split.size | Integer | No | 8096 | The split size (number of rows) of the table snapshot; captured tables are split into multiple splits when reading the table snapshot. | +| snapshot.fetch.size | Integer | No | 1024 | The maximum fetch size per poll when reading the table snapshot. | +| server-id | String | No | - | A numeric ID or a numeric ID range of this database client. The numeric ID syntax is like `5400`, the numeric ID range syntax is like `5400-5408`.
    Every ID must be unique across all currently-running database processes in the MySQL cluster. This connector joins the
    MySQL cluster as another server (with this unique ID) so it can read the binlog.
    By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value. | +| server-time-zone | String | No | UTC | The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone. | +| connect.timeout.ms | Duration | No | 30000 | The maximum time that the connector should wait after trying to connect to the database server before timing out. | +| connect.max-retries | Integer | No | 3 | The max retry times that the connector should retry to build database server connection. | +| connection.pool.size | Integer | No | 20 | The jdbc connection pool size. | +| chunk-key.even-distribution.factor.upper-bound | Double | No | 100 | The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. | +| chunk-key.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | +| sample-sharding.threshold | Integer | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. | +| inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | +| exactly_once | Boolean | No | true | Enable exactly once semantic. | +| format | Enum | No | DEFAULT | Optional output format for MySQL CDC, valid enumerations are `DEFAULT`、`COMPATIBLE_DEBEZIUM_JSON`. | +| debezium | Config | No | - | Pass-through [Debezium's properties](https://debezium.io/documentation/reference/1.6/connectors/mysql.html#mysql-connector-properties) to Debezium Embedded Engine which is used to capture data changes from MySQL server. 
| +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | + +## Task Example + +### Simple + +> Support multi-table reading -#### example +``` +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 10000 +} -```conf source { MySQL-CDC { - debezium { - snapshot.mode = "never" - decimal.handling.mode = "double" + catalog = { + factory = MySQL } + base-url = "jdbc:mysql://localhost:3306/testdb" + username = "root" + password = "root@123" + table-names = ["testdb.table1", "testdb.table2"] + + startup.mode = "initial" } } -``` - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. - -## Example -```Jdbc { -source { - MySQL-CDC { - result_table_name = "fake" - parallelism = 1 - server-id = 5656 - username = "mysqluser" - password = "mysqlpw" - table-names = ["inventory_vwyw0n.products"] - base-url = "jdbc:mysql://localhost:56725/inventory_vwyw0n" +sink { + Console { } } ``` +### Support debezium-compatible format send to kafka + +> Must be used with kafka connector sink, see [compatible debezium format](../formats/cdc-compatible-debezium-json.md) for details + ## Changelog - Add MySQL CDC Source Connector diff --git a/docs/en/connector-v2/source/Mysql.md b/docs/en/connector-v2/source/Mysql.md index 32933f8c9a81..001ef1463dab 100644 --- a/docs/en/connector-v2/source/Mysql.md +++ b/docs/en/connector-v2/source/Mysql.md @@ -2,6 +2,10 @@ > JDBC Mysql Source Connector +## Support Mysql Version + +- 5.5/5.6/5.7/8.0 + ## Support Those Engines > Spark
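The `specific` startup mode listed in the MySQL-CDC source options above is combined with the `startup.specific-offset.file` and `startup.specific-offset.pos` options. A minimal sketch of such a job follows; the binlog file name and position are illustrative placeholders and must correspond to an offset that still exists in the server's binlog:

```
source {
  MySQL-CDC {
    base-url = "jdbc:mysql://localhost:3306/testdb"
    username = "root"
    password = "root@123"
    table-names = ["testdb.table1"]

    # resume from a user-supplied binlog offset (placeholder values)
    startup.mode = "specific"
    startup.specific-offset.file = "mysql-bin.000003"
    startup.specific-offset.pos = 4
  }
}
```

When no valid offset is available, the `initial` mode shown in the Simple example above remains the safer default.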
    diff --git a/docs/en/connector-v2/source/Oracle.md b/docs/en/connector-v2/source/Oracle.md index c1cedbded7af..385d55ca9e56 100644 --- a/docs/en/connector-v2/source/Oracle.md +++ b/docs/en/connector-v2/source/Oracle.md @@ -37,7 +37,7 @@ Read external data source data through JDBC. ## Data Type Mapping -| PostgreSQL Data type | SeaTunnel Data type | +| Oracle Data type | SeaTunnel Data type | |--------------------------------------------------------------------------------------|---------------------| | INTEGER | INT | | FLOAT | DECIMAL(38, 18) | diff --git a/docs/en/transform-v2/sql-udf.md b/docs/en/transform-v2/sql-udf.md index 143044f5a797..ede3ef9ab4a8 100644 --- a/docs/en/transform-v2/sql-udf.md +++ b/docs/en/transform-v2/sql-udf.md @@ -39,16 +39,31 @@ public interface ZetaUDF { ## UDF Implements Example -Add the dependency of transform-v2 and provided scope to your maven project: +Add these dependencies and provided scope to your maven project: ```xml - - org.apache.seatunnel - seatunnel-transforms-v2 - 2.3.x - provided - + + + org.apache.seatunnel + seatunnel-transforms-v2 + 2.3.2 + provided + + + org.apache.seatunnel + seatunnel-api + 2.3.2 + provided + + + com.google.auto.service + auto-service + 1.0.1 + provided + + + ``` Add a Java Class implements of ZetaUDF like this: diff --git a/release-note.md b/release-note.md index d647bdbad935..1b797ff3154b 100644 --- a/release-note.md +++ b/release-note.md @@ -15,6 +15,7 @@ ### Connectors - [Elasticsearch] Support https protocol & compatible with opensearch - [Hbase] Add hbase sink connector #4049 +- [Clickhouse] Fix clickhouse old version compatibility #5326 ### Formats - [Canal]Support read canal format message #3950 - [Debezium]Support debezium canal format message #3981 @@ -81,6 +82,7 @@ - [E2E] [Kafka] Fix kafka e2e testcase (#4520) - [Container Version] Fix risk of unreproducible test cases #4591 - [E2e] [Mysql-cdc] Removing the excess MySqlIncrementalSourceIT e2e reduces the CI time (#4738) +- [E2E] [Common] Update test container version of seatunnel engine (#5323) ## Improve @@ -115,6 +117,7 @@ ### CI - [CI] Fix error repository name in ci config files (#4795) +- [CI][E2E][Zeta] Increase Zeta checkpoint timeout to avoid connector-file-sftp-e2e failed frequently (#5339) ### Zeta(ST-Engine) @@ -192,3 +195,4 @@ - [Docs] Redshift add defaultRowFetchSize (#4616) - [Docs] Refactor connector-v2 docs using unified format Mysql (#4590) - [Docs] Add Value types in Java to Schema features (#5087) +- [Docs] Replace username by user in the options of FtpFile (#5421) \ No newline at end of file diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java index bc80c6642889..d076cd5367bf 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvCommonOptions.java @@ -51,6 +51,12 @@ public interface EnvCommonOptions { .withDescription( "The interval (in milliseconds) between two consecutive checkpoints."); + Option CHECKPOINT_TIMEOUT = + Options.key("checkpoint.timeout") + .longType() + .noDefaultValue() + .withDescription("The timeout (in milliseconds) for a checkpoint."); + Option JARS = Options.key("jars") .stringType() diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java index 3a90b82e83bc..09310f080c53 100644 --- 
a/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/env/EnvOptionRule.java @@ -30,6 +30,7 @@ public static OptionRule getEnvOptionRules() { CommonOptions.PARALLELISM, EnvCommonOptions.JARS, EnvCommonOptions.CHECKPOINT_INTERVAL, + EnvCommonOptions.CHECKPOINT_TIMEOUT, EnvCommonOptions.CUSTOM_PARAMETERS) .build(); } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java index aa1bbd5934bd..e8ee03a5013a 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/utils/RetryUtils.java @@ -66,7 +66,7 @@ public static T retryWithException( backoff); Thread.sleep(backoff); } else { - log.debug(attemptMessage, ExceptionUtils.getMessage(e), i, retryTimes, 0); + log.info(attemptMessage, ExceptionUtils.getMessage(e), i, retryTimes, 0); } } } diff --git a/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java b/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java index eceacf997972..3cfdb7dba3b5 100644 --- a/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java +++ b/seatunnel-config/seatunnel-config-shade/src/main/java/org/apache/seatunnel/shade/com/typesafe/config/impl/PropertiesParser.java @@ -7,8 +7,7 @@ import java.io.IOException; import java.io.Reader; import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -58,7 +57,15 @@ private static AbstractConfigObject fromEntrySet( } private static Map getPathMap(Set> entries) { - Map pathMap = new LinkedHashMap(); + Map pathMap = new LinkedHashMap<>(); + System.getProperties() + .forEach( + (key, value) -> { + if (key instanceof String) { + Path path = pathFromPropertyKey((String) key); + pathMap.put(path, value); + } + }); for (Map.Entry entry : entries) { Object key = entry.getKey(); if (key instanceof String) { @@ -74,7 +81,7 @@ static AbstractConfigObject fromStringMap(ConfigOrigin origin, Map pathExpressionMap) { - Map pathMap = new LinkedHashMap(); + Map pathMap = new LinkedHashMap<>(); for (Map.Entry entry : pathExpressionMap.entrySet()) { Object keyObj = entry.getKey(); if (!(keyObj instanceof String)) { @@ -93,8 +100,8 @@ private static AbstractConfigObject fromPathMap( * First, build a list of paths that will have values, either string or * object values. */ - Set scopePaths = new LinkedHashSet(); - Set valuePaths = new LinkedHashSet(); + Set scopePaths = new LinkedHashSet<>(); + Set valuePaths = new LinkedHashSet<>(); for (Path path : pathMap.keySet()) { // add value's path valuePaths.add(path); @@ -129,13 +136,11 @@ private static AbstractConfigObject fromPathMap( /* * Create maps for the object-valued values. 
*/ - Map root = new LinkedHashMap(); - Map> scopes = - new LinkedHashMap>(); + Map root = new LinkedHashMap<>(); + Map> scopes = new LinkedHashMap<>(); for (Path path : scopePaths) { - Map scope = - new LinkedHashMap(); + Map scope = new LinkedHashMap<>(); scopes.put(path, scope); } @@ -150,7 +155,17 @@ private static AbstractConfigObject fromPathMap( AbstractConfigValue value; if (convertedFromProperties) { if (rawValue instanceof String) { - value = new ConfigString.Quoted(origin, (String) rawValue); + if (((String) rawValue).startsWith("[") && ((String) rawValue).endsWith("]")) { + List list = + Arrays.asList( + ((String) rawValue) + .substring(1, ((String) rawValue).length() - 1) + .split(",")); + value = ConfigImpl.fromAnyRef(list, origin, FromMapMode.KEYS_ARE_PATHS); + } else { + value = new ConfigString.Quoted(origin, (String) rawValue); + } + } else { // silently ignore non-string values in Properties value = null; @@ -167,19 +182,14 @@ private static AbstractConfigObject fromPathMap( * Make a list of scope paths from longest to shortest, so children go * before parents. */ - List sortedScopePaths = new ArrayList(); - sortedScopePaths.addAll(scopePaths); + List sortedScopePaths = new ArrayList<>(scopePaths); // sort descending by length - Collections.sort( - sortedScopePaths, - new Comparator() { - @Override - public int compare(Path a, Path b) { - // Path.length() is O(n) so in theory this sucks - // but in practice we can make Path precompute length - // if it ever matters. - return b.length() - a.length(); - } + sortedScopePaths.sort( + (a, b) -> { + // Path.length() is O(n) so in theory this sucks + // but in practice we can make Path precompute length + // if it ever matters. + return b.length() - a.length(); }); /* diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java index f92921ee140f..54f955f540e0 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/config/AmazonDynamoDBSourceOptions.java @@ -43,7 +43,6 @@ public class AmazonDynamoDBSourceOptions implements Serializable { private Config schema; public int batchSize = AmazonDynamoDBConfig.BATCH_SIZE.defaultValue(); - public int batchIntervalMs = AmazonDynamoDBConfig.BATCH_INTERVAL_MS.defaultValue(); public AmazonDynamoDBSourceOptions(Config config) { this.url = config.getString(AmazonDynamoDBConfig.URL.key()); @@ -57,8 +56,5 @@ public AmazonDynamoDBSourceOptions(Config config) { if (config.hasPath(AmazonDynamoDBConfig.BATCH_SIZE.key())) { this.batchSize = config.getInt(AmazonDynamoDBConfig.BATCH_SIZE.key()); } - if (config.hasPath(AmazonDynamoDBConfig.BATCH_INTERVAL_MS.key())) { - this.batchIntervalMs = config.getInt(AmazonDynamoDBConfig.BATCH_INTERVAL_MS.key()); - } } } diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java index 
016036cc841b..d059bce7b578 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/AmazonDynamoDBWriter.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import java.io.IOException; +import java.util.Optional; public class AmazonDynamoDBWriter extends AbstractSinkWriter { @@ -48,4 +49,10 @@ public void write(SeaTunnelRow element) throws IOException { public void close() throws IOException { dynamoDbSinkClient.close(); } + + @Override + public Optional prepareCommit() { + dynamoDbSinkClient.flush(); + return Optional.empty(); + } } diff --git a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java index d8acf33ebeb8..e42f573dfb8a 100644 --- a/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java +++ b/seatunnel-connectors-v2/connector-amazondynamodb/src/main/java/org/apache/seatunnel/connectors/seatunnel/amazondynamodb/sink/DynamoDbSinkClient.java @@ -24,7 +24,6 @@ import org.apache.seatunnel.connectors.seatunnel.amazondynamodb.serialize.DefaultSeaTunnelRowDeserializer; import org.apache.seatunnel.connectors.seatunnel.amazondynamodb.serialize.SeaTunnelRowDeserializer; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.regions.Region; @@ -40,15 +39,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; public class DynamoDbSinkClient { private final AmazonDynamoDBSourceOptions amazondynamodbSourceOptions; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; private DynamoDbClient dynamoDbClient; @@ -62,7 +55,7 @@ public DynamoDbSinkClient( this.seaTunnelRowDeserializer = new DefaultSeaTunnelRowDeserializer(typeInfo); } - private void tryInit() throws IOException { + private void tryInit() { if (initialize) { return; } @@ -78,25 +71,6 @@ private void tryInit() throws IOException { amazondynamodbSourceOptions.getAccessKeyId(), amazondynamodbSourceOptions.getSecretAccessKey()))) .build(); - - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("DdynamoDb-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - amazondynamodbSourceOptions.getBatchIntervalMs(), - amazondynamodbSourceOptions.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - initialize = true; } @@ -114,17 +88,13 @@ public synchronized void write(PutItemRequest putItemRequest) throws IOException } public synchronized void close() throws IOException { - 
if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } if (dynamoDbClient != null) { flush(); dynamoDbClient.close(); } } - synchronized void flush() throws IOException { + synchronized void flush() { checkFlushException(); if (batchList.isEmpty()) { return; diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java index a84eb79be3e6..6429fa4b5299 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlIncrementalSourceFactory.java @@ -71,6 +71,10 @@ public OptionRule optionRule() { JdbcSourceOptions.SAMPLE_SHARDING_THRESHOLD, JdbcSourceOptions.INVERSE_SAMPLING_RATE) .optional(MySqlSourceOptions.STARTUP_MODE, MySqlSourceOptions.STOP_MODE) + .conditional( + MySqlSourceOptions.STARTUP_MODE, + StartupMode.INITIAL, + SourceOptions.EXACTLY_ONCE) .conditional( MySqlSourceOptions.STARTUP_MODE, StartupMode.SPECIFIC, @@ -81,18 +85,6 @@ public OptionRule optionRule() { StopMode.SPECIFIC, SourceOptions.STOP_SPECIFIC_OFFSET_FILE, SourceOptions.STOP_SPECIFIC_OFFSET_POS) - .conditional( - MySqlSourceOptions.STARTUP_MODE, - StartupMode.TIMESTAMP, - SourceOptions.STARTUP_TIMESTAMP) - .conditional( - MySqlSourceOptions.STOP_MODE, - StopMode.TIMESTAMP, - SourceOptions.STOP_TIMESTAMP) - .conditional( - MySqlSourceOptions.STARTUP_MODE, - StartupMode.INITIAL, - SourceOptions.EXACTLY_ONCE) .build(); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java index 43f3f4c70cc8..bc59fd0f5c16 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/source/MySqlSourceOptions.java @@ -34,18 +34,22 @@ public class MySqlSourceOptions { Arrays.asList( StartupMode.INITIAL, StartupMode.EARLIEST, - StartupMode.LATEST)) + StartupMode.LATEST, + StartupMode.SPECIFIC)) .defaultValue(StartupMode.INITIAL) .withDescription( "Optional startup mode for CDC source, valid enumerations are " - + "\"initial\", \"earliest\", \"latest\", \"timestamp\"\n or \"specific\""); + + "\"initial\", \"earliest\", \"latest\" or \"specific\""); public static final SingleChoiceOption STOP_MODE = (SingleChoiceOption) Options.key(SourceOptions.STOP_MODE_KEY) - .singleChoice(StopMode.class, Arrays.asList(StopMode.NEVER)) + .singleChoice( + StopMode.class, + Arrays.asList( + StopMode.LATEST, StopMode.SPECIFIC, StopMode.NEVER)) .defaultValue(StopMode.NEVER) .withDescription( "Optional stop mode for CDC source, valid enumerations are " - + "\"never\", \"latest\", \"timestamp\"\n or \"specific\""); + + "\"never\", \"latest\" or \"specific\""); } diff --git 
a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java index d30ba32d221a..00c10f53cb5e 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/MySqlTypeUtils.java @@ -50,10 +50,14 @@ public class MySqlTypeUtils { private static final String MYSQL_BIGINT_UNSIGNED = "BIGINT UNSIGNED"; private static final String MYSQL_DECIMAL = "DECIMAL"; private static final String MYSQL_DECIMAL_UNSIGNED = "DECIMAL UNSIGNED"; + private static final String MYSQL_NUMERIC = "NUMERIC"; + private static final String MYSQL_NUMERIC_UNSIGNED = "NUMERIC UNSIGNED"; private static final String MYSQL_FLOAT = "FLOAT"; private static final String MYSQL_FLOAT_UNSIGNED = "FLOAT UNSIGNED"; private static final String MYSQL_DOUBLE = "DOUBLE"; private static final String MYSQL_DOUBLE_UNSIGNED = "DOUBLE UNSIGNED"; + private static final String MYSQL_REAL = "REAL"; + private static final String MYSQL_REAL_UNSIGNED = "REAL UNSIGNED"; // -------------------------string---------------------------- private static final String MYSQL_CHAR = "CHAR"; @@ -63,6 +67,7 @@ public class MySqlTypeUtils { private static final String MYSQL_TEXT = "TEXT"; private static final String MYSQL_LONGTEXT = "LONGTEXT"; private static final String MYSQL_JSON = "JSON"; + private static final String MYSQL_ENUM = "ENUM"; // ------------------------------time------------------------- private static final String MYSQL_DATE = "DATE"; @@ -89,6 +94,7 @@ public static SeaTunnelDataType convertFromColumn(Column column) { return column.length() == 1 ? 
BasicType.BOOLEAN_TYPE : BasicType.INT_TYPE; case MYSQL_TINYINT_UNSIGNED: case MYSQL_SMALLINT: + return BasicType.SHORT_TYPE; case MYSQL_SMALLINT_UNSIGNED: case MYSQL_MEDIUMINT: case MYSQL_MEDIUMINT_UNSIGNED: @@ -103,6 +109,9 @@ public static SeaTunnelDataType convertFromColumn(Column column) { case MYSQL_BIGINT_UNSIGNED: return new DecimalType(20, 0); case MYSQL_DECIMAL: + case MYSQL_DECIMAL_UNSIGNED: + case MYSQL_NUMERIC: + case MYSQL_NUMERIC_UNSIGNED: return new DecimalType(column.length(), column.scale().orElse(0)); case MYSQL_FLOAT: return BasicType.FLOAT_TYPE; @@ -110,8 +119,10 @@ public static SeaTunnelDataType convertFromColumn(Column column) { log.warn("{} will probably cause value overflow.", MYSQL_FLOAT_UNSIGNED); return BasicType.FLOAT_TYPE; case MYSQL_DOUBLE: + case MYSQL_REAL: return BasicType.DOUBLE_TYPE; case MYSQL_DOUBLE_UNSIGNED: + case MYSQL_REAL_UNSIGNED: log.warn("{} will probably cause value overflow.", MYSQL_DOUBLE_UNSIGNED); return BasicType.DOUBLE_TYPE; case MYSQL_CHAR: @@ -120,6 +131,7 @@ public static SeaTunnelDataType convertFromColumn(Column column) { case MYSQL_TEXT: case MYSQL_VARCHAR: case MYSQL_JSON: + case MYSQL_ENUM: return BasicType.STRING_TYPE; case MYSQL_LONGTEXT: log.warn( diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java index 151dcca2f3bb..0f635c5c7a8d 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-mysql/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/mysql/utils/TableDiscoveryUtils.java @@ -48,7 +48,10 @@ public static List listTables(JdbcConnection jdbc, RelationalTableFilte "SHOW DATABASES", rs -> { while (rs.next()) { - databaseNames.add(rs.getString(1)); + String databaseName = rs.getString(1); + if (tableFilters.databaseFilter().test(databaseName)) { + databaseNames.add(databaseName); + } } }); LOG.info("\t list of available databases is: {}", databaseNames); diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java index 235279b4d5a5..de29c6cf8b4c 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/client/ClickhouseSinkWriter.java @@ -208,7 +208,8 @@ private static boolean clickhouseServerEnableExperimentalLightweightDelete( } return false; } catch (SQLException e) { - throw new ClickhouseConnectorException(CommonErrorCode.SQL_OPERATION_FAILED, e); + log.warn("Failed to get clickhouse server config: {}", configKey, e); + return false; } } } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java 
b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java index 036a5d802f4a..49957b99e215 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; @@ -30,13 +31,20 @@ import com.google.auto.service.AutoService; import lombok.NoArgsConstructor; +import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DATA; +import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DELAY; + @NoArgsConstructor @AutoService(SeaTunnelSink.class) public class ConsoleSink extends AbstractSimpleSink { private SeaTunnelRowType seaTunnelRowType; + private boolean isPrintData = true; + private int delayMs = 0; - public ConsoleSink(SeaTunnelRowType seaTunnelRowType) { + public ConsoleSink(SeaTunnelRowType seaTunnelRowType, ReadonlyConfig options) { this.seaTunnelRowType = seaTunnelRowType; + this.isPrintData = options.get(LOG_PRINT_DATA); + this.delayMs = options.get(LOG_PRINT_DELAY); } @Override @@ -51,7 +59,7 @@ public SeaTunnelDataType getConsumedType() { @Override public AbstractSinkWriter createWriter(SinkWriter.Context context) { - return new ConsoleSinkWriter(seaTunnelRowType, context); + return new ConsoleSinkWriter(seaTunnelRowType, context, isPrintData, delayMs); } @Override @@ -60,5 +68,8 @@ public String getPluginName() { } @Override - public void prepare(Config pluginConfig) {} + public void prepare(Config pluginConfig) { + this.isPrintData = ReadonlyConfig.fromConfig(pluginConfig).get(LOG_PRINT_DATA); + this.delayMs = ReadonlyConfig.fromConfig(pluginConfig).get(LOG_PRINT_DELAY); + } } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java index 1e0450d66c7b..5a66493aee5e 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java @@ -17,6 +17,9 @@ package org.apache.seatunnel.connectors.seatunnel.console.sink; +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -27,6 +30,21 @@ @AutoService(Factory.class) public class ConsoleSinkFactory implements TableSinkFactory { + + public static final Option LOG_PRINT_DATA = + Options.key("log.print.data") + .booleanType() + .defaultValue(true) + .withDescription( + "Flag to determine whether data should be printed in the logs."); + + public static final 
Option LOG_PRINT_DELAY = + Options.key("log.print.delay.ms") + .intType() + .defaultValue(0) + .withDescription( + "Delay in milliseconds between printing each data item to the logs."); + @Override public String factoryIdentifier() { return "Console"; @@ -39,7 +57,10 @@ public OptionRule optionRule() { @Override public TableSink createSink(TableFactoryContext context) { + ReadonlyConfig options = context.getOptions(); return () -> - new ConsoleSink(context.getCatalogTable().getTableSchema().toPhysicalRowDataType()); + new ConsoleSink( + context.getCatalogTable().getTableSchema().toPhysicalRowDataType(), + options); } } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java index fc3f7f232cbc..c8c6c945ff1b 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.utils.JsonUtils; +import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.commons.lang3.StringUtils; @@ -44,9 +45,18 @@ public class ConsoleSinkWriter extends AbstractSinkWriter { private final SinkWriter.Context context; private final DataTypeChangeEventHandler dataTypeChangeEventHandler; - public ConsoleSinkWriter(SeaTunnelRowType seaTunnelRowType, SinkWriter.Context context) { + boolean isPrintData = true; + int delayMs = 0; + + public ConsoleSinkWriter( + SeaTunnelRowType seaTunnelRowType, + SinkWriter.Context context, + boolean isPrintData, + int delayMs) { this.seaTunnelRowType = seaTunnelRowType; this.context = context; + this.isPrintData = isPrintData; + this.delayMs = delayMs; this.dataTypeChangeEventHandler = new DataTypeChangeEventDispatcher(); log.info("output rowType: {}", fieldsInfo(seaTunnelRowType)); } @@ -66,13 +76,23 @@ public void write(SeaTunnelRow element) { for (int i = 0; i < fieldTypes.length; i++) { arr[i] = fieldToString(fieldTypes[i], fields[i]); } - log.info( - "subtaskIndex={} rowIndex={}: SeaTunnelRow#tableId={} SeaTunnelRow#kind={} : {}", - context.getIndexOfSubtask(), - rowCounter.incrementAndGet(), - element.getTableId(), - element.getRowKind(), - StringUtils.join(arr, ", ")); + if (isPrintData) { + log.info( + "subtaskIndex={} rowIndex={}: SeaTunnelRow#tableId={} SeaTunnelRow#kind={} : {}", + context.getIndexOfSubtask(), + rowCounter.incrementAndGet(), + element.getTableId(), + element.getRowKind(), + StringUtils.join(arr, ", ")); + } + if (delayMs > 0) { + try { + Thread.sleep(delayMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new SeaTunnelException(e); + } + } } @Override diff --git a/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java b/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java index 0220c8896295..e03c00c49599 100644 --- 
a/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java +++ b/seatunnel-connectors-v2/connector-console/src/test/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriterIT.java @@ -48,7 +48,7 @@ void setUp() { String[] fieldNames = {}; SeaTunnelDataType[] fieldTypes = {}; SeaTunnelRowType seaTunnelRowType = new SeaTunnelRowType(fieldNames, fieldTypes); - consoleSinkWriter = new ConsoleSinkWriter(seaTunnelRowType, null); + consoleSinkWriter = new ConsoleSinkWriter(seaTunnelRowType, null, true, 0); } private Object fieldToStringTest(SeaTunnelDataType dataType, Object value) { diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java index ac9f8c12bbb4..2f893da092bf 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/config/CommonConfig.java @@ -26,7 +26,6 @@ import lombok.ToString; import java.io.Serializable; -import java.util.List; import static org.apache.seatunnel.connectors.seatunnel.iceberg.config.IcebergCatalogType.HADOOP; import static org.apache.seatunnel.connectors.seatunnel.iceberg.config.IcebergCatalogType.HIVE; @@ -80,12 +79,6 @@ public class CommonConfig implements Serializable { .defaultValue(false) .withDescription(" the iceberg case_sensitive"); - public static final Option> KEY_FIELDS = - Options.key("fields") - .listType() - .noDefaultValue() - .withDescription(" the iceberg table fields"); - private String catalogName; private IcebergCatalogType catalogType; private String uri; diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java index 1070815abd8b..806309bffeba 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/config/SinkConfig.java @@ -60,12 +60,6 @@ public SinkConfig(Config config) { .defaultValue(1024) .withDescription("batch size of the influxdb client"); - public static final Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .intType() - .noDefaultValue() - .withDescription("batch interval ms of the influxdb client"); - public static final Option MAX_RETRIES = Options.key("max_retries") .intType() @@ -104,7 +98,6 @@ public SinkConfig(Config config) { private String keyTime; private List keyTags; private int batchSize = BATCH_SIZE.defaultValue(); - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -119,9 +112,6 @@ public static SinkConfig loadConfig(Config config) { if (config.hasPath(KEY_TAGS.key())) { sinkConfig.setKeyTags(config.getStringList(KEY_TAGS.key())); } - if (config.hasPath(BATCH_INTERVAL_MS.key())) { - sinkConfig.setBatchIntervalMs(config.getInt(BATCH_INTERVAL_MS.key())); - } if (config.hasPath(MAX_RETRIES.key())) { 
sinkConfig.setMaxRetries(config.getInt(MAX_RETRIES.key())); } diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java index f3673ddd9b73..3d44158e78b1 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java @@ -28,7 +28,6 @@ import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.URL; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.InfluxDBConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.KEY_MEASUREMENT; import static org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig.KEY_TAGS; @@ -54,7 +53,6 @@ public OptionRule optionRule() { KEY_TAGS, KEY_TIME, BATCH_SIZE, - BATCH_INTERVAL_MS, MAX_RETRIES, RETRY_BACKOFF_MULTIPLIER_MS) .build(); diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java index 4683e4f460fc..a70c9f9e9a83 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkWriter.java @@ -34,7 +34,6 @@ import org.influxdb.dto.BatchPoints; import org.influxdb.dto.Point; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -43,10 +42,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Optional; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class InfluxDBSinkWriter extends AbstractSinkWriter { @@ -55,15 +50,11 @@ public class InfluxDBSinkWriter extends AbstractSinkWriter { private InfluxDB influxdb; private final SinkConfig sinkConfig; private final List batchList; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile Exception flushException; - private final Integer batchIntervalMs; public InfluxDBSinkWriter(Config pluginConfig, SeaTunnelRowType seaTunnelRowType) throws ConnectException { this.sinkConfig = SinkConfig.loadConfig(pluginConfig); - this.batchIntervalMs = sinkConfig.getBatchIntervalMs(); this.serializer = new DefaultSerializer( seaTunnelRowType, @@ -73,26 +64,6 @@ public InfluxDBSinkWriter(Config pluginConfig, SeaTunnelRowType seaTunnelRowType sinkConfig.getMeasurement()); this.batchList = new ArrayList<>(); - if (batchIntervalMs != null) { - scheduler = - 
Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("influxDB-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - batchIntervalMs, - batchIntervalMs, - TimeUnit.MILLISECONDS); - } - connect(); } @@ -112,11 +83,6 @@ public Optional prepareCommit() { @Override public void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); if (influxdb != null) { diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java index 9a7b7a112def..ce9b784b0608 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SinkConfig.java @@ -62,11 +62,6 @@ public class SinkConfig extends CommonConfig { .intType() .defaultValue(DEFAULT_BATCH_SIZE) .withDescription("batch size"); - public static final Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .stringType() - .noDefaultValue() - .withDescription("batch interval ms"); public static final Option MAX_RETRIES = Options.key("max_retries").intType().noDefaultValue().withDescription("max retries"); public static final Option RETRY_BACKOFF_MULTIPLIER_MS = @@ -107,7 +102,6 @@ public class SinkConfig extends CommonConfig { private List keyMeasurementFields; private String storageGroup; private int batchSize = BATCH_SIZE.defaultValue(); - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -144,10 +138,6 @@ public static SinkConfig loadConfig(Config pluginConfig) { int batchSize = checkIntArgument(pluginConfig.getInt(BATCH_SIZE.key())); sinkConfig.setBatchSize(batchSize); } - if (pluginConfig.hasPath(BATCH_INTERVAL_MS.key())) { - int batchIntervalMs = checkIntArgument(pluginConfig.getInt(BATCH_INTERVAL_MS.key())); - sinkConfig.setBatchIntervalMs(batchIntervalMs); - } if (pluginConfig.hasPath(MAX_RETRIES.key())) { int maxRetries = checkIntArgument(pluginConfig.getInt(MAX_RETRIES.key())); sinkConfig.setMaxRetries(maxRetries); diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java index be96a7d91879..ac515ef37a91 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/config/SourceConfig.java @@ -31,20 +31,6 @@ public class SourceConfig { public static final Option SQL = Options.key("sql").stringType().noDefaultValue().withDescription("sql"); - /*---------------------- single node configurations -------------------------*/ - - /** The host of the IotDB server. */ - public static final Option HOST = - Options.key("host").stringType().noDefaultValue().withDescription("host"); - - /* - * The port of the IotDB server. 
- */ - public static final Option PORT = - Options.key("port").intType().noDefaultValue().withDescription("port"); - - /*---------------------- multiple node configurations -------------------------*/ - /** Username for the source. */ public static final Option USERNAME = Options.key("username").stringType().noDefaultValue().withDescription("usernam"); @@ -53,7 +39,7 @@ public class SourceConfig { public static final Option PASSWORD = Options.key("password").stringType().noDefaultValue().withDescription("password"); - /** multiple nodes */ + /** node urls */ public static final Option NODE_URLS = Options.key("node_urls").stringType().noDefaultValue().withDescription("node urls"); diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java index 87b66f62bd1b..e3cdac1ba4b4 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkClient.java @@ -28,17 +28,12 @@ import org.apache.iotdb.session.Session; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class IoTDBSinkClient { @@ -47,8 +42,6 @@ public class IoTDBSinkClient { private final List batchList; private Session session; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; @@ -95,26 +88,6 @@ private void tryInit() throws IOException { "Initialize IoTDB client failed.", e); } - - if (sinkConfig.getBatchIntervalMs() != null) { - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("IoTDB-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - sinkConfig.getBatchIntervalMs(), - sinkConfig.getBatchIntervalMs(), - TimeUnit.MILLISECONDS); - } initialize = true; } @@ -129,11 +102,6 @@ public synchronized void write(IoTDBRecord record) throws IOException { } public synchronized void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); try { diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java index 7b46df3ae553..67a4527cf27b 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/sink/IoTDBSinkFactory.java @@ -26,7 +26,6 @@ import static 
org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.NODE_URLS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.USERNAME; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.BATCH_INTERVAL_MS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.CONNECTION_TIMEOUT_IN_MS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SinkConfig.DEFAULT_THRIFT_BUFFER_SIZE; @@ -57,7 +56,6 @@ public OptionRule optionRule() { KEY_MEASUREMENT_FIELDS, STORAGE_GROUP, BATCH_SIZE, - BATCH_INTERVAL_MS, MAX_RETRIES, RETRY_BACKOFF_MULTIPLIER_MS, MAX_RETRY_BACKOFF_MS, diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java index 7f2960ae007d..0c171ada4fcf 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSource.java @@ -43,9 +43,7 @@ import java.util.HashMap; import java.util.Map; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NODE_URLS; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; @AutoService(SeaTunnelSource.class) public class IoTDBSource @@ -66,11 +64,7 @@ public String getPluginName() { @Override public void prepare(Config pluginConfig) throws PrepareFailException { - CheckResult urlCheckResult = - CheckConfigUtil.checkAllExists(pluginConfig, HOST.key(), PORT.key()); - if (!urlCheckResult.isSuccess()) { - urlCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, NODE_URLS.key()); - } + CheckResult urlCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, NODE_URLS.key()); CheckResult schemaCheckResult = CheckConfigUtil.checkAllExists(pluginConfig, CatalogTableUtil.SCHEMA.key()); CheckResult mergedConfigCheck = diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java index c697df701296..2c2a521fd84a 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceFactory.java @@ -30,10 +30,8 @@ import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.CommonConfig.USERNAME; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.ENABLE_CACHE_LEADER; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.FETCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.LOWER_BOUND; import static 
org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NUM_PARTITIONS; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.SQL; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_DEFAULT_BUFFER_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_MAX_FRAME_SIZE; @@ -52,8 +50,6 @@ public OptionRule optionRule() { return OptionRule.builder() .required(NODE_URLS, USERNAME, PASSWORD, SQL, SCHEMA) .optional( - HOST, - PORT, FETCH_SIZE, THRIFT_DEFAULT_BUFFER_SIZE, THRIFT_MAX_FRAME_SIZE, diff --git a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java index c4ecd9dc81dc..546487825c35 100644 --- a/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java +++ b/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/source/IoTDBSourceReader.java @@ -46,10 +46,8 @@ import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.ENABLE_CACHE_LEADER; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.FETCH_SIZE; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.HOST; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.NODE_URLS; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PASSWORD; -import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.PORT; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_DEFAULT_BUFFER_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.THRIFT_MAX_FRAME_SIZE; import static org.apache.seatunnel.connectors.seatunnel.iotdb.config.SourceConfig.USERNAME; @@ -130,17 +128,10 @@ private void read(IoTDBSourceSplit split, Collector output) throws private Session buildSession(Map conf) { Session.Builder sessionBuilder = new Session.Builder(); - if (conf.containsKey(HOST.key())) { - sessionBuilder - .host((String) conf.get(HOST.key())) - .port(Integer.parseInt(conf.get(PORT.key()).toString())) - .build(); - } else { - String nodeUrlsString = (String) conf.get(NODE_URLS.key()); - List nodes = - Stream.of(nodeUrlsString.split(NODES_SPLIT)).collect(Collectors.toList()); - sessionBuilder.nodeUrls(nodes); - } + String nodeUrlsString = (String) conf.get(NODE_URLS.key()); + List nodes = + Stream.of(nodeUrlsString.split(NODES_SPLIT)).collect(Collectors.toList()); + sessionBuilder.nodeUrls(nodes); if (null != conf.get(FETCH_SIZE.key())) { sessionBuilder.fetchSize(Integer.parseInt(conf.get(FETCH_SIZE.key()).toString())); } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java index f4f37e584eb4..e7a1c8c2c5e8 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java +++ 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/client/StarRocksSinkManager.java @@ -22,7 +22,6 @@ import org.apache.seatunnel.connectors.seatunnel.starrocks.exception.StarRocksConnectorException; import com.google.common.base.Strings; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.extern.slf4j.Slf4j; import java.io.IOException; @@ -30,10 +29,6 @@ import java.util.ArrayList; import java.util.List; import java.util.UUID; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; @Slf4j public class StarRocksSinkManager { @@ -42,18 +37,14 @@ public class StarRocksSinkManager { private final List batchList; private final StarRocksStreamLoadVisitor starrocksStreamLoadVisitor; - private ScheduledExecutorService scheduler; - private ScheduledFuture scheduledFuture; private volatile boolean initialize; private volatile Exception flushException; private int batchRowCount = 0; private long batchBytesSize = 0; - private final Integer batchIntervalMs; public StarRocksSinkManager(SinkConfig sinkConfig, List fileNames) { this.sinkConfig = sinkConfig; this.batchList = new ArrayList<>(); - this.batchIntervalMs = sinkConfig.getBatchIntervalMs(); starrocksStreamLoadVisitor = new StarRocksStreamLoadVisitor(sinkConfig, fileNames); } @@ -62,26 +53,6 @@ private void tryInit() throws IOException { return; } initialize = true; - - if (batchIntervalMs != null) { - scheduler = - Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder() - .setNameFormat("StarRocks-sink-output-%s") - .build()); - scheduledFuture = - scheduler.scheduleAtFixedRate( - () -> { - try { - flush(); - } catch (IOException e) { - flushException = e; - } - }, - batchIntervalMs, - batchIntervalMs, - TimeUnit.MILLISECONDS); - } } public synchronized void write(String record) throws IOException { @@ -98,11 +69,6 @@ public synchronized void write(String record) throws IOException { } public synchronized void close() throws IOException { - if (scheduledFuture != null) { - scheduledFuture.cancel(false); - scheduler.shutdown(); - } - flush(); } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java index f5a2d0dc88c1..c1709b693903 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/SinkConfig.java @@ -51,7 +51,6 @@ public enum StreamLoadFormat { private int batchMaxSize; private long batchMaxBytes; - private Integer batchIntervalMs; private int maxRetries; private int retryBackoffMultiplierMs; private int maxRetryBackoffMs; @@ -74,8 +73,6 @@ public static SinkConfig of(ReadonlyConfig config) { config.getOptional(StarRocksSinkOptions.LABEL_PREFIX).ifPresent(sinkConfig::setLabelPrefix); sinkConfig.setBatchMaxSize(config.get(StarRocksSinkOptions.BATCH_MAX_SIZE)); sinkConfig.setBatchMaxBytes(config.get(StarRocksSinkOptions.BATCH_MAX_BYTES)); - config.getOptional(StarRocksSinkOptions.BATCH_INTERVAL_MS) - .ifPresent(sinkConfig::setBatchIntervalMs); 
config.getOptional(StarRocksSinkOptions.MAX_RETRIES).ifPresent(sinkConfig::setMaxRetries); config.getOptional(StarRocksSinkOptions.RETRY_BACKOFF_MULTIPLIER_MS) .ifPresent(sinkConfig::setRetryBackoffMultiplierMs); diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java index 02918f0f96d7..eed2afc36058 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java @@ -75,21 +75,14 @@ public interface StarRocksSinkOptions { .intType() .defaultValue(1024) .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); + "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches checkpoint.interval, the data will be flushed into the StarRocks"); Option BATCH_MAX_BYTES = Options.key("batch_max_bytes") .longType() .defaultValue((long) (5 * 1024 * 1024)) .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); - - Option BATCH_INTERVAL_MS = - Options.key("batch_interval_ms") - .intType() - .noDefaultValue() - .withDescription( - "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches batch_interval_ms, the data will be flushed into the StarRocks"); + "For batch writing, when the number of buffers reaches the number of batch_max_rows or the byte size of batch_max_bytes or the time reaches checkpoint.interval, the data will be flushed into the StarRocks"); Option MAX_RETRIES = Options.key("max_retries") diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java index 471be7001b68..c0159c5fd429 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java @@ -49,7 +49,6 @@ public OptionRule optionRule() { StarRocksSinkOptions.LABEL_PREFIX, StarRocksSinkOptions.BATCH_MAX_SIZE, StarRocksSinkOptions.BATCH_MAX_BYTES, - StarRocksSinkOptions.BATCH_INTERVAL_MS, StarRocksSinkOptions.MAX_RETRIES, StarRocksSinkOptions.MAX_RETRY_BACKOFF_MS, StarRocksSinkOptions.RETRY_BACKOFF_MULTIPLIER_MS, diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java index 9b818ca95f61..ada15490f0a1 100644 
--- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java
+++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/AbstractCommandArgs.java
@@ -41,6 +41,7 @@ public abstract class AbstractCommandArgs extends CommandArgs {
     /** user-defined parameters */
     @Parameter(
             names = {"-i", "--variable"},
+            splitter = ParameterSplitter.class,
             description = "Variable substitution, such as -i city=beijing, or -i date=20190318")
     protected List<String> variables = Collections.emptyList();
 
diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java
new file mode 100644
index 000000000000..29263d417e7c
--- /dev/null
+++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/command/ParameterSplitter.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.seatunnel.core.starter.command;
+
+import com.beust.jcommander.converters.IParameterSplitter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class ParameterSplitter implements IParameterSplitter {
+
+    @Override
+    public List<String> split(String value) {
+        if (!value.contains(",")) {
+            return Collections.singletonList(value);
+        }
+
+        List<String> result = new ArrayList<>();
+        StringBuilder currentToken = new StringBuilder();
+        boolean insideBrackets = false;
+
+        for (char c : value.toCharArray()) {
+            if (c == '[') {
+                insideBrackets = true;
+            } else if (c == ']') {
+                insideBrackets = false;
+            }
+
+            if (c == ',' && !insideBrackets) {
+                result.add(currentToken.toString().trim());
+                currentToken = new StringBuilder();
+            } else {
+                currentToken.append(c);
+            }
+        }
+
+        if (currentToken.length() > 0) {
+            result.add(currentToken.toString().trim());
+        }
+
+        return result;
+    }
+}
diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
index 7fb75064a4c8..34aa7ee4f2d4 100644
--- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
+++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
@@ -265,7 +265,10 @@ private void setCheckpoint() {
             }
         }
 
-        if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) {
+        if (config.hasPath(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) {
+            long timeout = config.getLong(EnvCommonOptions.CHECKPOINT_TIMEOUT.key());
+            checkpointConfig.setCheckpointTimeout(timeout);
+        } else if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) {
             long timeout = config.getLong(ConfigKeyName.CHECKPOINT_TIMEOUT);
             checkpointConfig.setCheckpointTimeout(timeout);
         }
diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
index 4b5bef07cb05..583a1cf3e5ca 100644
--- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
+++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java
@@ -265,7 +265,10 @@ private void setCheckpoint() {
             }
         }
 
-        if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) {
+        if (config.hasPath(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) {
+            long timeout = config.getLong(EnvCommonOptions.CHECKPOINT_TIMEOUT.key());
+            checkpointConfig.setCheckpointTimeout(timeout);
+        } else if (config.hasPath(ConfigKeyName.CHECKPOINT_TIMEOUT)) {
             long timeout = config.getLong(ConfigKeyName.CHECKPOINT_TIMEOUT);
             checkpointConfig.setCheckpointTimeout(timeout);
         }
diff --git a/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java
b/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java index 5f197367d0d3..c4bd422f2f10 100644 --- a/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java +++ b/seatunnel-core/seatunnel-starter/src/test/java/org/apache/seatunnel/core/starter/seatunnel/args/ClientCommandArgsTest.java @@ -40,6 +40,7 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { String password = "dsjr42=4wfskahdsd=w1chh"; String fakeSourceTable = "fake"; String fakeSinkTable = "sink"; + String list = "[par1=20230829,par2=20230829]"; String[] args = { "-c", "/args/user_defined_params.conf", @@ -54,7 +55,9 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { "-i", "password=" + password, "-i", - "username=" + username + "username=" + username, + "-i", + "list=" + list, }; ClientCommandArgs clientCommandArgs = CommandLineUtils.parse(args, new ClientCommandArgs(), "seatunnel-zeta", true); @@ -88,6 +91,9 @@ public void testUserDefinedParamsCommand() throws URISyntaxException { Assertions.assertEquals(sinkConfig.getString("username"), username); Assertions.assertEquals(sinkConfig.getString("password"), password); + List list1 = sinkConfig.getStringList("list"); + Assertions.assertEquals(list1.get(0), "par1=20230829"); + Assertions.assertEquals(list1.get(1), "par2=20230829"); } } } diff --git a/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf b/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf index 9dfde35dd6a9..bc2114443f17 100644 --- a/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf +++ b/seatunnel-core/seatunnel-starter/src/test/resources/args/user_defined_params.conf @@ -47,5 +47,6 @@ sink { result_table_name = ${fake_sink_table} username = ${username} password = ${password} + list = ${list} } -} \ No newline at end of file +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java index 94bfbe917e27..8b8d6acd77b6 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iotdb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iotdb/IoTDBIT.java @@ -63,7 +63,7 @@ value = {}, type = {EngineType.SPARK}, disabledReason = - "There is a conflict of thrift version between IoTDB and Spark.Therefore. Refactor starter module, so disabled in flink") + "There is a conflict of thrift version between IoTDB and Spark.Therefore. 
Refactor starter module, so disabled in spark") public class IoTDBIT extends TestSuiteBase implements TestResource { private static final String IOTDB_DOCKER_IMAGE = "apache/iotdb:0.13.1-node"; diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java index 485fe2df7711..c50712820038 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainerId.java @@ -32,7 +32,7 @@ public enum TestContainerId { FLINK_1_16(FLINK, "1.16.0"), SPARK_2_4(SPARK, "2.4.6"), SPARK_3_3(SPARK, "3.3.0"), - SEATUNNEL(EngineType.SEATUNNEL, "2.3.1"); + SEATUNNEL(EngineType.SEATUNNEL, "dev"); private final EngineType engineType; private final String version; diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java index 608871dd5617..3c677b45f3d3 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceTwoPipelineIT.java @@ -543,9 +543,11 @@ public void testTwoPipelineStreamJobRestoreIn2NodeWorkerDown() @Test public void testTwoPipelineBatchJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { - String testCaseName = "testTwoPipelineBatchJobRestoreIn2NodeMasterDown"; + String testCaseName = + "testTwoPipelineBatchJobRestoreIn2NodeMasterDown" + System.currentTimeMillis(); String testClusterName = - "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineBatchJobRestoreIn2NodeMasterDown"; + "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineBatchJobRestoreIn2NodeMasterDown" + + System.currentTimeMillis(); long testRowNumber = 1000; int testParallelism = 6; HazelcastInstanceImpl node1 = null; @@ -651,9 +653,11 @@ public void testTwoPipelineBatchJobRestoreIn2NodeMasterDown() @Test public void testTwoPipelineStreamJobRestoreIn2NodeMasterDown() throws ExecutionException, InterruptedException { - String testCaseName = "testTwoPipelineStreamJobRestoreIn2NodeMasterDown"; + String testCaseName = + "testTwoPipelineStreamJobRestoreIn2NodeMasterDown" + System.currentTimeMillis(); String testClusterName = - "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineStreamJobRestoreIn2NodeMasterDown"; + "ClusterFaultToleranceTwoPipelineIT_testTwoPipelineStreamJobRestoreIn2NodeMasterDown" + + System.currentTimeMillis(); long testRowNumber = 1000; int testParallelism = 6; HazelcastInstanceImpl node1 = null; diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java index cba498e99922..4ecee663ae52 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java +++ 
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobExecutionIT.java @@ -145,6 +145,28 @@ public void testGetErrorInfo() throws ExecutionException, InterruptedException { Assertions.assertTrue(result.getError().startsWith("java.lang.NumberFormatException")); } + @Test + public void testGetUnKnownJobID() { + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName("JobExecutionIT")); + SeaTunnelClient engineClient = new SeaTunnelClient(clientConfig); + + ClientJobProxy newClientJobProxy = + engineClient.createJobClient().getJobProxy(System.currentTimeMillis()); + CompletableFuture waitForJobCompleteFuture = + CompletableFuture.supplyAsync(newClientJobProxy::waitForJobComplete); + + await().atMost(20000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + JobStatus.UNKNOWABLE, waitForJobCompleteFuture.get())); + + Assertions.assertEquals( + "UNKNOWABLE", engineClient.getJobClient().getJobStatus(System.currentTimeMillis())); + } + @Test public void testExpiredJobWasDeleted() throws Exception { Common.setDeployMode(DeployMode.CLIENT); @@ -164,8 +186,8 @@ public void testExpiredJobWasDeleted() throws Exception { await().atMost(65, TimeUnit.SECONDS) .untilAsserted( () -> - Assertions.assertThrowsExactly( - NullPointerException.class, clientJobProxy::getJobStatus)); + Assertions.assertEquals( + JobStatus.UNKNOWABLE, clientJobProxy.getJobStatus())); } @AfterEach diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml index 4276fc87916f..7775a483cd74 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/seatunnel.yaml @@ -25,7 +25,7 @@ seatunnel: dynamic-slot: true checkpoint: interval: 300000 - timeout: 10000 + timeout: 100000 storage: type: localfile max-retained: 3 diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java index f9dbfb4c6cc3..7c50744dba0a 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/job/JobStatus.java @@ -60,7 +60,10 @@ public enum JobStatus { SUSPENDED(EndState.LOCALLY), /** The job is currently reconciling and waits for task execution report to recover state. */ - RECONCILING(EndState.NOT_END); + RECONCILING(EndState.NOT_END), + + /** Cannot find the JobID or the job status has already been cleared. 
*/ + UNKNOWABLE(EndState.GLOBALLY); // -------------------------------------------------------------------------------------------- diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java index 5293fe8bf998..89a2258ce2dc 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java @@ -505,9 +505,22 @@ public PassiveCompletableFuture savePoint(long jobId) { public PassiveCompletableFuture waitForJobComplete(long jobId) { JobMaster runningJobMaster = runningJobMasterMap.get(jobId); if (runningJobMaster == null) { - JobHistoryService.JobState jobState = jobHistoryService.getJobDetailState(jobId); + // Because operations on Imap cannot be performed within Operation. + CompletableFuture jobStateFuture = + CompletableFuture.supplyAsync( + () -> { + return jobHistoryService.getJobDetailState(jobId); + }, + executorService); + JobHistoryService.JobState jobState = null; + try { + jobState = jobStateFuture.get(); + } catch (Exception e) { + throw new SeaTunnelEngineException("get job state error", e); + } + CompletableFuture future = new CompletableFuture<>(); - if (jobState == null) future.complete(new JobResult(JobStatus.FAILED, null)); + if (jobState == null) future.complete(new JobResult(JobStatus.UNKNOWABLE, null)); else future.complete(new JobResult(jobState.getJobStatus(), jobState.getErrorMessage())); return new PassiveCompletableFuture<>(future); @@ -537,7 +550,7 @@ public JobStatus getJobStatus(long jobId) { JobMaster runningJobMaster = runningJobMasterMap.get(jobId); if (runningJobMaster == null) { JobHistoryService.JobState jobDetailState = jobHistoryService.getJobDetailState(jobId); - return null == jobDetailState ? null : jobDetailState.getJobStatus(); + return null == jobDetailState ? 
JobStatus.UNKNOWABLE : jobDetailState.getJobStatus(); } return runningJobMaster.getJobStatus(); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java index 56b0e5ec0082..88ee1afc9ddf 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java @@ -230,7 +230,7 @@ public boolean isMasterNode() { // must retry until the cluster have master node try { return RetryUtils.retryWithException( - () -> nodeEngine.getMasterAddress().equals(nodeEngine.getThisAddress()), + () -> nodeEngine.getThisAddress().equals(nodeEngine.getMasterAddress()), new RetryUtils.RetryMaterial( Constant.OPERATION_RETRY_TIME, true, diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java index 9f35f62fd608..c07f10fb1c9c 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCloseReason.java @@ -20,7 +20,7 @@ public enum CheckpointCloseReason { PIPELINE_END("Pipeline turn to end state."), CHECKPOINT_EXPIRED( - "Checkpoint expired before completing. Please increase checkpoint timeout in the seatunnel.yaml"), + "Checkpoint expired before completing. 
Please increase checkpoint timeout in the seatunnel.yaml or jobConfig env."), CHECKPOINT_COORDINATOR_COMPLETED("CheckpointCoordinator completed."), CHECKPOINT_COORDINATOR_SHUTDOWN("CheckpointCoordinator shutdown."), CHECKPOINT_COORDINATOR_RESET("CheckpointCoordinator reset."), diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java index 5b329dbff817..6246831843ed 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java @@ -278,6 +278,11 @@ private CheckpointConfig createJobCheckpointConfig( Long.parseLong( jobEnv.get(EnvCommonOptions.CHECKPOINT_INTERVAL.key()).toString())); } + if (jobEnv.containsKey(EnvCommonOptions.CHECKPOINT_TIMEOUT.key())) { + jobCheckpointConfig.setCheckpointTimeout( + Long.parseLong( + jobEnv.get(EnvCommonOptions.CHECKPOINT_TIMEOUT.key()).toString())); + } return jobCheckpointConfig; } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java index 1c8b25f37af6..e2fe0c335a4e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java @@ -37,7 +37,6 @@ import com.hazelcast.cluster.Address; import com.hazelcast.spi.impl.operationservice.Operation; import com.hazelcast.spi.impl.operationservice.impl.InvocationFuture; -import lombok.Getter; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -78,7 +77,7 @@ public class SourceSplitEnumeratorTask extends Coord private SeaTunnelSplitEnumeratorContext enumeratorContext; private Serializer enumeratorStateSerializer; - @Getter private Serializer splitSerializer; + private Serializer splitSerializer; private int maxReaderSize; private Set unfinishedReaders; @@ -197,6 +196,13 @@ public void restoreState(List actionStateList) throws Except log.debug("restoreState split enumerator [{}] finished", actionStateList); } + public Serializer getSplitSerializer() throws ExecutionException, InterruptedException { + // Because the splitSerializer is initialized in the init method, it's necessary to wait for + // the Enumerator to finish initializing. + getEnumerator(); + return splitSerializer; + } + public void addSplitsBack(List splits, int subtaskId) throws ExecutionException, InterruptedException { getEnumerator().addSplitsBack(splits, subtaskId); diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java new file mode 100644 index 000000000000..3cf7636adf8e --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.checkpoint; + +import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture; +import org.apache.seatunnel.engine.core.dag.logical.LogicalDag; +import org.apache.seatunnel.engine.core.job.JobImmutableInformation; +import org.apache.seatunnel.engine.core.job.JobStatus; +import org.apache.seatunnel.engine.server.AbstractSeaTunnelServerTest; +import org.apache.seatunnel.engine.server.TestUtils; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.hazelcast.internal.serialization.Data; +import lombok.extern.slf4j.Slf4j; + +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +import static org.awaitility.Awaitility.await; + +@Slf4j +public class CheckpointTimeOutTest extends AbstractSeaTunnelServerTest { + + public static String CONF_PATH = "stream_fake_to_console_checkpointTimeOut.conf"; + public static long JOB_ID = System.currentTimeMillis(); + + @Test + public void testJobLevelCheckpointTimeOut() { + startJob(JOB_ID, CONF_PATH); + + await().atMost(120000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + server.getCoordinatorService().getJobStatus(JOB_ID), + JobStatus.RUNNING)); + + await().atMost(360000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + log.info( + "Job status: {}", + server.getCoordinatorService().getJobStatus(JOB_ID)); + Assertions.assertEquals( + server.getCoordinatorService().getJobStatus(JOB_ID), + JobStatus.FAILED); + }); + } + + private void startJob(Long jobid, String path) { + LogicalDag testLogicalDag = TestUtils.createTestLogicalPlan(path, jobid.toString(), jobid); + + JobImmutableInformation jobImmutableInformation = + new JobImmutableInformation( + jobid, + "Test", + false, + nodeEngine.getSerializationService().toData(testLogicalDag), + testLogicalDag.getJobConfig(), + Collections.emptyList()); + + Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); + + PassiveCompletableFuture voidPassiveCompletableFuture = + server.getCoordinatorService().submitJob(jobid, data); + voidPassiveCompletableFuture.join(); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf b/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf new file mode 100644 index 000000000000..2d541ac2acd7 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/test/resources/stream_fake_to_console_checkpointTimeOut.conf @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+######
+###### This config file is a demonstration of streaming processing in seatunnel config
+######
+
+env {
+  # You can set flink configuration here
+  execution.parallelism = 1
+  job.mode = "STREAMING"
+  checkpoint.interval = 1000
+  checkpoint.timeout = 100
+}
+
+source {
+  # This is a example source plugin **only for test and demonstrate the feature source plugin**
+  FakeSource {
+    result_table_name = "fake1"
+    row.num = 1000
+    split.num = 100
+    split.read-interval = 3000
+    parallelism = 1
+    schema = {
+      fields {
+        name = "string"
+        age = "int"
+      }
+    }
+    parallelism = 1
+  }
+}
+
+transform {
+}
+
+sink {
+  console {
+    log.print.delay.ms=5000
+  }
+}
\ No newline at end of file
diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf
index 0f927351fb63..a09137dc033a 100644
--- a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf
+++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf
@@ -47,7 +47,7 @@ source {
 
 transform {
   # If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
-  # please go to https://seatunnel.apache.org/docs/category/transform
+  # please go to https://seatunnel.apache.org/docs/category/transform-v2
 }
 
 sink {
diff --git a/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf
index f3ad1b5f73cc..cf0958ecdee6 100644
--- a/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf
+++ b/seatunnel-examples/seatunnel-spark-connector-v2-example/src/main/resources/examples/spark.batch.conf
@@ -79,7 +79,7 @@ transform {
   }
 
   # If you would like to get more information about how to configure seatunnel and see full list of transform plugins,
-  # please go to https://seatunnel.apache.org/docs/category/transform
+  # please go to https://seatunnel.apache.org/docs/category/transform-v2
 }
 
 sink {