From 66df5d4b907d74a5ba0d663f12222cb973f6ff5e Mon Sep 17 00:00:00 2001 From: Wenjun Ruan Date: Tue, 9 Apr 2024 14:04:24 +0800 Subject: [PATCH] Split cpuUsage to systemCpuUsage and jvmCpuUsage (#15803) --- deploy/kubernetes/dolphinscheduler/README.md | 8 +- .../kubernetes/dolphinscheduler/values.yaml | 16 +- docs/docs/en/architecture/configuration.md | 234 +++++++++--------- docs/docs/zh/architecture/configuration.md | 176 +++++++------ .../alert/registry/AlertHeartbeatTask.java | 3 +- .../common/model/AlertServerHeartBeat.java | 28 +-- .../common/model/BaseHeartBeat.java | 46 ++++ .../common/model/HeartBeat.java | 4 - .../common/model/MasterHeartBeat.java | 28 +-- .../common/model/WorkerHeartBeat.java | 29 +-- .../server/master/MasterServer.java | 2 +- .../config/MasterServerLoadProtection.java | 50 +--- .../master/metrics/MasterServerMetrics.java | 2 +- .../master/registry/MasterSlotManager.java | 3 +- .../master/registry/ServerNodeManager.java | 4 +- .../master/task/MasterHeartBeatTask.java | 3 +- .../src/main/resources/application.yaml | 8 +- .../master/config/MasterConfigTest.java | 25 +- .../MasterServerLoadProtectionTest.java | 3 +- .../src/test/resources/application.yaml | 164 ++++++++++++ .../src/test/resources/logback.xml | 8 +- .../metrics/BaseServerLoadProtection.java | 67 +++++ .../meter/metrics/DefaultMetricsProvider.java | 11 +- .../meter/metrics/ServerLoadProtection.java | 24 ++ .../meter/metrics/SystemMetrics.java | 3 +- .../src/main/resources/application.yaml | 16 +- .../server/worker/WorkerServer.java | 2 +- .../config/WorkerServerLoadProtection.java | 50 +--- .../worker/task/WorkerHeartBeatTask.java | 3 +- .../src/main/resources/application.yaml | 8 +- .../WorkerServerLoadProtectionTest.java | 3 +- 31 files changed, 603 insertions(+), 428 deletions(-) create mode 100644 dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java create mode 100644 dolphinscheduler-master/src/test/resources/application.yaml 
create mode 100644 dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java create mode 100644 dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/ServerLoadProtection.java diff --git a/deploy/kubernetes/dolphinscheduler/README.md b/deploy/kubernetes/dolphinscheduler/README.md index 5659605b957a..33633f3b2e18 100644 --- a/deploy/kubernetes/dolphinscheduler/README.md +++ b/deploy/kubernetes/dolphinscheduler/README.md @@ -200,9 +200,9 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | master.env.MASTER_KILL_APPLICATION_WHEN_HANDLE_FAILOVER | string | `"true"` | Master kill application when handle failover | | master.env.MASTER_MAX_HEARTBEAT_INTERVAL | string | `"10s"` | Master max heartbeat interval | | master.env.MASTER_SERVER_LOAD_PROTECTION_ENABLED | bool | `false` | If set true, will open master overload protection | -| master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. | | master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. | -| master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. | +| master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max jvm cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. 
| +| master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. | | master.env.MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. | | master.env.MASTER_STATE_WHEEL_INTERVAL | string | `"5s"` | master state wheel interval, the unit is second | | master.env.MASTER_TASK_COMMIT_INTERVAL | string | `"1s"` | master commit task interval, the unit is second | @@ -301,9 +301,9 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | worker.env.WORKER_HOST_WEIGHT | string | `"100"` | Worker host weight to dispatch tasks | | worker.env.WORKER_MAX_HEARTBEAT_INTERVAL | string | `"10s"` | Worker heartbeat interval | | worker.env.WORKER_SERVER_LOAD_PROTECTION_ENABLED | bool | `false` | If set true, will open worker overload protection | -| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max cpu usage, when the worker's cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. | -| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max jvm memory usage , when the worker's jvm memory usage is smaller then this value, worker server can be dispatched tasks. 
| +| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. | +| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max memory usage , when the worker's memory usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED | bool | `true` | tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true. | | worker.env.WORKER_TENANT_CONFIG_DISTRIBUTED_TENANT | bool | `false` | Scenes to be used for distributed users. For example, users created by FreeIpa are stored in LDAP. This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants. | diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index a8d9a34875ca..98c2f70db07e 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -508,10 +508,10 @@ master: MASTER_STATE_WHEEL_INTERVAL: "5s" # -- If set true, will open master overload protection MASTER_SERVER_LOAD_PROTECTION_ENABLED: false - # -- Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. 
- MASTER_SERVER_LOAD_PROTECTION_MAX_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 - # -- Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. - MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.7 + # -- Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. + MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 + # -- Master max jvm cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. + MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. @@ -629,10 +629,10 @@ worker: env: # -- If set true, will open worker overload protection WORKER_SERVER_LOAD_PROTECTION_ENABLED: false - # -- Worker max cpu usage, when the worker's cpu usage is smaller then this value, worker server can be dispatched tasks. - WORKER_SERVER_LOAD_PROTECTION_MAX_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 - # -- Worker max jvm memory usage , when the worker's jvm memory usage is smaller then this value, worker server can be dispatched tasks. - WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.7 + # -- Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. + WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 + # -- Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. 
+ WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Worker max memory usage , when the worker's memory usage is smaller then this value, worker server can be dispatched tasks. WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. diff --git a/docs/docs/en/architecture/configuration.md b/docs/docs/en/architecture/configuration.md index b9a26b865c77..13d89329439b 100644 --- a/docs/docs/en/architecture/configuration.md +++ b/docs/docs/en/architecture/configuration.md @@ -110,7 +110,8 @@ The directory structure of DolphinScheduler is as follows: dolphinscheduler-daemon.sh is responsible for DolphinScheduler startup and shutdown. Essentially, start-all.sh or stop-all.sh startup and shutdown the cluster via dolphinscheduler-daemon.sh. -Currently, DolphinScheduler just makes a basic config, remember to config further JVM options based on your practical situation of resources. +Currently, DolphinScheduler just makes a basic config, remember to config further JVM options based on your practical +situation of resources. Default simplified parameters are: @@ -128,44 +129,47 @@ export DOLPHINSCHEDULER_OPTS=" " ``` -> "-XX:DisableExplicitGC" is not recommended due to may lead to memory link (DolphinScheduler dependent on Netty to communicate). -> If add "-Djava.net.preferIPv6Addresses=true" will use ipv6 address, if add "-Djava.net.preferIPv4Addresses=true" will use ipv4 address, if doesn't set the two parameter will use ipv4 or ipv6. +> "-XX:DisableExplicitGC" is not recommended because it may lead to a memory leak (DolphinScheduler depends on Netty to +> communicate). +> If "-Djava.net.preferIPv6Addresses=true" is added, ipv6 addresses will be used; if "-Djava.net.preferIPv4Addresses=true" is added, +> ipv4 addresses will be used; if neither parameter is set, either ipv4 or ipv6 will be used. 
### Database connection related configuration DolphinScheduler uses Spring Hikari to manage database connections, configuration file location: -|Service| Configuration file | -|--|--| -|Master Server | `master-server/conf/application.yaml`| -|Api Server| `api-server/conf/application.yaml`| -|Worker Server| `worker-server/conf/application.yaml`| -|Alert Server| `alert-server/conf/application.yaml`| +| Service | Configuration file | +|---------------|---------------------------------------| +| Master Server | `master-server/conf/application.yaml` | +| Api Server | `api-server/conf/application.yaml` | +| Worker Server | `worker-server/conf/application.yaml` | +| Alert Server | `alert-server/conf/application.yaml` | The default configuration is as follows: -|Parameters | Default value| Description| -|--|--|--| -|spring.datasource.driver-class-name| org.postgresql.Driver |datasource driver| -|spring.datasource.url| jdbc:postgresql://127.0.0.1:5432/dolphinscheduler |datasource connection url| -|spring.datasource.username|root|datasource username| -|spring.datasource.password|root|datasource password| -|spring.datasource.hikari.connection-test-query|select 1|validate connection by running the SQL| -|spring.datasource.hikari.minimum-idle| 5| minimum connection pool size number| -|spring.datasource.hikari.auto-commit|true|whether auto commit| -|spring.datasource.hikari.pool-name|DolphinScheduler|name of the connection pool| -|spring.datasource.hikari.maximum-pool-size|50| maximum connection pool size number| -|spring.datasource.hikari.connection-timeout|30000|connection timeout| -|spring.datasource.hikari.idle-timeout|600000|Maximum idle connection survival time| -|spring.datasource.hikari.leak-detection-threshold|0|Connection leak detection threshold| -|spring.datasource.hikari.initialization-fail-timeout|1|Connection pool initialization failed timeout| +| Parameters | Default value | Description | 
+|------------------------------------------------------|---------------------------------------------------|-----------------------------------------------| +| spring.datasource.driver-class-name | org.postgresql.Driver | datasource driver | +| spring.datasource.url | jdbc:postgresql://127.0.0.1:5432/dolphinscheduler | datasource connection url | +| spring.datasource.username | root | datasource username | +| spring.datasource.password | root | datasource password | +| spring.datasource.hikari.connection-test-query | select 1 | validate connection by running the SQL | +| spring.datasource.hikari.minimum-idle | 5 | minimum connection pool size number | +| spring.datasource.hikari.auto-commit | true | whether auto commit | +| spring.datasource.hikari.pool-name | DolphinScheduler | name of the connection pool | +| spring.datasource.hikari.maximum-pool-size | 50 | maximum connection pool size number | +| spring.datasource.hikari.connection-timeout | 30000 | connection timeout | +| spring.datasource.hikari.idle-timeout | 600000 | Maximum idle connection survival time | +| spring.datasource.hikari.leak-detection-threshold | 0 | Connection leak detection threshold | +| spring.datasource.hikari.initialization-fail-timeout | 1 | Connection pool initialization failed timeout | Note that DolphinScheduler also supports database configuration through `bin/env/dolphinscheduler_env.sh`. ### Zookeeper related configuration -DolphinScheduler uses Zookeeper for cluster management, fault tolerance, event monitoring and other functions. Configuration file location: -|Service| Configuration file | +DolphinScheduler uses Zookeeper for cluster management, fault tolerance, event monitoring and other functions. 
+Configuration file location: +|Service| Configuration file | |--|--| |Master Server | `master-server/conf/application.yaml`| |Api Server| `api-server/conf/application.yaml`| @@ -173,17 +177,17 @@ DolphinScheduler uses Zookeeper for cluster management, fault tolerance, event m The default configuration is as follows: -|Parameters | Default value| Description| -|--|--|--| -|registry.zookeeper.namespace|dolphinscheduler|namespace of zookeeper| -|registry.zookeeper.connect-string|localhost:2181| the connection string of zookeeper| -|registry.zookeeper.retry-policy.base-sleep-time|60ms|time to wait between subsequent retries| -|registry.zookeeper.retry-policy.max-sleep|300ms|maximum time to wait between subsequent retries| -|registry.zookeeper.retry-policy.max-retries|5|maximum retry times| -|registry.zookeeper.session-timeout|30s|session timeout| -|registry.zookeeper.connection-timeout|30s|connection timeout| -|registry.zookeeper.block-until-connected|600ms|waiting time to block until the connection succeeds| -|registry.zookeeper.digest|{username}:{password}|digest of zookeeper to access znode, works only when acl is enabled, for more details please check [https://zookeeper.apache.org/doc/r3.4.14/zookeeperAdmin.html](Apache Zookeeper doc) | +| Parameters | Default value | Description | +|-------------------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| registry.zookeeper.namespace | dolphinscheduler | namespace of zookeeper | +| registry.zookeeper.connect-string | localhost:2181 | the connection string of zookeeper | +| registry.zookeeper.retry-policy.base-sleep-time | 60ms | time to wait between subsequent retries | +| registry.zookeeper.retry-policy.max-sleep | 300ms | maximum time to wait between subsequent retries | +| registry.zookeeper.retry-policy.max-retries | 5 | 
maximum retry times | +| registry.zookeeper.session-timeout | 30s | session timeout | +| registry.zookeeper.connection-timeout | 30s | connection timeout | +| registry.zookeeper.block-until-connected | 600ms | waiting time to block until the connection succeeds | +| registry.zookeeper.digest | {username}:{password} | digest of zookeeper to access znode, works only when acl is enabled, for more details please check [https://zookeeper.apache.org/doc/r3.4.14/zookeeperAdmin.html](Apache Zookeeper doc) | Note that DolphinScheduler also supports zookeeper related configuration through `bin/env/dolphinscheduler_env.sh`. @@ -191,12 +195,12 @@ Note that DolphinScheduler also supports zookeeper related configuration through Currently, common.properties mainly configures Hadoop,s3a related configurations. Configuration file location: -|Service| Configuration file | -|--|--| -|Master Server | `master-server/conf/common.properties`| -|Api Server| `api-server/conf/common.properties`| -|Worker Server| `worker-server/conf/common.properties`| -|Alert Server| `alert-server/conf/common.properties`| +| Service | Configuration file | +|---------------|----------------------------------------| +| Master Server | `master-server/conf/common.properties` | +| Api Server | `api-server/conf/common.properties` | +| Worker Server | `worker-server/conf/common.properties` | +| Alert Server | `alert-server/conf/common.properties` | The default configuration is as follows: @@ -237,43 +241,43 @@ The default configuration is as follows: Location: `api-server/conf/application.yaml` -|Parameters | Default value| Description| -|--|--|--| -|server.port|12345|api service communication port| -|server.servlet.session.timeout|120m|session timeout| -|server.servlet.context-path|/dolphinscheduler/ |request path| -|spring.servlet.multipart.max-file-size|1024MB|maximum file size| -|spring.servlet.multipart.max-request-size|1024MB|maximum request size| -|server.jetty.max-http-post-size|5000000|jetty maximum post 
size| -|spring.banner.charset|UTF-8|message encoding| -|spring.jackson.time-zone|UTC|time zone| -|spring.jackson.date-format|"yyyy-MM-dd HH:mm:ss"|time format| -|spring.messages.basename|i18n/messages|i18n config| -|security.authentication.type|PASSWORD|authentication type| -|security.authentication.ldap.user.admin|read-only-admin|admin user account when you log-in with LDAP| -|security.authentication.ldap.urls|ldap://ldap.forumsys.com:389/|LDAP urls| -|security.authentication.ldap.base.dn|dc=example,dc=com|LDAP base dn| -|security.authentication.ldap.username|cn=read-only-admin,dc=example,dc=com|LDAP username| -|security.authentication.ldap.password|password|LDAP password| -|security.authentication.ldap.user.identity-attribute|uid|LDAP user identity attribute| -|security.authentication.ldap.user.email-attribute|mail|LDAP user email attribute| -|security.authentication.ldap.user.not-exist-action|CREATE|action when ldap user is not exist,default value: CREATE. Optional values include(CREATE,DENY)| -|security.authentication.ldap.ssl.enable|false|LDAP ssl switch| -|security.authentication.ldap.ssl.trust-store|ldapkeystore.jks|LDAP jks file absolute path| -|security.authentication.ldap.ssl.trust-store-password|password|LDAP jks password| -|security.authentication.casdoor.user.admin||admin user account when you log-in with Casdoor| -|casdoor.endpoint||Casdoor server url| -|casdoor.client-id||id in Casdoor| -|casdoor.client-secret||secret in Casdoor| -|casdoor.certificate||certificate in Casdoor| -|casdoor.organization-name||organization name in Casdoor| -|casdoor.application-name||application name in Casdoor| -|casdoor.redirect-url||doplhinscheduler login url| -|api.traffic.control.global.switch|false|traffic control global switch| -|api.traffic.control.max-global-qps-rate|300|global max request number per second| -|api.traffic.control.tenant-switch|false|traffic control tenant switch| -|api.traffic.control.default-tenant-qps-rate|10|default tenant max request number 
per second| -|api.traffic.control.customize-tenant-qps-rate||customize tenant max request number per second| +| Parameters | Default value | Description | +|-------------------------------------------------------|--------------------------------------|------------------------------------------------------------------------------------------------| +| server.port | 12345 | api service communication port | +| server.servlet.session.timeout | 120m | session timeout | +| server.servlet.context-path | /dolphinscheduler/ | request path | +| spring.servlet.multipart.max-file-size | 1024MB | maximum file size | +| spring.servlet.multipart.max-request-size | 1024MB | maximum request size | +| server.jetty.max-http-post-size | 5000000 | jetty maximum post size | +| spring.banner.charset | UTF-8 | message encoding | +| spring.jackson.time-zone | UTC | time zone | +| spring.jackson.date-format | "yyyy-MM-dd HH:mm:ss" | time format | +| spring.messages.basename | i18n/messages | i18n config | +| security.authentication.type | PASSWORD | authentication type | +| security.authentication.ldap.user.admin | read-only-admin | admin user account when you log-in with LDAP | +| security.authentication.ldap.urls | ldap://ldap.forumsys.com:389/ | LDAP urls | +| security.authentication.ldap.base.dn | dc=example,dc=com | LDAP base dn | +| security.authentication.ldap.username | cn=read-only-admin,dc=example,dc=com | LDAP username | +| security.authentication.ldap.password | password | LDAP password | +| security.authentication.ldap.user.identity-attribute | uid | LDAP user identity attribute | +| security.authentication.ldap.user.email-attribute | mail | LDAP user email attribute | +| security.authentication.ldap.user.not-exist-action | CREATE | action when ldap user is not exist,default value: CREATE. 
Optional values include(CREATE,DENY) | +| security.authentication.ldap.ssl.enable | false | LDAP ssl switch | +| security.authentication.ldap.ssl.trust-store | ldapkeystore.jks | LDAP jks file absolute path | +| security.authentication.ldap.ssl.trust-store-password | password | LDAP jks password | +| security.authentication.casdoor.user.admin | | admin user account when you log-in with Casdoor | +| casdoor.endpoint | | Casdoor server url | +| casdoor.client-id | | id in Casdoor | +| casdoor.client-secret | | secret in Casdoor | +| casdoor.certificate | | certificate in Casdoor | +| casdoor.organization-name | | organization name in Casdoor | +| casdoor.application-name | | application name in Casdoor | +| casdoor.redirect-url | | doplhinscheduler login url | +| api.traffic.control.global.switch | false | traffic control global switch | +| api.traffic.control.max-global-qps-rate | 300 | global max request number per second | +| api.traffic.control.tenant-switch | false | traffic control tenant switch | +| api.traffic.control.default-tenant-qps-rate | 10 | default tenant max request number per second | +| api.traffic.control.customize-tenant-qps-rate | | customize tenant max request number per second | ### Master Server related configuration @@ -292,9 +296,9 @@ Location: `master-server/conf/application.yaml` | master.task-commit-interval | 1000 | master commit task interval, the unit is millisecond | | master.state-wheel-interval | 5 | time to check status | | master.server-load-protection.enabled | true | If set true, will open master overload protection | -| master.server-load-protection.max-cpu-usage-percentage-thresholds | 0.7 | Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. | -| master.server-load-protection.max-jvm-memory-usage-percentage-thresholds | 0.7 | Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. 
| -| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. | +| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Master max system cpu usage, when the master's system cpu usage is smaller than this value, master server can execute workflow. | +| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Master max JVM cpu usage, when the master's jvm cpu usage is smaller than this value, master server can execute workflow. | +| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max system memory usage, when the master's system memory usage is smaller than this value, master server can execute workflow. | | master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. 
| | master.failover-interval | 10 | failover interval, the unit is minute | | master.kill-application-when-task-failover | true | whether to kill yarn/k8s application when failover taskInstance | @@ -306,23 +310,23 @@ Location: `master-server/conf/application.yaml` Location: `worker-server/conf/application.yaml` -| Parameters | Default value | Description | -|--------------------------------------------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| worker.listen-port | 1234 | worker-service listen port | -| worker.exec-threads | 100 | worker-service execute thread number, used to limit the number of task instances in parallel | -| worker.max-heartbeat-interval | 10s | worker-service max heartbeat interval | -| worker.host-weight | 100 | worker host weight to dispatch tasks | -| worker.server-load-protection.enabled | true | If set true will open worker overload protection | -| worker.max-cpu-usage-percentage-thresholds.max-cpu-usage-percentage-thresholds | 0.7 | Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. | -| worker.server-load-protection.max-jvm-memory-usage-percentage-thresholds | 0.7 | Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. | -| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. 
| -| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. | -| worker.registry-disconnect-strategy.strategy | stop | Used when the worker disconnect from registry, default value: stop. Optional values include stop, waiting | -| worker.registry-disconnect-strategy.max-waiting-time | 100s | Used when the worker disconnect from registry, and the disconnect strategy is waiting, this config means the worker will waiting to reconnect to registry in given times, and after the waiting times, if the worker still cannot connect to registry, will stop itself, if the value is 0s, will wait infinitely | -| worker.task-execute-threads-full-policy | REJECT | If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution | -| worker.tenant-config.auto-create-tenant-enabled | true | tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true. | -| worker.tenant-config.distributed-tenant-enabled | false | When this parameter is true, auto-create-tenant-enabled has no effect and will not automatically create tenants | -| worker.tenant-config.default-tenant-enabled | false | If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`. 
| +| Parameters | Default value | Description | +|-----------------------------------------------------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| worker.listen-port | 1234 | worker-service listen port | +| worker.exec-threads | 100 | worker-service execute thread number, used to limit the number of task instances in parallel | +| worker.max-heartbeat-interval | 10s | worker-service max heartbeat interval | +| worker.host-weight | 100 | worker host weight to dispatch tasks | +| worker.server-load-protection.enabled | true | If set true will open worker overload protection | +| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, master server can execute workflow. | +| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Worker max JVM cpu usage, when the worker's jvm cpu usage is smaller then this value, master server can execute workflow. | +| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Worker max system memory usage , when the worker's system memory usage is smaller then this value, master server can execute workflow. | +| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Worker max disk usage , when the worker's disk usage is smaller then this value, master server can execute workflow. | +| worker.registry-disconnect-strategy.strategy | stop | Used when the worker disconnect from registry, default value: stop. 
Optional values include stop, waiting | +| worker.registry-disconnect-strategy.max-waiting-time | 100s | Used when the worker disconnect from registry, and the disconnect strategy is waiting, this config means the worker will waiting to reconnect to registry in given times, and after the waiting times, if the worker still cannot connect to registry, will stop itself, if the value is 0s, will wait infinitely | +| worker.task-execute-threads-full-policy | REJECT | If REJECT, when the task waiting in the worker reaches exec-threads, it will reject the received task and the Master will redispatch it; If CONTINUE, it will put the task into the worker's execution queue and wait for a free thread to start execution | +| worker.tenant-config.auto-create-tenant-enabled | true | tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true. | +| worker.tenant-config.distributed-tenant-enabled | false | When this parameter is true, auto-create-tenant-enabled has no effect and will not automatically create tenants | +| worker.tenant-config.default-tenant-enabled | false | If set true, will use worker bootstrap user as the tenant to execute task when the tenant is `default`. | ### Alert Server related configuration @@ -337,10 +341,10 @@ Location: `alert-server/conf/application.yaml` This part describes quartz configs and configure them based on your practical situation and resources. 
-|Service| Configuration file | -|--|--| -|Master Server | `master-server/conf/application.yaml`| -|Api Server| `api-server/conf/application.yaml`| +| Service | Configuration file | +|---------------|---------------------------------------| +| Master Server | `master-server/conf/application.yaml` | +| Api Server | `api-server/conf/application.yaml` | The default configuration is as follows: @@ -358,7 +362,8 @@ The default configuration is as follows: | spring.quartz.properties.org.quartz.jobStore.driverDelegateClass | org.quartz.impl.jdbcjobstore.PostgreSQLDelegate | | spring.quartz.properties.org.quartz.jobStore.clusterCheckinInterval | 5000 | -The above configuration items is the same in *Master Server* and *Api Server*, but their *Quartz Scheduler* threadpool configuration is different. +The above configuration items is the same in *Master Server* and *Api Server*, but their *Quartz Scheduler* threadpool +configuration is different. The default quartz threadpool configuration in *Master Server* is as follows: @@ -369,7 +374,8 @@ The default quartz threadpool configuration in *Master Server* is as follows: | spring.quartz.properties.org.quartz.threadPool.threadPriority | 5 | | spring.quartz.properties.org.quartz.threadPool.class | org.quartz.simpl.SimpleThreadPool | -Since *Api Server* will not start *Quartz Scheduler* instance, as a client only, therefore it's threadpool is configured as `QuartzZeroSizeThreadPool` which has zero thread; +Since *Api Server* will not start *Quartz Scheduler* instance, as a client only, therefore it's threadpool is configured +as `QuartzZeroSizeThreadPool` which has zero thread; The default configuration is as follows: | Parameters | Default value | @@ -378,7 +384,8 @@ The default configuration is as follows: ### dolphinscheduler_env.sh [load environment variables configs] -When using shell to commit tasks, DolphinScheduler will export environment variables from `bin/env/dolphinscheduler_env.sh`. 
The +When using shell to commit tasks, DolphinScheduler will export environment variables +from `bin/env/dolphinscheduler_env.sh`. The mainly configuration including `JAVA_HOME` and other environment paths. ```bash @@ -406,9 +413,10 @@ export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspec ### Log related configuration -|Service| Configuration file | -|--|--| -|Master Server | `master-server/conf/logback-spring.xml`| -|Api Server| `api-server/conf/logback-spring.xml`| -|Worker Server| `worker-server/conf/logback-spring.xml`| -|Alert Server| `alert-server/conf/logback-spring.xml`| +| Service | Configuration file | +|---------------|-----------------------------------------| +| Master Server | `master-server/conf/logback-spring.xml` | +| Api Server | `api-server/conf/logback-spring.xml` | +| Worker Server | `worker-server/conf/logback-spring.xml` | +| Alert Server | `alert-server/conf/logback-spring.xml` | + diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 0b3ea9bc5bd1..08fded19e069 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -130,38 +130,40 @@ export DOLPHINSCHEDULER_OPTS=" > 不建议设置"-XX:DisableExplicitGC" , DolphinScheduler使用Netty进行通讯,设置该参数,可能会导致内存泄漏. > ->> 如果设置"-Djava.net.preferIPv6Addresses=true" 将会使用ipv6的IP地址, 如果设置"-Djava.net.preferIPv4Addresses=true"将会使用ipv4的IP地址, 如果都不设置,将会随机使用ipv4或者ipv6. +>> 如果设置"-Djava.net.preferIPv6Addresses=true" 将会使用ipv6的IP地址, 如果设置"-Djava.net.preferIPv4Addresses=true" +>> 将会使用ipv4的IP地址, 如果都不设置,将会随机使用ipv4或者ipv6. 
## 数据库连接相关配置 在DolphinScheduler中使用Spring Hikari对数据库连接进行管理,配置文件位置: -|服务名称| 配置文件 | -|--|--| -|Master Server | `master-server/conf/application.yaml`| -|Api Server| `api-server/conf/application.yaml`| -|Worker Server| `worker-server/conf/application.yaml`| -|Alert Server| `alert-server/conf/application.yaml`| +| 服务名称 | 配置文件 | +|---------------|---------------------------------------| +| Master Server | `master-server/conf/application.yaml` | +| Api Server | `api-server/conf/application.yaml` | +| Worker Server | `worker-server/conf/application.yaml` | +| Alert Server | `alert-server/conf/application.yaml` | 默认配置如下: -|参数 | 默认值| 描述| -|--|--|--| -|spring.datasource.driver-class-name| org.postgresql.Driver |数据库驱动| -|spring.datasource.url| jdbc:postgresql://127.0.0.1:5432/dolphinscheduler |数据库连接地址| -|spring.datasource.username|root|数据库用户名| -|spring.datasource.password|root|数据库密码| -|spring.datasource.hikari.connection-test-query|select 1|检测连接是否有效的sql| -|spring.datasource.hikari.minimum-idle| 5|最小空闲连接池数量| -|spring.datasource.hikari.auto-commit|true|是否自动提交| -|spring.datasource.hikari.pool-name|DolphinScheduler|连接池名称| -|spring.datasource.hikari.maximum-pool-size|50|连接池最大连接数| -|spring.datasource.hikari.connection-timeout|30000|连接超时时长| -|spring.datasource.hikari.idle-timeout|600000|空闲连接存活最大时间| -|spring.datasource.hikari.leak-detection-threshold|0|连接泄露检测阈值| -|spring.datasource.hikari.initialization-fail-timeout|1|连接池初始化失败timeout| - -DolphinScheduler同样可以通过设置环境变量进行数据库连接相关的配置, 将以上小写字母转成大写并把`.`换成`_`作为环境变量名, 设置值即可。 +| 参数 | 默认值 | 描述 | +|------------------------------------------------------|---------------------------------------------------|-----------------| +| spring.datasource.driver-class-name | org.postgresql.Driver | 数据库驱动 | +| spring.datasource.url | jdbc:postgresql://127.0.0.1:5432/dolphinscheduler | 数据库连接地址 | +| spring.datasource.username | root | 数据库用户名 | +| spring.datasource.password | root | 数据库密码 | +| spring.datasource.hikari.connection-test-query | select 1 | 检测连接是否有效的sql 
| +| spring.datasource.hikari.minimum-idle | 5 | 最小空闲连接池数量 | +| spring.datasource.hikari.auto-commit | true | 是否自动提交 | +| spring.datasource.hikari.pool-name | DolphinScheduler | 连接池名称 | +| spring.datasource.hikari.maximum-pool-size | 50 | 连接池最大连接数 | +| spring.datasource.hikari.connection-timeout | 30000 | 连接超时时长 | +| spring.datasource.hikari.idle-timeout | 600000 | 空闲连接存活最大时间 | +| spring.datasource.hikari.leak-detection-threshold | 0 | 连接泄露检测阈值 | +| spring.datasource.hikari.initialization-fail-timeout | 1 | 连接池初始化失败timeout | + +DolphinScheduler同样可以通过设置环境变量进行数据库连接相关的配置, 将以上小写字母转成大写并把`.`换成`_`作为环境变量名, +设置值即可。 ## Zookeeper相关配置 @@ -174,17 +176,17 @@ DolphinScheduler使用Zookeeper进行集群管理、容错、事件监听等功 默认配置如下: -|参数 |默认值| 描述| -|--|--|--| -|registry.zookeeper.namespace|dolphinscheduler|Zookeeper集群使用的namespace| -|registry.zookeeper.connect-string|localhost:2181| Zookeeper集群连接信息| -|registry.zookeeper.retry-policy.base-sleep-time|60ms|基本重试时间差| -|registry.zookeeper.retry-policy.max-sleep|300ms|最大重试时间| -|registry.zookeeper.retry-policy.max-retries|5|最大重试次数| -|registry.zookeeper.session-timeout|30s|session超时时间| -|registry.zookeeper.connection-timeout|30s|连接超时时间| -|registry.zookeeper.block-until-connected|600ms|阻塞直到连接成功的等待时间| -|registry.zookeeper.digest|{用户名:密码}|如果zookeeper打开了acl,则需要填写认证信息访问znode,认证信息格式为{用户名}:{密码}。关于Zookeeper ACL详见[https://zookeeper.apache.org/doc/r3.4.14/zookeeperAdmin.html](Apache Zookeeper官方文档)| +| 参数 | 默认值 | 描述 | +|-------------------------------------------------|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| +| registry.zookeeper.namespace | dolphinscheduler | Zookeeper集群使用的namespace | +| registry.zookeeper.connect-string | localhost:2181 | Zookeeper集群连接信息 | +| registry.zookeeper.retry-policy.base-sleep-time | 60ms | 基本重试时间差 | +| registry.zookeeper.retry-policy.max-sleep | 300ms | 最大重试时间 | +| registry.zookeeper.retry-policy.max-retries | 
5 | 最大重试次数 | +| registry.zookeeper.session-timeout | 30s | session超时时间 | +| registry.zookeeper.connection-timeout | 30s | 连接超时时间 | +| registry.zookeeper.block-until-connected | 600ms | 阻塞直到连接成功的等待时间 | +| registry.zookeeper.digest | {用户名:密码} | 如果zookeeper打开了acl,则需要填写认证信息访问znode,认证信息格式为{用户名}:{密码}。关于Zookeeper ACL详见[https://zookeeper.apache.org/doc/r3.4.14/zookeeperAdmin.html](Apache Zookeeper官方文档) | DolphinScheduler同样可以通过`bin/env/dolphinscheduler_env.sh`进行Zookeeper相关的配置。 @@ -200,8 +202,8 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId 默认配置如下: -| 参数 | 默认值 | 描述 | -|-----------------------------------------------|--------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 参数 | 默认值 | 描述 | +|-----------------------------------------------|--------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | data.basedir.path | /tmp/dolphinscheduler | 本地工作目录,用于存放临时文件 | | resource.storage.type | NONE | 资源文件存储类型: HDFS,S3,OSS,GCS,ABS,NONE | | resource.upload.path | /dolphinscheduler | 资源文件存储路径 | @@ -279,48 +281,54 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId 位置:`master-server/conf/application.yaml` -| 参数 | 默认值 | 描述 | -|--------------------------------------------------------|--------------|-----------------------------------------------------------------------------------| -| master.listen-port | 5678 | master监听端口 | -| master.fetch-command-num | 10 | master拉取command数量 | -| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command | -| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 | -| master.dispatch-task-number | 3 | master每个批次的派发任务数量 
| -| master.host-selector | lower_weight | master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight | -| master.max-heartbeat-interval | 10s | master最大心跳间隔 | -| master.task-commit-retry-times | 5 | 任务重试次数 | -| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 | -| master.state-wheel-interval | 5 | 轮询检查状态时间 | -| master.max-cpu-load-avg | 1 | master最大cpuload均值,只有高于系统cpuload均值时,master服务才能调度任务. 默认值为1: 会使用100%的CPU | -| master.reserved-memory | 0.3 | master预留内存,只有低于系统可用内存时,master服务才能调度任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 | -| master.failover-interval | 10 | failover间隔,单位为分钟 | -| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application | -| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting | -| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, | -| 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 | -| master.master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 | +| 参数 | 默认值 | 描述 | +|-----------------------------------------------------------------------------|--------------|-----------------------------------------------------------------------------------------| +| master.listen-port | 5678 | master监听端口 | +| master.fetch-command-num | 10 | master拉取command数量 | +| master.pre-exec-threads | 10 | master准备执行任务的数量,用于限制并行的command | +| master.exec-threads | 100 | master工作线程数量,用于限制并行的流程实例数量 | +| master.dispatch-task-number | 3 | master每个批次的派发任务数量 | +| master.host-selector | lower_weight | master host选择器,用于选择合适的worker执行任务,可选值: random, round_robin, lower_weight | +| master.max-heartbeat-interval | 10s | master最大心跳间隔 | +| master.task-commit-retry-times | 5 | 任务重试次数 | +| master.task-commit-interval | 1000 | 任务提交间隔,单位为毫秒 | +| master.state-wheel-interval | 5 | 轮询检查状态时间 | +| master.server-load-protection.enabled | true | 是否开启系统保护策略 | +| 
master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | master最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统CPU | +| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | master最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的JVM CPU | +| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | master最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统内存 | +| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | master最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,master服务才能调度任务. 默认值为0.7: 会使用70%的操作系统磁盘空间 | +| master.failover-interval | 10 | failover间隔,单位为分钟 | +| master.kill-application-when-task-failover | true | 当任务实例failover时,是否kill掉yarn或k8s application | +| master.registry-disconnect-strategy.strategy | stop | 当Master与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting | +| master.registry-disconnect-strategy.max-waiting-time | 100s | 当Master与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Master与注册中心失联时会在给定时间之内进行重连, | +| 在给定时间之内重连失败将会停止自己,在重连时,Master会丢弃目前正在执行的工作流,值为0表示会无限期等待 | +| master.master.worker-group-refresh-interval | 10s | 定期将workerGroup从数据库中同步到内存的时间间隔 | ## Worker Server相关配置 位置:`worker-server/conf/application.yaml` -| 参数 | 默认值 | 描述 | -|------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------| -| worker.listen-port | 1234 | worker监听端口 | -| worker.exec-threads | 100 | worker工作线程数量,用于限制并行的任务实例数量 | -| worker.max-heartbeat-interval | 10s | worker最大心跳间隔 | -| worker.host-weight | 100 | 派发任务时,worker主机的权重 | -| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | -| worker.max-cpu-load-avg | 1 | worker最大cpuload均值,只有高于系统cpuload均值时,worker服务才能被派发任务. 
默认值为1: 会使用100%的CPU | -| worker.reserved-memory | 0.3 | worker预留内存,只有低于系统可用内存时,worker服务才能被派发任务. 默认值为0.3:当系统内存低于30%时会停止调度新的工作流 | -| worker.alert-listen-host | localhost | alert监听host | -| worker.alert-listen-port | 50052 | alert监听端口 | -| worker.registry-disconnect-strategy.strategy | stop | 当Worker与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting | -| worker.registry-disconnect-strategy.max-waiting-time | 100s | 当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 | -| worker.task-execute-threads-full-policy | REJECT | 如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务 | -| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | -| worker.tenant-config.distributed-tenant-enabled | false | 如果设置为true, auto-create-tenant-enabled 将会不起作用。 | -| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 | +| 参数 | 默认值 | 描述 | +|-----------------------------------------------------------------------------|-----------|-------------------------------------------------------------------------------------------------------------------------------------------| +| worker.listen-port | 1234 | worker监听端口 | +| worker.exec-threads | 100 | worker工作线程数量,用于限制并行的任务实例数量 | +| worker.max-heartbeat-interval | 10s | worker最大心跳间隔 | +| worker.host-weight | 100 | 派发任务时,worker主机的权重 | +| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | +| worker.server-load-protection.enabled | true | 是否开启系统保护策略 | +| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | worker最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,worker服务才能接收任务. 
默认值为0.7: 会使用70%的操作系统CPU | +| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | worker最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的JVM CPU | +| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | worker最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的操作系统内存 | +| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | worker最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的操作系统磁盘空间 | +| worker.alert-listen-host | localhost | alert监听host | +| worker.alert-listen-port | 50052 | alert监听端口 | +| worker.registry-disconnect-strategy.strategy | stop | 当Worker与注册中心失联之后采取的策略, 默认值是: stop. 可选值包括: stop, waiting | +| worker.registry-disconnect-strategy.max-waiting-time | 100s | 当Worker与注册中心失联之后重连时间, 之后当strategy为waiting时,该值生效。 该值表示当Worker与注册中心失联时会在给定时间之内进行重连, 在给定时间之内重连失败将会停止自己,在重连时,Worker会丢弃kill正在执行的任务。值为0表示会无限期等待 | +| worker.task-execute-threads-full-policy | REJECT | 如果是 REJECT, 当Worker中等待队列中的任务数达到exec-threads时, Worker将会拒绝接下来新接收的任务,Master将会重新分发该任务; 如果是 CONTINUE, Worker将会接收任务,放入等待队列中等待空闲线程去执行该任务 | +| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | +| worker.tenant-config.distributed-tenant-enabled | false | 如果设置为true, auto-create-tenant-enabled 将会不起作用。 | +| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 | ## Alert Server相关配置 @@ -366,7 +374,9 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId | spring.quartz.properties.org.quartz.threadPool.threadPriority | 5 | | spring.quartz.properties.org.quartz.threadPool.class | org.quartz.simpl.SimpleThreadPool | -因为*Api Server*不会启动*Quartz Scheduler*实例,只会作为Scheduler客户端使用,因此它的Quartz线程池将会使用`QuartzZeroSizeThreadPool`。`QuartzZeroSizeThreadPool`不会启动任何线程。具体的默认配置如下: +因为*Api Server*不会启动*Quartz Scheduler* 
+实例,只会作为Scheduler客户端使用,因此它的Quartz线程池将会使用`QuartzZeroSizeThreadPool`。`QuartzZeroSizeThreadPool` +不会启动任何线程。具体的默认配置如下: | Parameters | Default value | |------------------------------------------------------|-----------------------------------------------------------------------| @@ -374,7 +384,8 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId ## dolphinscheduler_env.sh [环境变量配置] -通过类似shell方式提交任务的的时候,会加载该配置文件中的环境变量到主机中。涉及到的 `JAVA_HOME` 任务类型的环境配置,其中任务类型主要有: Shell任务、Python任务、Spark任务、Flink任务、Datax任务等等。 +通过类似shell方式提交任务的的时候,会加载该配置文件中的环境变量到主机中。涉及到的 `JAVA_HOME` +任务类型的环境配置,其中任务类型主要有: Shell任务、Python任务、Spark任务、Flink任务、Datax任务等等。 ```bash # JAVA_HOME, will use it to start DolphinScheduler server @@ -401,9 +412,10 @@ export FLINK_ENV_JAVA_OPTS="-javaagent:${DOLPHINSCHEDULER_HOME}/tools/libs/aspec ## 日志相关配置 -|服务名称| 配置文件 | -|--|--| -|Master Server | `master-server/conf/logback-spring.xml`| -|Api Server| `api-server/conf/logback-spring.xml`| -|Worker Server| `worker-server/conf/logback-spring.xml`| -|Alert Server| `alert-server/conf/logback-spring.xml`| +| 服务名称 | 配置文件 | +|---------------|-----------------------------------------| +| Master Server | `master-server/conf/logback-spring.xml` | +| Api Server | `api-server/conf/logback-spring.xml` | +| Worker Server | `worker-server/conf/logback-spring.xml` | +| Alert Server | `alert-server/conf/logback-spring.xml` | + diff --git a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/registry/AlertHeartbeatTask.java b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/registry/AlertHeartbeatTask.java index 3b2d588f8554..0bfefed223f3 100644 --- a/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/registry/AlertHeartbeatTask.java +++ b/dolphinscheduler-alert/dolphinscheduler-alert-server/src/main/java/org/apache/dolphinscheduler/alert/registry/AlertHeartbeatTask.java @@ -65,7 +65,8 @@ 
public AlertServerHeartBeat getHeartBeat() { .processId(processId) .startupTime(startupTime) .reportTime(System.currentTimeMillis()) - .cpuUsage(systemMetrics.getTotalCpuUsedPercentage()) + .jvmCpuUsage(systemMetrics.getJvmCpuUsagePercentage()) + .cpuUsage(systemMetrics.getSystemCpuUsagePercentage()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .jvmMemoryUsage(systemMetrics.getJvmMemoryUsedPercentage()) .serverStatus(ServerStatus.NORMAL) diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/AlertServerHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/AlertServerHeartBeat.java index 7cbd83b8ce0d..9faaef82be4f 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/AlertServerHeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/AlertServerHeartBeat.java @@ -17,33 +17,11 @@ package org.apache.dolphinscheduler.common.model; -import org.apache.dolphinscheduler.common.enums.ServerStatus; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; -@Data -@Builder +@SuperBuilder @NoArgsConstructor -@AllArgsConstructor -public class AlertServerHeartBeat implements HeartBeat { - - private int processId; - private long startupTime; - private long reportTime; - private double cpuUsage; - private double memoryUsage; - private double jvmMemoryUsage; - - private ServerStatus serverStatus; - - private String host; - private int port; +public class AlertServerHeartBeat extends BaseHeartBeat implements HeartBeat { - @Override - public ServerStatus getServerStatus() { - return serverStatus; - } } diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java new 
file mode 100644 index 000000000000..2837e5482b76 --- /dev/null +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.common.model; + +import org.apache.dolphinscheduler.common.enums.ServerStatus; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; + +@Data +@SuperBuilder +@NoArgsConstructor +@AllArgsConstructor +public class BaseHeartBeat implements HeartBeat { + + protected int processId; + protected long startupTime; + protected long reportTime; + protected double jvmCpuUsage; + protected double cpuUsage; + protected double jvmMemoryUsage; + protected double memoryUsage; + protected double diskUsage; + protected ServerStatus serverStatus; + + protected String host; + protected int port; + +} diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/HeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/HeartBeat.java index 3a105227aa08..35971b398b6e 100644 --- 
a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/HeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/HeartBeat.java @@ -21,10 +21,6 @@ public interface HeartBeat { - String getHost(); - ServerStatus getServerStatus(); - int getPort(); - } diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/MasterHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/MasterHeartBeat.java index ecc140bcfb5b..b8ae4512dd6d 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/MasterHeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/MasterHeartBeat.java @@ -17,33 +17,11 @@ package org.apache.dolphinscheduler.common.model; -import org.apache.dolphinscheduler.common.enums.ServerStatus; - -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Data; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; -@Data -@Builder +@SuperBuilder @NoArgsConstructor -@AllArgsConstructor -public class MasterHeartBeat implements HeartBeat { - - private long startupTime; - private long reportTime; - private double cpuUsage; - private double jvmMemoryUsage; - private double memoryUsage; - private double diskUsage; - private ServerStatus serverStatus; - private int processId; - - private String host; - private int port; +public class MasterHeartBeat extends BaseHeartBeat implements HeartBeat { - @Override - public ServerStatus getServerStatus() { - return serverStatus; - } } diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java index 056fc6a2c713..c02748619818 100644 --- 
a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java @@ -17,37 +17,18 @@ package org.apache.dolphinscheduler.common.model; -import org.apache.dolphinscheduler.common.enums.ServerStatus; - -import lombok.AllArgsConstructor; -import lombok.Builder; import lombok.Data; +import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; @Data -@Builder +@EqualsAndHashCode(callSuper = true) +@SuperBuilder @NoArgsConstructor -@AllArgsConstructor -public class WorkerHeartBeat implements HeartBeat { - - private long startupTime; - private long reportTime; - private double cpuUsage; - private double jvmMemoryUsage; - private double memoryUsage; - private double diskUsage; - private ServerStatus serverStatus; - private int processId; - - private String host; - private int port; +public class WorkerHeartBeat extends BaseHeartBeat implements HeartBeat { private int workerHostWeight; // worker host weight private int threadPoolUsage; // worker waiting task count - @Override - public ServerStatus getServerStatus() { - return serverStatus; - } - } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java index d37ca9d0167f..752479e60086 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/MasterServer.java @@ -123,7 +123,7 @@ public void run() throws SchedulerException { MasterServerMetrics.registerMasterCpuUsageGauge(() -> { SystemMetrics systemMetrics = metricsProvider.getSystemMetrics(); - return systemMetrics.getTotalCpuUsedPercentage(); + return systemMetrics.getSystemCpuUsagePercentage(); 
}); MasterServerMetrics.registerMasterMemoryAvailableGauge(() -> { SystemMetrics systemMetrics = metricsProvider.getSystemMetrics(); diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtection.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtection.java index 03570d691d2b..6b259738fed2 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtection.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtection.java @@ -17,57 +17,11 @@ package org.apache.dolphinscheduler.server.master.config; -import org.apache.dolphinscheduler.meter.metrics.SystemMetrics; +import org.apache.dolphinscheduler.meter.metrics.BaseServerLoadProtection; -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; @Slf4j -@Data -@NoArgsConstructor -@AllArgsConstructor -public class MasterServerLoadProtection { - - private boolean enabled = true; - - private double maxCpuUsagePercentageThresholds = 0.7; - - private double maxJVMMemoryUsagePercentageThresholds = 0.7; - - private double maxSystemMemoryUsagePercentageThresholds = 0.7; - - private double maxDiskUsagePercentageThresholds = 0.7; - - public boolean isOverload(SystemMetrics systemMetrics) { - if (!enabled) { - return false; - } - if (systemMetrics.getTotalCpuUsedPercentage() > maxCpuUsagePercentageThresholds) { - log.info( - "Master OverLoad: the TotalCpuUsedPercentage: {} is over then the MaxCpuUsagePercentageThresholds {}", - systemMetrics.getTotalCpuUsedPercentage(), maxCpuUsagePercentageThresholds); - return true; - } - if (systemMetrics.getJvmMemoryUsedPercentage() > maxJVMMemoryUsagePercentageThresholds) { - log.info( - "Master OverLoad: the JvmMemoryUsedPercentage: {} is over then the 
MaxJVMMemoryUsagePercentageThresholds {}", - systemMetrics.getJvmMemoryUsedPercentage(), maxCpuUsagePercentageThresholds); - return true; - } - if (systemMetrics.getDiskUsedPercentage() > maxDiskUsagePercentageThresholds) { - log.info("Master OverLoad: the DiskUsedPercentage: {} is over then the MaxDiskUsagePercentageThresholds {}", - systemMetrics.getDiskUsedPercentage(), maxCpuUsagePercentageThresholds); - return true; - } - if (systemMetrics.getSystemMemoryUsedPercentage() > maxSystemMemoryUsagePercentageThresholds) { - log.info( - "Master OverLoad: the SystemMemoryUsedPercentage: {} is over then the MaxSystemMemoryUsagePercentageThresholds {}", - systemMetrics.getSystemMemoryUsedPercentage(), maxSystemMemoryUsagePercentageThresholds); - return true; - } - return false; - } +public class MasterServerLoadProtection extends BaseServerLoadProtection { } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/MasterServerMetrics.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/MasterServerMetrics.java index 09ba1cb4ba3e..1fc92200df11 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/MasterServerMetrics.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/metrics/MasterServerMetrics.java @@ -51,7 +51,7 @@ public void registerMasterMemoryAvailableGauge(Supplier supplier) { public void registerMasterCpuUsageGauge(Supplier supplier) { Gauge.builder("ds.master.cpu.usage", supplier) - .description("worker cpu usage") + .description("master cpu usage") .register(Metrics.globalRegistry); } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java index 834f56c2a456..155b97311796 100644 --- 
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterSlotManager.java @@ -70,7 +70,8 @@ public class SlotChangeListener implements MasterInfoChangeListener { public void notify(Map masterNodeInfo) { List serverList = masterNodeInfo.values().stream() .filter(heartBeat -> !heartBeat.getServerStatus().equals(ServerStatus.BUSY)) - .map(this::convertHeartBeatToServer).collect(Collectors.toList()); + .map(this::convertHeartBeatToServer) + .collect(Collectors.toList()); syncMasterNodes(serverList); } diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java index 11a994aacdb1..258acd8f6e92 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/ServerNodeManager.java @@ -249,7 +249,9 @@ private void updateWorkerNodes() { try { Map workerNodeMaps = registryClient.getServerMaps(RegistryNodeType.WORKER); for (Map.Entry entry : workerNodeMaps.entrySet()) { - workerNodeInfo.put(entry.getKey(), JSONUtils.parseObject(entry.getValue(), WorkerHeartBeat.class)); + String nodeAddress = entry.getKey(); + WorkerHeartBeat workerHeartBeat = JSONUtils.parseObject(entry.getValue(), WorkerHeartBeat.class); + workerNodeInfo.put(nodeAddress, workerHeartBeat); } } finally { workerGroupWriteLock.unlock(); diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/task/MasterHeartBeatTask.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/task/MasterHeartBeatTask.java index e9b0970ed300..b7b5e7a21e45 100644 --- 
a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/task/MasterHeartBeatTask.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/task/MasterHeartBeatTask.java @@ -64,7 +64,8 @@ public MasterHeartBeat getHeartBeat() { return MasterHeartBeat.builder() .startupTime(ServerLifeCycleManager.getServerStartupTime()) .reportTime(System.currentTimeMillis()) - .cpuUsage(systemMetrics.getTotalCpuUsedPercentage()) + .jvmCpuUsage(systemMetrics.getJvmCpuUsagePercentage()) + .cpuUsage(systemMetrics.getSystemCpuUsagePercentage()) .jvmMemoryUsage(systemMetrics.getJvmMemoryUsedPercentage()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .diskUsage(systemMetrics.getDiskUsedPercentage()) diff --git a/dolphinscheduler-master/src/main/resources/application.yaml b/dolphinscheduler-master/src/main/resources/application.yaml index ce7b1df7ddbc..a85eadb59f01 100644 --- a/dolphinscheduler-master/src/main/resources/application.yaml +++ b/dolphinscheduler-master/src/main/resources/application.yaml @@ -122,10 +122,10 @@ master: server-load-protection: # If set true, will open master overload protection enabled: true - # Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. - max-cpu-usage-percentage-thresholds: 0.7 - # Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. - max-jvm-memory-usage-percentage-thresholds: 0.7 + # Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. + max-system-cpu-usage-percentage-thresholds: 0.7 + # Master max jvm cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. 
+ max-jvm-cpu-usage-percentage-thresholds: 0.7 # Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. max-system-memory-usage-percentage-thresholds: 0.7 # Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. diff --git a/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java b/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java index ed982933d287..faab44cf854c 100644 --- a/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java +++ b/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterConfigTest.java @@ -17,16 +17,15 @@ package org.apache.dolphinscheduler.server.master.config; -import org.junit.jupiter.api.Assertions; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.ActiveProfiles; -import org.springframework.test.context.junit.jupiter.SpringExtension; -@ActiveProfiles("master") -@ExtendWith(SpringExtension.class) +@AutoConfigureMockMvc @SpringBootTest(classes = MasterConfig.class) public class MasterConfigTest { @@ -36,6 +35,18 @@ public class MasterConfigTest { @Test public void getMasterDispatchTaskNumber() { int masterDispatchTaskNumber = masterConfig.getDispatchTaskNumber(); - Assertions.assertEquals(3, masterDispatchTaskNumber); + assertEquals(30, masterDispatchTaskNumber); + } + + @Test + public void 
getServerLoadProtection() { + MasterServerLoadProtection serverLoadProtection = masterConfig.getServerLoadProtection(); + assertTrue(serverLoadProtection.isEnabled()); + assertEquals(0.77, serverLoadProtection.getMaxSystemCpuUsagePercentageThresholds()); + assertEquals(0.77, serverLoadProtection.getMaxJvmCpuUsagePercentageThresholds()); + assertEquals(0.77, serverLoadProtection.getMaxJvmCpuUsagePercentageThresholds()); + assertEquals(0.77, serverLoadProtection.getMaxSystemMemoryUsagePercentageThresholds()); + assertEquals(0.77, serverLoadProtection.getMaxDiskUsagePercentageThresholds()); + } } diff --git a/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionTest.java b/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionTest.java index 90627f99d35b..ce12eb1bd94f 100644 --- a/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionTest.java +++ b/dolphinscheduler-master/src/test/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionTest.java @@ -30,7 +30,8 @@ void isOverload() { SystemMetrics systemMetrics = SystemMetrics.builder() .jvmMemoryUsedPercentage(0.71) .systemMemoryUsedPercentage(0.71) - .totalCpuUsedPercentage(0.71) + .systemCpuUsagePercentage(0.71) + .jvmCpuUsagePercentage(0.71) .diskUsedPercentage(0.71) .build(); masterServerLoadProtection.setEnabled(false); diff --git a/dolphinscheduler-master/src/test/resources/application.yaml b/dolphinscheduler-master/src/test/resources/application.yaml new file mode 100644 index 000000000000..0dbe490af394 --- /dev/null +++ b/dolphinscheduler-master/src/test/resources/application.yaml @@ -0,0 +1,164 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +spring: + banner: + charset: UTF-8 + jackson: + time-zone: UTC + date-format: "yyyy-MM-dd HH:mm:ss" + cache: + # default enable cache, you can disable by `type: none` + type: none + cache-names: + - tenant + - user + - processDefinition + - processTaskRelation + - taskDefinition + caffeine: + spec: maximumSize=100,expireAfterWrite=300s,recordStats + datasource: + driver-class-name: org.postgresql.Driver + url: jdbc:postgresql://127.0.0.1:5432/dolphinscheduler + username: root + password: root + hikari: + connection-test-query: select 1 + minimum-idle: 5 + auto-commit: true + validation-timeout: 3000 + pool-name: DolphinScheduler + maximum-pool-size: 50 + connection-timeout: 30000 + idle-timeout: 600000 + leak-detection-threshold: 0 + initialization-fail-timeout: 1 + quartz: + job-store-type: jdbc + jdbc: + initialize-schema: never + properties: + org.quartz.threadPool.threadPriority: 5 + org.quartz.jobStore.isClustered: true + org.quartz.jobStore.class: org.springframework.scheduling.quartz.LocalDataSourceJobStore + org.quartz.scheduler.instanceId: AUTO + org.quartz.jobStore.tablePrefix: QRTZ_ + org.quartz.jobStore.acquireTriggersWithinLock: true + org.quartz.scheduler.instanceName: DolphinScheduler + org.quartz.threadPool.class: org.quartz.simpl.SimpleThreadPool + org.quartz.jobStore.useProperties: false + org.quartz.threadPool.makeThreadsDaemons: true + org.quartz.threadPool.threadCount: 25 + 
org.quartz.jobStore.misfireThreshold: 60000 + org.quartz.scheduler.batchTriggerAcquisitionMaxCount: 1 + org.quartz.scheduler.makeSchedulerThreadDaemon: true + org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.PostgreSQLDelegate + org.quartz.jobStore.clusterCheckinInterval: 5000 + +# Mybatis-plus configuration, you don't need to change it +mybatis-plus: + mapper-locations: classpath:org/apache/dolphinscheduler/dao/mapper/*Mapper.xml + type-aliases-package: org.apache.dolphinscheduler.dao.entity + configuration: + cache-enabled: false + call-setters-on-nulls: true + map-underscore-to-camel-case: true + jdbc-type-for-null: NULL + global-config: + db-config: + id-type: auto + banner: false + + +registry: + type: zookeeper + zookeeper: + namespace: dolphinscheduler + connect-string: localhost:2181 + retry-policy: + base-sleep-time: 60ms + max-sleep: 300ms + max-retries: 5 + session-timeout: 30s + connection-timeout: 9s + block-until-connected: 600ms + digest: ~ + +master: + listen-port: 5678 + # master fetch command num + fetch-command-num: 10 + # master prepare execute thread number to limit handle commands in parallel + pre-exec-threads: 10 + # master execute thread number to limit process instances in parallel + exec-threads: 100 + # master dispatch task number per batch, if all the tasks dispatch failed in a batch, will sleep 1s. + dispatch-task-number: 30 + # master host selector to select a suitable worker, default value: LowerWeight. 
Optional values include random, round_robin, lower_weight + host-selector: lower_weight + # master heartbeat interval + max-heartbeat-interval: 10s + # master commit task retry times + task-commit-retry-times: 5 + # master commit task interval + task-commit-interval: 1s + state-wheel-interval: 5s + server-load-protection: + # If set true, will open master overload protection + enabled: true + # Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. + max-system-cpu-usage-percentage-thresholds: 0.77 + # Master max jvm cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. + max-jvm-cpu-usage-percentage-thresholds: 0.77 + # Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. + max-system-memory-usage-percentage-thresholds: 0.77 + # Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. 
+ max-disk-usage-percentage-thresholds: 0.77 + # failover interval, the unit is minute + failover-interval: 10m + # kill yarn / k8s application when failover taskInstance, default true + kill-application-when-task-failover: true + registry-disconnect-strategy: + # The disconnect strategy: stop, waiting + strategy: waiting + # The max waiting time to reconnect to registry if you set the strategy to waiting + max-waiting-time: 100s + worker-group-refresh-interval: 10s + +server: + port: 5679 + +management: + endpoints: + web: + exposure: + include: health,metrics,prometheus + endpoint: + health: + enabled: true + show-details: always + health: + db: + enabled: true + defaults: + enabled: false + metrics: + tags: + application: ${spring.application.name} + +metrics: + enabled: true diff --git a/dolphinscheduler-master/src/test/resources/logback.xml b/dolphinscheduler-master/src/test/resources/logback.xml index deb791fae21c..4470a4639109 100644 --- a/dolphinscheduler-master/src/test/resources/logback.xml +++ b/dolphinscheduler-master/src/test/resources/logback.xml @@ -66,12 +66,6 @@ - - - - - - - + diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java new file mode 100644 index 000000000000..fd12d3bb6604 --- /dev/null +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.dolphinscheduler.meter.metrics; + +import lombok.Data; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Data +public class BaseServerLoadProtection implements ServerLoadProtection { + + protected boolean enabled = true; + + protected double maxSystemCpuUsagePercentageThresholds = 0.7; + + protected double maxJvmCpuUsagePercentageThresholds = 0.7; + + protected double maxSystemMemoryUsagePercentageThresholds = 0.7; + + protected double maxDiskUsagePercentageThresholds = 0.7; + + @Override + public boolean isOverload(SystemMetrics systemMetrics) { + if (!enabled) { + return false; + } + if (systemMetrics.getSystemCpuUsagePercentage() > maxSystemCpuUsagePercentageThresholds) { + log.info( + "OverLoad: the system cpu usage: {} is over then the maxSystemCpuUsagePercentageThresholds {}", + systemMetrics.getSystemCpuUsagePercentage(), maxSystemCpuUsagePercentageThresholds); + return true; + } + if (systemMetrics.getJvmCpuUsagePercentage() > maxJvmCpuUsagePercentageThresholds) { + log.info( + "OverLoad: the jvm cpu usage: {} is over then the maxJvmCpuUsagePercentageThresholds {}", + systemMetrics.getJvmCpuUsagePercentage(), maxJvmCpuUsagePercentageThresholds); + return true; + } + if (systemMetrics.getDiskUsedPercentage() > maxDiskUsagePercentageThresholds) { + log.info("OverLoad: the DiskUsedPercentage: {} is over then the maxDiskUsagePercentageThresholds {}", + systemMetrics.getDiskUsedPercentage(), maxDiskUsagePercentageThresholds); + return true; + } + if (systemMetrics.getSystemMemoryUsedPercentage() > 
maxSystemMemoryUsagePercentageThresholds) { + log.info( + "OverLoad: the SystemMemoryUsedPercentage: {} is over then the maxSystemMemoryUsagePercentageThresholds {}", + systemMetrics.getSystemMemoryUsedPercentage(), maxSystemMemoryUsagePercentageThresholds); + return true; + } + return false; + } +} diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java index 0ce6ceb4a401..f1240a054117 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java @@ -19,7 +19,6 @@ import org.apache.dolphinscheduler.common.utils.OSUtils; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import io.micrometer.core.instrument.MeterRegistry; @@ -27,8 +26,11 @@ @Component public class DefaultMetricsProvider implements MetricsProvider { - @Autowired - private MeterRegistry meterRegistry; + private final MeterRegistry meterRegistry; + + public DefaultMetricsProvider(MeterRegistry meterRegistry) { + this.meterRegistry = meterRegistry; + } private SystemMetrics systemMetrics; @@ -53,8 +55,7 @@ public SystemMetrics getSystemMetrics() { systemMetrics = SystemMetrics.builder() .systemCpuUsagePercentage(systemCpuUsage) - .processCpuUsagePercentage(processCpuUsage) - .totalCpuUsedPercentage(systemCpuUsage + processCpuUsage) + .jvmCpuUsagePercentage(processCpuUsage) .jvmMemoryUsed(jvmMemoryUsed) .jvmMemoryMax(jvmMemoryMax) .jvmMemoryUsedPercentage(jvmMemoryUsed / jvmMemoryMax) diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/ServerLoadProtection.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/ServerLoadProtection.java 
new file mode 100644 index 000000000000..3385de891f3b --- /dev/null +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/ServerLoadProtection.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.dolphinscheduler.meter.metrics; + +public interface ServerLoadProtection { + + boolean isOverload(SystemMetrics systemMetrics); + +} diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java index dcffafb83dee..6da8f8ca4ece 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java @@ -30,8 +30,7 @@ public class SystemMetrics { // CPU private double systemCpuUsagePercentage; - private double processCpuUsagePercentage; - private double totalCpuUsedPercentage; + private double jvmCpuUsagePercentage; // JVM-Memory // todo: get pod memory usage diff --git a/dolphinscheduler-standalone-server/src/main/resources/application.yaml b/dolphinscheduler-standalone-server/src/main/resources/application.yaml index 8e58b8956804..d26ea5a4e0aa 100644 --- a/dolphinscheduler-standalone-server/src/main/resources/application.yaml +++ b/dolphinscheduler-standalone-server/src/main/resources/application.yaml @@ -190,10 +190,10 @@ master: state-wheel-interval: 5s server-load-protection: enabled: true - # Master max cpu usage, when the master's cpu usage is smaller then this value, master server can execute workflow. - max-cpu-usage-percentage-thresholds: 0.9 - # Master max JVM memory usage , when the master's jvm memory usage is smaller then this value, master server can execute workflow. - max-jvm-memory-usage-percentage-thresholds: 0.9 + # Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. + max-system-cpu-usage-percentage-thresholds: 0.9 + # Master max jvm cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. 
+ max-jvm-cpu-usage-percentage-thresholds: 0.9 # Master max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. max-system-memory-usage-percentage-thresholds: 0.9 # Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. @@ -215,10 +215,10 @@ worker: host-weight: 100 server-load-protection: enabled: true - # Worker max cpu usage, when the worker's cpu usage is smaller then this value, worker server can be dispatched tasks. - max-cpu-usage-percentage-thresholds: 0.9 - # Worker max JVM memory usage , when the worker's jvm memory usage is smaller then this value, worker server can be dispatched tasks. - max-jvm-memory-usage-percentage-thresholds: 0.9 + # Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. + max-system-cpu-usage-percentage-thresholds: 0.9 + # Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. + max-jvm-cpu-usage-percentage-thresholds: 0.9 # Worker max System memory usage , when the worker's system memory usage is smaller then this value, worker server can be dispatched tasks. max-system-memory-usage-percentage-thresholds: 0.9 # Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. 
diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java index 2420ae52535b..86e755fc8a24 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/WorkerServer.java @@ -94,7 +94,7 @@ public void run() { WorkerServerMetrics.registerWorkerCpuUsageGauge(() -> { SystemMetrics systemMetrics = metricsProvider.getSystemMetrics(); - return systemMetrics.getTotalCpuUsedPercentage(); + return systemMetrics.getSystemCpuUsagePercentage(); }); WorkerServerMetrics.registerWorkerMemoryAvailableGauge(() -> { SystemMetrics systemMetrics = metricsProvider.getSystemMetrics(); diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java index 6e68a71bf524..1a52100eb26c 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java @@ -17,57 +17,11 @@ package org.apache.dolphinscheduler.server.worker.config; -import org.apache.dolphinscheduler.meter.metrics.SystemMetrics; +import org.apache.dolphinscheduler.meter.metrics.BaseServerLoadProtection; -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; -@Data @Slf4j -@NoArgsConstructor -@AllArgsConstructor -public class WorkerServerLoadProtection { - - private boolean enabled = true; - - private double maxCpuUsagePercentageThresholds = 0.7; - - private double maxJVMMemoryUsagePercentageThresholds = 0.7; 
- - private double maxSystemMemoryUsagePercentageThresholds = 0.7; - - private double maxDiskUsagePercentageThresholds = 0.7; - - public boolean isOverload(SystemMetrics systemMetrics) { - if (!enabled) { - return false; - } - if (systemMetrics.getTotalCpuUsedPercentage() > maxCpuUsagePercentageThresholds) { - log.info( - "Worker OverLoad: the TotalCpuUsedPercentage: {} is over then the MaxCpuUsagePercentageThresholds {}", - systemMetrics.getTotalCpuUsedPercentage(), maxCpuUsagePercentageThresholds); - return true; - } - if (systemMetrics.getJvmMemoryUsedPercentage() > maxJVMMemoryUsagePercentageThresholds) { - log.info( - "Worker OverLoad: the JvmMemoryUsedPercentage: {} is over then the maxCpuUsagePercentageThresholds {}", - systemMetrics.getJvmMemoryUsedPercentage(), maxJVMMemoryUsagePercentageThresholds); - return true; - } - if (systemMetrics.getDiskUsedPercentage() > maxDiskUsagePercentageThresholds) { - log.info("Worker OverLoad: the DiskUsedPercentage: {} is over then the MaxCpuUsagePercentageThresholds {}", - systemMetrics.getDiskUsedPercentage(), maxDiskUsagePercentageThresholds); - return true; - } - if (systemMetrics.getSystemMemoryUsedPercentage() > maxSystemMemoryUsagePercentageThresholds) { - log.info( - "Worker OverLoad: the SystemMemoryUsedPercentage: {} is over then the MaxSystemMemoryUsagePercentageThresholds {}", - systemMetrics.getSystemMemoryUsedPercentage(), maxSystemMemoryUsagePercentageThresholds); - return true; - } - return false; - } +public class WorkerServerLoadProtection extends BaseServerLoadProtection { } diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java index 57349e14489b..4eefd9df1084 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java +++ 
b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java @@ -65,7 +65,8 @@ public WorkerHeartBeat getHeartBeat() { return WorkerHeartBeat.builder() .startupTime(ServerLifeCycleManager.getServerStartupTime()) .reportTime(System.currentTimeMillis()) - .cpuUsage(systemMetrics.getTotalCpuUsedPercentage()) + .jvmCpuUsage(systemMetrics.getJvmCpuUsagePercentage()) + .cpuUsage(systemMetrics.getSystemCpuUsagePercentage()) .jvmMemoryUsage(systemMetrics.getJvmMemoryUsedPercentage()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .diskUsage(systemMetrics.getDiskUsedPercentage()) diff --git a/dolphinscheduler-worker/src/main/resources/application.yaml b/dolphinscheduler-worker/src/main/resources/application.yaml index ad0535ac7630..4361e8f014e2 100644 --- a/dolphinscheduler-worker/src/main/resources/application.yaml +++ b/dolphinscheduler-worker/src/main/resources/application.yaml @@ -50,10 +50,10 @@ worker: server-load-protection: # If set true, will open worker overload protection enabled: true - # Worker max cpu usage, when the worker's cpu usage is smaller then this value, worker server can be dispatched tasks. - max-cpu-usage-percentage-thresholds: 0.7 - # Worker max jvm memory usage , when the worker's jvm memory usage is smaller then this value, worker server can be dispatched tasks. - max-jvm-memory-usage-percentage-thresholds: 0.7 + # Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. + max-system-cpu-usage-percentage-thresholds: 0.7 + # Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. + max-jvm-cpu-usage-percentage-thresholds: 0.7 # Worker max System memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. 
max-system-memory-usage-percentage-thresholds: 0.7 # Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. diff --git a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java index 696e9c247839..204deb120eb9 100644 --- a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java +++ b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java @@ -30,7 +30,8 @@ void isOverload() { SystemMetrics systemMetrics = SystemMetrics.builder() .jvmMemoryUsedPercentage(0.71) .systemMemoryUsedPercentage(0.71) - .totalCpuUsedPercentage(0.71) + .systemCpuUsagePercentage(0.71) + .jvmCpuUsagePercentage(0.71) .diskUsedPercentage(0.71) .build(); workerServerLoadProtection.setEnabled(false);