From 5ce5c9bdf4ba2453a34aa9fea6cb202ff29e616e Mon Sep 17 00:00:00 2001 From: gaozhenfeng Date: Fri, 3 Nov 2023 17:05:52 +0800 Subject: [PATCH 1/3] third party doc --- ...atasophon\351\233\206\346\210\220Minio.md" | 248 +++ ...phon\351\233\206\346\210\220clickhouse.md" | 1859 ++++++++++++++++ ...tasophon\351\233\206\346\210\220presto.md" | 1888 +++++++++++++++++ ...72\247flink1.15\345\210\260flink1.16.2.md" | 200 ++ ...7\347\272\247spark3.1\345\210\2603.2.2.md" | 147 ++ 5 files changed, 4342 insertions(+) create mode 100644 "docs/zh/datasophon\351\233\206\346\210\220Minio.md" create mode 100644 "docs/zh/datasophon\351\233\206\346\210\220clickhouse.md" create mode 100644 "docs/zh/datasophon\351\233\206\346\210\220presto.md" create mode 100644 "docs/zh/\345\215\207\347\272\247flink1.15\345\210\260flink1.16.2.md" create mode 100644 "docs/zh/\345\215\207\347\272\247spark3.1\345\210\2603.2.2.md" diff --git "a/docs/zh/datasophon\351\233\206\346\210\220Minio.md" "b/docs/zh/datasophon\351\233\206\346\210\220Minio.md" new file mode 100644 index 00000000..ebb03f82 --- /dev/null +++ "b/docs/zh/datasophon\351\233\206\346\210\220Minio.md" @@ -0,0 +1,248 @@ +### 1、构建minio压缩包 +下载minio安装包:[https://dl.min.io/server/minio/release/linux-amd64/minio](https://dl.min.io/server/minio/release/linux-amd64/minio) +```shell +mkdir /opt/soft/tmp/minio-8.4.3 +cd /opt/soft/tmp/minio-8.4.3 +# 将Minio安装包放到当前目录 +mkdir bin +mkdir etc +touch ./bin/start.sh +touch ./bin/stop.sh +touch ./bin/status.sh +``` +创建好的编排目录格式如下: +```shell +-bin + -start.sh + -stop.sh + -status.sh +-ect +-minio +``` +编写 stop.sh 和 status.sh +```shell +#!/bin/bash + +echo "Stopping minio" + +pid=`ps -ef | grep 'minio server' | grep -v grep | awk '{print $2}'` + +if [ -n "$pid" ] + +then + +kill -9 $pid + +fi + +echo "Stop Success!" 
+``` +```shell +#!/bin/bash + +echo "Checking Minio Status" + +# 使用ps命令查找Minio进程 +pid=$(ps -ef | grep 'minio server' | grep -v grep | awk '{print $2}') + +if [ -n "$pid" ]; then + echo "Minio is running with PID $pid" + exit 0 +else + echo "Minio is not running" + exit 1 +fi +``` +制作minio安装包 +```shell +cd /opt/soft/tmp +tar czf minio-8.4.3.tar.gz minio-8.4.3 +md5sum minio-8.4.3.tar.gz +echo '8f766b89b11cbc15b46b9f620a20780f' > minio-8.4.3.tar.gz.md5 +``` +将安装包拷贝到各worker节点对应目录 +```shell +cp ./minio-8.4.3.tar.gz ./minio-8.4.3.tar.gz.md5 /opt/datasophon/DDP/packages/ +``` +### 2、创建minio配置文件 +```shell +cd /opt/apps/datasophon/datasophon-manager-1.1.2/conf/meta/DDP-1.1.2 +mkdir MINIO +cd MINIO +touch service_ddl.json +``` +```shell +{ + "name": "MINIO", + "label": "MINIO", + "description": "s3对象存储", + "version": "8.4.3", + "sortNum": 22, + "dependencies": [], + "packageName": "minio-8.4.3.tar.gz", + "decompressPackageName": "minio-8.4.3", + "roles": [ + { + "name": "MinioService", + "label": "MinioService", + "roleType": "master", + "cardinality": "1+", + "sortNum": 1, + "logFile": "minio.log", + "jmxPort": 11111, + "startRunner": { + "timeout": "60", + "program": "bin/start.sh", + "args": [] + }, + "stopRunner": { + "timeout": "60", + "program": "bin/stop.sh", + "args": [] + }, + "statusRunner": { + "timeout": "60", + "program": "bin/status.sh", + "args": [] + }, + "externalLink": { + "name": "minio Ui", + "label": "minio Ui", + "url": "http://${host}:${consolePort}" + } + } + ], + "configWriter": { + "generators": [ + { + "filename": "start.sh", + "configFormat": "custom", + "outputDirectory": "bin", + "templateName": "minio-run.flt", + "includeParams": [ + "MINIO_ACCESS_KEY", + "MINIO_SECRET_KEY", + "dataPaths", + "apiPort", + "consolePort" + ] + } + ] + }, + "parameters": [ + { + "name": "MINIO_ACCESS_KEY", + "label": "用户名", + "description": "用户名,长度最小是5个字符", + "required": true, + "configType": "map", + "type": "input", + "value": "", + "configurableInWizard": true, 
+ "hidden": false, + "defaultValue": "minio" + }, + { + "name": "MINIO_SECRET_KEY", + "label": "密码", + "description": "密码不能设置过于简单,不然minio会启动失败,长度最小是8个字符", + "required": true, + "configType": "map", + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "Jd2019@123" + }, + { + "name": "dataPaths", + "label": "集群配置文件目录", + "description": "集群配置文件目录,必须根据指定格式将各部署节点配置上,按空格分隔", + "configType": "map", + "required": true, + "separator": " ", + "type": "multiple", + "value": [ + "http://{host}:{apiPort}/data/minio/data" + ], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "apiPort", + "label": "api访问端口", + "description": "api访问端口", + "required": true, + "configType": "map", + "type": "input", + "value": "9000", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "9000" + }, + { + "name": "consolePort", + "label": "UI访问端口", + "description": "UI访问端口", + "required": true, + "configType": "map", + "type": "input", + "value": "9001", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "9001" + } + ] +} +``` +各worker几点创建minio-run.flt文件 +```shell +cd /opt/datasophon/datasophon-worker/conf/templates +touch minio-run.flt +``` +```shell +#!/bin/bash + +# 设置MinIO的配置参数 +export MINIO_ROOT_USER=${MINIO_ACCESS_KEY} +export MINIO_ROOT_PASSWORD=${MINIO_SECRET_KEY} + +export MINIO_PROMETHEUS_AUTH_TYPE=public #加入这行环境变量,“public”表示Prometheus访问minio集群可以不通过身份验证 + +/opt/datasophon/minio/minio server --config-dir /opt/datasophon/minio/etc \ + --address "0.0.0.0:${apiPort}" --console-address ":${consolePort}" \ + ${dataPaths} > /opt/datasophon/minio/minio.log 2>&1 & +``` +### 3、重启datasophon +各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker +``` +主节点重启api +```shell +sh /opt/apps/datasophon/datasophon-manager-1.1.2/bin/datasophon-api.sh restart api +``` +此时可以看到mysql元数据库中 t_ddh_frame_service 和 t_ddh_frame_service_role 
两个表已经添加了minio的元数据。 +### 4、安装 +安装配置样例 + +![image](https://github.com/datavane/datasophon/assets/62798940/b7ca4c46-fcb8-4c8b-b195-e2e3d32f00c2) + +注意配置文件目录data文件夹必须是空的!!! +### 5、监控 +```shell +vim /opt/datasophon/prometheus/prometheus.yml +# 新增配置 + - job_name: minio_job + metrics_path: /minio/prometheus/metrics + scheme: http + static_configs: + - targets: ['192.168.1.54:9000','192.168.1.55:9000','192.168.1.56:9000'] +``` +重启prometheus +### 6、grafana +导入模板 [https://grafana.com/grafana/dashboards/12063](https://grafana.com/grafana/dashboards/12063) +datasophon mysql表 t_ddh_cluster_service_dashboard 新增图标链接 + +![image](https://github.com/datavane/datasophon/assets/62798940/95067756-41b4-428d-aeb6-b4923411c314) diff --git "a/docs/zh/datasophon\351\233\206\346\210\220clickhouse.md" "b/docs/zh/datasophon\351\233\206\346\210\220clickhouse.md" new file mode 100644 index 00000000..f19c4bae --- /dev/null +++ "b/docs/zh/datasophon\351\233\206\346\210\220clickhouse.md" @@ -0,0 +1,1859 @@ +### 1、构建压缩包 +官网下载安装包 + +![image](https://github.com/datavane/datasophon/assets/62798940/8e84d696-5854-41c2-9c9d-9574a6062d4a) + +各安装包解压到同一个文件夹中,同时新增bin目录,bin目录中放置status.sh +```shell +#!/bin/bash + +status_output=$(/etc/init.d/clickhouse-server status) + +if echo "$status_output" | grep -q "is running"; then + exit 0 +else + exit 1 +fi +``` +压缩部署到DDP +### 2、worker新增ck strategy +```java +map.put("ClickHouse", new ClickHouseHandlerStrategy("CLICKHOUSE", "ClickHouse")); +``` +```java +package com.datasophon.worker.strategy; + +import com.datasophon.common.Constants; +import com.datasophon.common.command.ServiceRoleOperateCommand; +import com.datasophon.common.enums.CommandType; +import com.datasophon.common.utils.ExecResult; +import com.datasophon.common.utils.ShellUtils; +import com.datasophon.worker.handler.ServiceHandler; + +import java.sql.SQLException; +import java.util.ArrayList; + +public class ClickHouseHandlerStrategy extends AbstractHandlerStrategy implements ServiceRoleStrategy { + + 
public ClickHouseHandlerStrategy(String serviceName, String serviceRoleName) { + super(serviceName, serviceRoleName); + } + + @Override + public ExecResult handler(ServiceRoleOperateCommand command) throws SQLException, ClassNotFoundException { + ServiceHandler serviceHandler = new ServiceHandler(command.getServiceName(), command.getServiceRoleName()); + String workPath = Constants.INSTALL_PATH + Constants.SLASH + command.getDecompressPackageName(); + if (command.getCommandType().equals(CommandType.INSTALL_SERVICE)) { + ArrayList commands = new ArrayList<>(); + + logger.info("/clickhouse-common-static-23.9.1.1854/install/doinst.sh"); + commands.add(workPath + "/clickhouse-common-static-23.9.1.1854/install/doinst.sh"); + ShellUtils.execWithStatus(workPath, commands, 300L, logger); + logger.info("clickhouse common static install success"); + + logger.info("/clickhouse-common-static-dbg-23.9.1.1854/install/doinst.sh"); + commands.clear(); + commands.add(workPath + "/clickhouse-common-static-dbg-23.9.1.1854/install/doinst.sh"); + ShellUtils.execWithStatus(workPath, commands, 300L, logger); + logger.info("clickhouse common static dbg install success"); + + logger.info("/clickhouse-server-23.9.1.1854/install/doinst.sh configure"); + commands.clear(); + commands.add(workPath + "/clickhouse-server-23.9.1.1854/install/doinst.sh"); + commands.add("configure"); + ShellUtils.execWithStatus(workPath, commands, 300L, logger); + + ShellUtils.exceShell("rm -rf /etc/clickhouse-server/config.xml"); + ShellUtils.exceShell("rm -rf /etc/clickhouse-server/users.xml"); + ShellUtils.exceShell("cp " + workPath + "/etc/config.xml /etc/clickhouse-server"); + ShellUtils.exceShell("cp " + workPath + "/etc/users.xml /etc/clickhouse-server"); + ShellUtils.exceShell("chown clickhouse:clickhouse /etc/clickhouse-server/config.xml /etc/clickhouse-server/users.xml"); + logger.info("clickhouse server install success"); + + logger.info("/clickhouse-client-23.9.1.1854/install/doinst.sh"); + 
commands.clear(); + commands.add(workPath + "/clickhouse-client-23.9.1.1854/install/doinst.sh"); + ShellUtils.execWithStatus(workPath, commands, 300L, logger); + logger.info("clickhouse client install success"); + + commands.clear(); + commands.add("sudo"); + commands.add("/etc/init.d/clickhouse-server"); + commands.add("start"); + ShellUtils.execWithStatus(workPath, commands, 300L, logger); + logger.info("clickhouse start success"); + } + + ExecResult startResult = serviceHandler.start(command.getStartRunner(), command.getStatusRunner(), + command.getDecompressPackageName(), command.getRunAs()); + return startResult; + } +} + +``` +worker打包替换旧包 +### 3、ck元数据文件 +```shell + + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + https://{bucket}.s3.amazonaws.com + + + https://{bucket}.storage.googleapis.com + + + https://{bucket}.oss.aliyuncs.com + + + + + + + + + + + + 8123 + + + ${tcpPort} + + + 9004 + + + 9005 + + + + + + + + + + + + 9009 + + + + + + + + + + + + + :: + + + + + + + + + + + + + + + + + + + + + 4096 + + + 3 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + none + + + 0 + + + -1 + -1 + + + false + + + + + + + + + + none + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 0 + 0 + + + 1000 + + + 0 + + + + 10000 + + + + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + + 1000 + + + 134217728 + + + 10000 + + false + + + /var/lib/clickhouse/ + + + + + + + /var/lib/clickhouse/tmp/ + + + 1 + 1 + 1 + + + sha256_password + + + 12 + + + + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + + false + + + false + + + false + + + false + + + false + + + 600 + + + + default + + + SQL_ 
+ + + + + + + + + default + + + + + + + + + true + + + false + + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/com/clickhouse/clickhouse-jdbc-bridge/ + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + + + + + <#list itemList as item> + <#if item.name == "shardAddress"> + <#list item.value?split(",") as shardAddress> + + + <#assign parts = shardAddress?split(":")> + ${parts?first} + ${parts?last} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + + + + system + query_log
+ + toYYYYMM(event_date) + + + + + + + + 7500 + + 1048576 + + 8192 + + 524288 + + false + + + +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + + false +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + + + system + part_log
+ toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + + + system + metric_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
+ + + + system + asynchronous_metric_log
+ 7000 + 1048576 + 8192 + 524288 + false +
+ + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 + 1048576 + 8192 + 524288 + false +
+ + + + + system + crash_log
+ + + 1000 + 1024 + 1024 + 512 + true +
+ + + + + + + system + processors_profile_log
+ + toYYYYMM(event_date) + 7500 + 1048576 + 8192 + 524288 + false +
+ + + + system + asynchronous_insert_log
+ + 7500 + 1048576 + 8192 + 524288 + false + event_date + event_date + INTERVAL 3 DAY +
+ + + + system + backup_log
+ toYYYYMM(event_date) + 7500 +
+ + + + + + + + + *_dictionary.*ml + + + *_function.*ml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + + + + + + + + + + 1073741824 + 1024 + 1048576 + 30000000 + + + + + + + + + + + +
+ +``` +```shell + + + + + + + + + + + + 1 + + + + + + + <${username}> + + ${password} + + + + ::/0 + + + + default + + + default + + + + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + + +``` +```shell +{ + "name": "CLICKHOUSE", + "label": "ClickHouse", + "description": "联机分析(OLAP)列式数据库", + "version": "23.9.1.1854", + "sortNum": 29, + "dependencies":[ + "ZOOKEEPER" + ], + "packageName": "clickhouse-23.9.1.1854.tar.gz", + "decompressPackageName": "clickhouse-23.9.1.1854", + "roles": [ + { + "name": "ClickHouse", + "label": "ClickHouse", + "roleType": "master", + "cardinality": "1+", + "logFile": "/var/log/clickhouse-server/clickhouse-server.log", + "jmxPort": "", + "startRunner": { + "timeout": "60", + "program": "/etc/init.d/clickhouse-server", + "args": [ + "start" + ] + }, + "stopRunner": { + "timeout": "60", + "program": "/etc/init.d/clickhouse-server", + "args": [ + "stop" + ] + }, + "restartRunner": { + "timeout": "60", + "program": "/etc/init.d/clickhouse-server", + "args": [ + "restart" + ] + }, + "statusRunner": { + "timeout": "60", + "program": "bin/status.sh", + "args": [] + } + } + ], + "configWriter": { + "generators": [ + { + "filename": "config.xml", + "configFormat": "custom", + "outputDirectory": "etc", + "templateName": "clickhouse-server-config.flt", + "includeParams": [ + "tcpPort", + "shardAddress", + "zkAddress" + ] + }, + { + "filename": "users.xml", + "configFormat": "custom", + "outputDirectory": "etc", + "templateName": "clickhouse-user.flt", + "includeParams": [ + "username", + "password" + ] + } + ] + }, + "parameters": [ + { + "name": "tcpPort", + "label": "tcp端口", + "description": "tcp端口", + "required": true, + "configType": "map", + "type": "input", + "value": "9010", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "9010" + }, + { + "name": "shardAddress", + "label": "clickhouse所有分片地址", + "description": "clickhouse所有分片地址", + "required": true, + "type": "multiple", + "separator": ",", + "value": [], + 
"configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "zkAddress", + "label": "zookeeper地址", + "description": "zookeeper地址", + "required": true, + "type": "multiple", + "separator": ",", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "username", + "label": "用户名", + "description": "用户名", + "required": true, + "configType": "map", + "type": "input", + "value": "default", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "default" + }, + { + "name": "password", + "label": "密码", + "description": "密码", + "required": true, + "configType": "map", + "type": "input", + "value": "123456", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "123456" + } + ] +} +``` +部署元数据文件 +### 4、重启 +各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker debug +``` +主节点重启api +```shell +sh /opt/apps/datasophon-manager-1.2.0/bin/datasophon-api.sh restart api debug +``` +### 5、clickhouse服务器完全卸载命令 +```shell +pkill clickhouse +rm -rf /etc/clickhouse-server +rm -rf /etc/clickhouse-server +rm -rf /usr/bin/clickhouse* +rm -rf /var/log/clickhouse-server +rm -rf /var/lib/clickhouse +rm -rf /var/run/clickhouse-server +rm -rf /opt/datasophon/clickhouse-23.9.1.1854 +rm -rf /opt/datasophon/clickhouse +``` +### 6、配置样例 +![image](https://github.com/datavane/datasophon/assets/62798940/b2915f2e-3b2e-4e06-bcfe-b40d57bd1607) \ No newline at end of file diff --git "a/docs/zh/datasophon\351\233\206\346\210\220presto.md" "b/docs/zh/datasophon\351\233\206\346\210\220presto.md" new file mode 100644 index 00000000..cd7a3c3d --- /dev/null +++ "b/docs/zh/datasophon\351\233\206\346\210\220presto.md" @@ -0,0 +1,1888 @@ +### 1、打包安装包 +解压安装包,可以在这里对解压后的文件做修改,更改文件名,这里的文件名是和后面的配置文件对应的: +```shell +tar -zxvf presto-server-0.283.tar.gz +mv presto-server-0.283 presto-0.283 +``` +将文件打包,注意这里的压缩包名也和后面配置文件对应: +```shell +tar czf presto-0.283.tar.gz presto-0.283 
+``` +编写md5文件: +```shell +md5sum presto-0.283.tar.gz +echo '84666ba9ef9b9024fa7c385af0823101' > presto-0.283.tar.gz.md5 +``` +将两个文件拷贝进对应文件夹中: +```shell +cp ./presto-0.283.tar.gz ./presto-0.283.tar.gz.md5 /opt/datasophon/DDP/packages +``` +### 2、编写presto元数据 +```shell +cd /opt/apps/datasophon/datasophon-manager-1.1.2/conf/meta/DDP-1.1.2 +mkdir PRESTO +cd PRESTO +vim service_ddl.json +``` +```shell +{ + "name": "PRESTO", + "label": "Presto", + "description": "分布式SQL交互式查询引擎", + "version": "0.283", + "sortNum": 21, + "dependencies": [], + "packageName": "presto-0.283.tar.gz", + "decompressPackageName": "presto-0.283", + "roles": [ + { + "name": "PrestoCoordinator", + "label": "PrestoCoordinator", + "roleType": "master", + "cardinality": "1", + "jmxPort": 8087, + "logFile": "data/var/log/server.log", + "startRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "start" + ] + }, + "stopRunner": { + "timeout": "600", + "program": "bin/launcher", + "args": [ + "stop" + ] + }, + "statusRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "status" + ] + }, + "restartRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "restart" + ] + }, + "externalLink": { + "name": "Presto UI", + "label": "Presto UI", + "url": "http://${host}:7777" + } + }, + { + "name": "PrestoWorker", + "label": "PrestoWorker", + "roleType": "worker", + "cardinality": "1+", + "jmxPort": 8089, + "logFile": "data/var/log/server.log", + "startRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "start" + ] + }, + "stopRunner": { + "timeout": "600", + "program": "bin/launcher", + "args": [ + "stop" + ] + }, + "statusRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "status" + ] + }, + "restartRunner": { + "timeout": "60", + "program": "bin/launcher", + "args": [ + "restart" + ] + } + } + ], + "configWriter": { + "generators": [ + { + "filename": "config.properties", + "configFormat": "properties", + 
"outputDirectory": "etc", + "includeParams": [ + "coordinator", + "http-server.http.port", + "query.max-memory-per-node", + "query.max-memory", + "discovery.uri", + "custom.config.properties" + ] + }, + { + "filename": "jvm.config", + "configFormat": "custom", + "outputDirectory": "etc", + "templateName": "presto.jvm.config.ftl", + "includeParams": [ + "prestoHeapSize" + ] + }, + { + "filename": "node.properties", + "configFormat": "properties", + "outputDirectory": "etc", + "includeParams": [ + "node.data-dir", + "node.environment" + ] + }, + { + "filename": "hive.properties", + "configFormat": "properties", + "outputDirectory": "etc/catalog", + "includeParams": [ + "custom.hive.properties" + ] + } + ] + }, + "parameters": [ + { + "name": "coordinator", + "label": "coordinator", + "description": "coordinator", + "required": true, + "type": "input", + "value": "false", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "false" + }, + { + "name": "prestoHeapSize", + "label": "Presto最大堆内存", + "description": "Presto最大堆内存", + "configType": "map", + "required": true, + "minValue": 0, + "maxValue": 64, + "type": "slider", + "value": "", + "unit": "GB", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "32" + }, + { + "name": "http-server.http.port", + "label": "Presto Http端口", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "7777" + }, + { + "name": "discovery.uri", + "label": "服务发现地址", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "http://${coordinatorHost}:7777" + }, + { + "name": "query.max-memory-per-node", + "label": "每个查询在单个节点可使用最大内存", + "description": "", + "required": true, + "type": "input", + "minValue": 0, + "maxValue": "30", + "value": "", + "unit": "GB", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "1GB" + 
}, + { + "name": "query.max-memory", + "label": "总共可使用最大内存", + "description": "若query.max-memory-per-node = 30GB则query.max-memory = <30GB *节点数>", + "required": true, + "type": "input", + "minValue": 0, + "maxValue": "30", + "value": "", + "unit": "GB", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "30GB" + }, + { + "name": "node.data-dir", + "label": "日志存储地址", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${INSTALL_PATH}/presto-0.283/data" + }, + { + "name": "node.environment", + "label": "集群环境名称", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "production" + }, + { + "name": "custom.config.properties", + "label": "自定义配置config.properties", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "custom.hive.properties", + "label": "自定义配置hive.properties", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [{"connector.name":"hive-hadoop2"},{"hive.metastore.uri":"thrift://${metastoreHost}:9083"},{"hive.config.resources":"${INSTALL_PATH}/hadoop-3.3.3/etc/hadoop/core-site.xml,${INSTALL_PATH}/hadoop-3.3.3/etc/hadoop/hdfs-site.xml"}], + "configurableInWizard": true, + "hidden": false, + "defaultValue": [{"connector.name":"hive-hadoop2"},{"hive.metastore.uri":"thrift://${metastoreHost}:9083"},{"hive.config.resources":"${INSTALL_PATH}/hadoop-3.3.3/etc/hadoop/core-site.xml,${INSTALL_PATH}/hadoop-3.3.3/etc/hadoop/hdfs-site.xml"}] + } + ] +} +``` +```shell +cd /opt/datasophon/datasophon-worker/conf/templates +vim presto.jvm.config.ftl +``` +```shell +-server +-Xmx${prestoHeapSize}G +-XX:-UseBiasedLocking +-XX:G1HeapRegionSize=32M 
+-XX:+ExplicitGCInvokesConcurrent +-XX:+ExitOnOutOfMemoryError +-XX:+HeapDumpOnOutOfMemoryError +-XX:-OmitStackTraceInFastThrow +-XX:ReservedCodeCacheSize=512M +-XX:PerMethodRecompilationCutoff=10000 +-XX:PerBytecodeRecompilationCutoff=10000 +-Djdk.attach.allowAttachSelf=true +-Djdk.nio.maxCachedBufferSize=2000000 +-XX:+UnlockDiagnosticVMOptions +-XX:+UseAESCTRIntrinsics +``` +### 3、修改worker源码,重新打包worker包 +修改 datasophon-worker/src/main/java/com/datasophon/worker/handler/ConfigureServiceHandler.java +新增代码 + +![image](https://github.com/datavane/datasophon/assets/62798940/0fbf7d09-e351-4789-9aff-f911610e117f) + +```shell + if ("PrestoCoordinator".equals(serviceRoleName) && "coordinator".equals(config.getName())) { + logger.info("Start config presto coordinator"); + config.setValue("true"); + ServiceConfig serviceConfig = new ServiceConfig(); + serviceConfig.setName("node-scheduler.include-coordinator"); + serviceConfig.setValue("false"); + ServiceConfig serviceConfig1 = new ServiceConfig(); + serviceConfig1.setName("discovery-server.enabled"); + serviceConfig1.setValue("true"); + customConfList.add(serviceConfig); + customConfList.add(serviceConfig1); + } +``` +将重新打包的 datasophon-worker-1.1.2.jar 文件替换到每个worker节点的 /opt/datasophon/datasophon-worker/lib +1.2.0版本worker包名为datasophon-worker-1.1.3.jar,需要上传后改名 +### 4、重启 +各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker +``` +主节点重启api +```shell +sh /opt/apps/datasophon/datasophon-manager-1.1.2/bin/datasophon-api.sh restart api +``` +此时可以看到mysql元数据库中 t_ddh_frame_service 和 t_ddh_frame_service_role 两个表已经添加了presto的元数据。 +搭建需要注意一点节点不能既是master又是worker +### 5、集成监控 +#### 5.1 presto安装目录创建jmx配置文件 +```shell +pwd +/opt/datasophon/presto +mkdir jmx +cd jmx +vim prometheus_config.yml +``` +```shell +--- +lowercaseOutputLabelNames: true +lowercaseOutputName: true +whitelistObjectNames: ["java.lang:type=OperatingSystem"] +blacklistObjectNames: [] +rules: + - pattern: 
'java.lang<>(committed_virtual_memory|free_physical_memory|free_swap_space|total_physical_memory|total_swap_space)_size:' + name: os_$1_bytes + type: GAUGE + attrNameSnakeCase: true + - pattern: 'java.lang<>((?!process_cpu_time)\w+):' + name: os_$1 + type: GAUGE + attrNameSnakeCase: true +``` +将 jmx_prometheus_javaagent-0.16.1.jar 放入jmx文件夹 + +![image](https://github.com/datavane/datasophon/assets/62798940/16b9dd5d-8957-45b6-b0fc-163e47d49a25) + +#### 5.2 修改presto启动脚本 /opt/datasophon/presto/bin/launcher.py + +![image](https://github.com/datavane/datasophon/assets/62798940/820fda3d-860d-4817-a687-ffa37cf5f6a3) + +```shell +#!/usr/bin/env python + +import errno +import os +import platform +import subprocess +import sys +import traceback + +from fcntl import flock, LOCK_EX, LOCK_NB +from optparse import OptionParser +from os import O_RDWR, O_CREAT, O_WRONLY, O_APPEND +from os.path import basename, dirname, exists, realpath +from os.path import join as pathjoin +from signal import SIGTERM, SIGKILL +from stat import S_ISLNK +from time import sleep + +COMMANDS = ['run', 'start', 'stop', 'restart', 'kill', 'status'] + +LSB_NOT_RUNNING = 3 +LSB_STATUS_UNKNOWN = 4 + + +def find_install_path(f): + """Find canonical parent of bin/launcher.py""" + if basename(f) != 'launcher.py': + raise Exception("Expected file '%s' to be 'launcher.py' not '%s'" % (f, basename(f))) + p = realpath(dirname(f)) + if basename(p) != 'bin': + raise Exception("Expected file '%s' directory to be 'bin' not '%s" % (f, basename(p))) + return dirname(p) + + +def makedirs(p): + """Create directory and all intermediate ones""" + try: + os.makedirs(p) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def load_properties(f): + """Load key/value pairs from a file""" + properties = {} + for line in load_lines(f): + k, v = line.split('=', 1) + properties[k.strip()] = v.strip() + return properties + + +def load_lines(f): + """Load lines from a file, ignoring blank or comment lines""" + lines = [] 
+ for line in open(f, 'r').readlines(): + line = line.strip() + if len(line) > 0 and not line.startswith('#'): + lines.append(line) + return lines + + +def try_lock(f): + """Try to open an exclusive lock (inheritable) on a file""" + try: + flock(f, LOCK_EX | LOCK_NB) + return True + except (IOError, OSError): # IOError in Python 2, OSError in Python 3. + return False + + +def open_pidfile(f, mode): + """Open file in read/write mode (without truncating it)""" + fd = os.open(f, O_RDWR | O_CREAT, mode) + if hasattr(os, 'set_inheritable'): + # See https://docs.python.org/3/library/os.html#inheritance-of-file-descriptors + # Since Python 3.4 + os.set_inheritable(fd, True) + return os.fdopen(fd, 'r+') + + +class Process: + def __init__(self, path): + makedirs(dirname(path)) + self.path = path + self.pid_file = open_pidfile(path, 0o600) + self.refresh() + + def refresh(self): + self.locked = try_lock(self.pid_file) + + def clear_pid(self): + assert self.locked, 'pid file not locked by us' + self.pid_file.seek(0) + self.pid_file.truncate() + + def write_pid(self, pid): + self.clear_pid() + self.pid_file.write(str(pid) + '\n') + self.pid_file.flush() + + def alive(self): + self.refresh() + if self.locked: + return False + + pid = self.read_pid() + try: + os.kill(pid, 0) + return True + except OSError as e: + raise Exception('Signaling pid %s failed: %s' % (pid, e)) + + def read_pid(self): + assert not self.locked, 'pid file is locked by us' + self.pid_file.seek(0) + line = self.pid_file.readline().strip() + if len(line) == 0: + raise Exception("Pid file '%s' is empty" % self.path) + + try: + pid = int(line) + except ValueError: + raise Exception("Pid file '%s' contains garbage: %s" % (self.path, line)) + if pid <= 0: + raise Exception("Pid file '%s' contains an invalid pid: %s" % (self.path, pid)) + return pid + + +def redirect_stdin_to_devnull(): + """Redirect stdin to /dev/null""" + fd = os.open(os.devnull, O_RDWR) + os.dup2(fd, sys.stdin.fileno()) + os.close(fd) + + +def 
open_append(f): + """Open a raw file descriptor in append mode""" + # noinspection PyTypeChecker + return os.open(f, O_WRONLY | O_APPEND | O_CREAT, 0o644) + + +def redirect_output(fd): + """Redirect stdout and stderr to a file descriptor""" + os.dup2(fd, sys.stdout.fileno()) + os.dup2(fd, sys.stderr.fileno()) + + +def symlink_exists(p): + """Check if symlink exists and raise if another type of file exists""" + try: + st = os.lstat(p) + if not S_ISLNK(st.st_mode): + raise Exception('Path exists and is not a symlink: %s' % p) + return True + except OSError as e: + if e.errno != errno.ENOENT: + raise + return False + + +def create_symlink(source, target): + """Create a symlink, removing the target first if it is a symlink""" + if symlink_exists(target): + os.remove(target) + if exists(source): + os.symlink(source, target) + + +def create_app_symlinks(options): + """ + Symlink the 'etc' and 'plugin' directory into the data directory. + + This is needed to support programs that reference 'etc/xyz' from within + their config files: log.levels-file=etc/log.properties + """ + if options.etc_dir != pathjoin(options.data_dir, 'etc'): + create_symlink( + options.etc_dir, + pathjoin(options.data_dir, 'etc')) + + if options.install_path != options.data_dir: + create_symlink( + pathjoin(options.install_path, 'plugin'), + pathjoin(options.data_dir, 'plugin')) + + +def build_java_execution(options, daemon): + if not exists(options.config_path): + raise Exception('Config file is missing: %s' % options.config_path) + if not exists(options.jvm_config): + raise Exception('JVM config file is missing: %s' % options.jvm_config) + if not exists(options.launcher_config): + raise Exception('Launcher config file is missing: %s' % options.launcher_config) + if options.log_levels_set and not exists(options.log_levels): + raise Exception('Log levels file is missing: %s' % options.log_levels) + + with open(os.devnull, 'w') as devnull: + try: + subprocess.check_call(['java', '-version'], 
stdout=devnull, stderr=devnull)
+    except (OSError, subprocess.CalledProcessError):
+        raise Exception('Java is not installed')
+
+    properties = options.properties.copy()
+
+    if exists(options.log_levels):
+        properties['log.levels-file'] = options.log_levels
+
+    if daemon:
+        properties['log.output-file'] = options.server_log
+        properties['log.enable-console'] = 'false'
+
+    jvm_properties = load_lines(options.jvm_config)
+    launcher_properties = load_properties(options.launcher_config)
+
+    try:
+        main_class = launcher_properties['main-class']
+    except KeyError:
+        raise Exception("Launcher config is missing 'main-class' property")
+
+    properties['config'] = options.config_path
+
+    system_properties = ['-D%s=%s' % i for i in properties.items()]
+    classpath = pathjoin(options.install_path, 'lib', '*')
+
+    command = ['java', '-cp', classpath]
+    command += jvm_properties + options.jvm_options + system_properties
+    config_properties = {}
+    if exists(options.config_path):
+        config_properties = load_properties(options.config_path)
+    if config_properties['coordinator'] == 'true':
+        print('coordinator true')
+        command += ['-javaagent:/opt/datasophon/presto/jmx/jmx_prometheus_javaagent-0.16.1.jar=7778:/opt/datasophon/presto/jmx/prometheus_config.yml']
+    else:
+        command += ['-javaagent:/opt/datasophon/presto/jmx/jmx_prometheus_javaagent-0.16.1.jar=7779:/opt/datasophon/presto/jmx/prometheus_config.yml']
+    command += [main_class]
+    if options.verbose:
+        print(command)
+        print("")
+
+    env = os.environ.copy()
+
+    # set process name: https://github.com/airlift/procname
+    process_name = launcher_properties.get('process-name', '')
+    if len(process_name) > 0:
+        system = platform.system() + '-' + platform.machine()
+        shim = pathjoin(options.install_path, 'bin', 'procname', system, 'libprocname.so')
+        if exists(shim):
+            env['LD_PRELOAD'] = (env.get('LD_PRELOAD', '') + ':' + shim).strip()
+            env['PROCNAME'] = process_name
+
+    return command, env
+
+
+def run(process, options):
+    if 
process.alive(): + print('Already running as %s' % process.read_pid()) + return + + create_app_symlinks(options) + args, env = build_java_execution(options, False) + + makedirs(options.data_dir) + os.chdir(options.data_dir) + + process.write_pid(os.getpid()) + + redirect_stdin_to_devnull() + + os.execvpe(args[0], args, env) + + +def start(process, options): + if process.alive(): + print('Already running as %s' % process.read_pid()) + return + + create_app_symlinks(options) + args, env = build_java_execution(options, True) + + makedirs(dirname(options.launcher_log)) + log = open_append(options.launcher_log) + + makedirs(options.data_dir) + os.chdir(options.data_dir) + + pid = os.fork() + if pid > 0: + process.write_pid(pid) + print('Started as %s' % pid) + return + + os.setsid() + + redirect_stdin_to_devnull() + redirect_output(log) + os.close(log) + + os.execvpe(args[0], args, env) + + +def terminate(process, signal, message): + if not process.alive(): + print('Not running') + return + + pid = process.read_pid() + + while True: + try: + os.kill(pid, signal) + except OSError as e: + if e.errno != errno.ESRCH: + raise Exception('Signaling pid %s failed: %s' % (pid, e)) + + if not process.alive(): + process.clear_pid() + break + + sleep(0.1) + + print('%s %s' % (message, pid)) + + +def stop(process): + terminate(process, SIGTERM, 'Stopped') + + +def kill(process): + terminate(process, SIGKILL, 'Killed') + + +def status(process): + if not process.alive(): + print('Not running') + sys.exit(LSB_NOT_RUNNING) + print('Running as %s' % process.read_pid()) + + +def handle_command(command, options): + process = Process(options.pid_file) + if command == 'run': + run(process, options) + elif command == 'start': + start(process, options) + elif command == 'stop': + stop(process) + elif command == 'restart': + stop(process) + start(process, options) + elif command == 'kill': + kill(process) + elif command == 'status': + status(process) + else: + raise AssertionError('Unhandled 
command: ' + command) + + +def create_parser(): + commands = 'Commands: ' + ', '.join(COMMANDS) + parser = OptionParser(prog='launcher', usage='usage: %prog [options] command', description=commands) + parser.add_option('-v', '--verbose', action='store_true', default=False, help='Run verbosely') + parser.add_option('--etc-dir', metavar='DIR', help='Defaults to INSTALL_PATH/etc') + parser.add_option('--launcher-config', metavar='FILE', help='Defaults to INSTALL_PATH/bin/launcher.properties') + parser.add_option('--node-config', metavar='FILE', help='Defaults to ETC_DIR/node.properties') + parser.add_option('--jvm-config', metavar='FILE', help='Defaults to ETC_DIR/jvm.config') + parser.add_option('--config', metavar='FILE', help='Defaults to ETC_DIR/config.properties') + parser.add_option('--log-levels-file', metavar='FILE', help='Defaults to ETC_DIR/log.properties') + parser.add_option('--data-dir', metavar='DIR', help='Defaults to INSTALL_PATH') + parser.add_option('--pid-file', metavar='FILE', help='Defaults to DATA_DIR/var/run/launcher.pid') + parser.add_option('--launcher-log-file', metavar='FILE', help='Defaults to DATA_DIR/var/log/launcher.log (only in daemon mode)') + parser.add_option('--server-log-file', metavar='FILE', help='Defaults to DATA_DIR/var/log/server.log (only in daemon mode)') + parser.add_option('-J', action='append', metavar='OPT', dest='jvm_options', help='Set a JVM option') + parser.add_option('-D', action='append', metavar='NAME=VALUE', dest='properties', help='Set a Java system property') + return parser + + +def parse_properties(parser, args): + properties = {} + for arg in args: + if '=' not in arg: + parser.error('property is malformed: %s' % arg) + key, value = [i.strip() for i in arg.split('=', 1)] + if key == 'config': + parser.error('cannot specify config using -D option (use --config)') + if key == 'log.output-file': + parser.error('cannot specify server log using -D option (use --server-log-file)') + if key == 'log.levels-file': + 
parser.error('cannot specify log levels using -D option (use --log-levels-file)') + properties[key] = value + return properties + + +def print_options(options): + if options.verbose: + for i in sorted(vars(options)): + print("%-15s = %s" % (i, getattr(options, i))) + print("") + + +class Options: + pass + + +def main(): + parser = create_parser() + + (options, args) = parser.parse_args() + + if len(args) != 1: + if len(args) == 0: + parser.error('command name not specified') + else: + parser.error('too many arguments') + command = args[0] + + if command not in COMMANDS: + parser.error('unsupported command: %s' % command) + + try: + install_path = find_install_path(sys.argv[0]) + except Exception as e: + print('ERROR: %s' % e) + sys.exit(LSB_STATUS_UNKNOWN) + + o = Options() + o.verbose = options.verbose + o.install_path = install_path + o.launcher_config = realpath(options.launcher_config or pathjoin(o.install_path, 'bin/launcher.properties')) + o.etc_dir = realpath(options.etc_dir or pathjoin(o.install_path, 'etc')) + o.node_config = realpath(options.node_config or pathjoin(o.etc_dir, 'node.properties')) + o.jvm_config = realpath(options.jvm_config or pathjoin(o.etc_dir, 'jvm.config')) + o.config_path = realpath(options.config or pathjoin(o.etc_dir, 'config.properties')) + o.log_levels = realpath(options.log_levels_file or pathjoin(o.etc_dir, 'log.properties')) + o.log_levels_set = bool(options.log_levels_file) + o.jvm_options = options.jvm_options or [] + + if options.node_config and not exists(o.node_config): + parser.error('Node config file is missing: %s' % o.node_config) + + node_properties = {} + if exists(o.node_config): + node_properties = load_properties(o.node_config) + + data_dir = node_properties.get('node.data-dir') + o.data_dir = realpath(options.data_dir or data_dir or o.install_path) + + o.pid_file = realpath(options.pid_file or pathjoin(o.data_dir, 'var/run/launcher.pid')) + o.launcher_log = realpath(options.launcher_log_file or 
pathjoin(o.data_dir, 'var/log/launcher.log')) + o.server_log = realpath(options.server_log_file or pathjoin(o.data_dir, 'var/log/server.log')) + + o.properties = parse_properties(parser, options.properties or {}) + for k, v in node_properties.items(): + if k not in o.properties: + o.properties[k] = v + + if o.verbose: + print_options(o) + + try: + handle_command(command, o) + except SystemExit: + raise + except Exception as e: + if o.verbose: + traceback.print_exc() + else: + print('ERROR: %s' % e) + sys.exit(LSB_STATUS_UNKNOWN) + + +if __name__ == '__main__': + main() + +``` +#### 5.3 修改Prometheus配置文件 +```shell +vim /opt/datasophon/prometheus/prometheus.yml +``` +新增presto配置 +```shell + - job_name: 'prestocoordinator' + file_sd_configs: + - files: + - configs/prestocoordinator.json + - job_name: 'prestoworker' + file_sd_configs: + - files: + - configs/prestoworker.json +``` +在 /opt/datasophon/prometheus/configs 目录新增 prestocoordinator.json 和 prestoworker.json 配置文件 +```shell +[ + { + "targets":["hadoop1:7778"] + } +] +``` +```shell +[ + { + "targets":["hadoop2:7779","hadoop3:7779"] + } +] +``` +重启prometheus,访问webui可看到采集过来的指标 +[http://hadoop1:9090/targets](http://hadoop1:9090/targets) + +![image](https://github.com/datavane/datasophon/assets/62798940/f93a3ad1-64c6-463c-b989-c7c7af93cd82) + +#### 5.4 绘制grafana +打开grafana ui + +![image](https://github.com/datavane/datasophon/assets/62798940/369c0997-5a5e-44ce-bcc8-5163360b240c) + +将下面json粘贴进去 +```shell +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:7978", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 10866, + "graphTooltip": 0, + "id": 42, + "links": [], + 
"liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeAsIso" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 16, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 38 + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "process_start_time_seconds{job=\"prestocoordinator\"}*1000", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PrestoCoordinator启动时间", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 6, + "y": 0 + }, + "id": 34, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 38 + }, + "textMode": "auto" + }, + 
"pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "time() - process_start_time_seconds{job=\"prestocoordinator\"}", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "PrestoCoordinator运行时长", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 10, + "y": 0 + }, + "id": 20, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 38 + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "jvm_memory_bytes_max{job=\"prestocoordinator\",area=\"heap\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Presto最大堆内存", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "%" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 14, + "y": 0 + }, + "id": 28, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + 
"fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "jvm_memory_bytes_used{area=\"heap\",job=\"prestocoordinator\"}*100/jvm_memory_bytes_max{area=\"heap\",job=\"prestocoordinator\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PrestoCoordinator堆内存使用率", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 18, + "y": 0 + }, + "id": 24, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 38 + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "sum(up{job=\"prestoworker\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "在线Worker数", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Usage %" + }, + "properties": [ + { + "id": "custom.drawStyle", + "value": "bars" + }, + { + "id": "custom.fillOpacity", + "value": 100 + }, + { + "id": "color", + "value": { + "fixedColor": "#6d1f62", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "min", + "value": 0 + }, + { + "id": "max", + "value": 1 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 18, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.6", + "repeat": "memarea", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "jvm_memory_bytes_used{area=\"heap\",job=\"prestocoordinator\"}", + "legendFormat": "已用内存", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": " jvm_memory_bytes_max{area=\"heap\",job=\"prestocoordinator\"}", + "hide": false, + "legendFormat": "总内存", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + 
"editorMode": "code", + "expr": "jvm_memory_bytes_used{area=\"heap\",job=\"prestocoordinator\"} / jvm_memory_bytes_max >= 0", + "hide": false, + "legendFormat": "使用率", + "range": true, + "refId": "C" + } + ], + "title": "PrestoCoordinator堆内存使用趋势", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 26, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "increase(jvm_gc_collection_seconds_sum{job=\"prestocoordinator\"}[$__interval])", + "format": "time_series", + "interval": "60s", + "intervalFactor": 1, + "legendFormat": "{{gc}}", + "metric": "jvm_gc_collection_seconds_sum", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "PrestoCoordinator GC时间趋势图", + "type": "timeseries" 
+ }, + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "color-text", + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "instance" + }, + "properties": [ + { + "id": "displayName", + "value": "PrestoWorker" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "displayName", + "value": "堆内存使用率" + }, + { + "id": "custom.align", + "value": "left" + }, + { + "id": "custom.displayMode", + "value": "lcd-gauge" + }, + { + "id": "min", + "value": 1 + }, + { + "id": "max", + "value": 100 + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + } + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "area" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "job" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.hidden", + "value": true + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 36, + "interval": "", + "links": [], + "options": { + "footer": { + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": 
true, + "sortBy": [ + { + "desc": true, + "displayName": "PrestoWorker" + } + ] + }, + "pluginVersion": "9.1.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "editorMode": "code", + "expr": "jvm_memory_bytes_used{area=\"heap\",job=\"prestoworker\"}*100/jvm_memory_bytes_max{area=\"heap\",job=\"prestoworker\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "broker", + "refId": "A" + } + ], + "title": "PrestoWorker内存使用率", + "transformations": [], + "type": "table" + } + ], + "refresh": "5s", + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "hj6gjW44z" + }, + "definition": "label_values(up{job=\"trino\"},instance)", + "hide": 0, + "includeAll": false, + "label": "节点", + "multi": false, + "name": "node", + "options": [], + "query": { + "query": "label_values(up{job=\"trino\"},instance)", + "refId": "Prometheus-node-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Presto", + "uid": "7Iy7ibMIz", + "version": 13, + "weekStart": "" +} +``` +#### 5.5 添加dotasophon presto模块总览 +在grafana中复制面板链接 + +![image](https://github.com/datavane/datasophon/assets/62798940/02443af5-90ff-4dc8-9cbd-d42fee7b2ca4) + +打开datasophon mysql t_ddh_cluster_service_dashboard表,添加presto面板 +注意复制的面板连接后面要拼上&kiosk,如下图: + 
+![image](https://github.com/datavane/datasophon/assets/62798940/977f9796-00ca-4016-82a9-d0f663659a00) + +集成好的监控长这样 + +![image](https://github.com/datavane/datasophon/assets/62798940/d15fcc17-16bf-4604-acf8-014f29ae7713) + +#### 5.6 集成告警 +在 /opt/datasophon/prometheus/alert_rules 目录中添加presto告警配置文件 presto.yml +```shell +groups: +- name: PRESTO + # rules:定义规则 + rules: + # alert:告警规则的名称 + - alert: PrestoCoordinator进程存活 + expr: up{job="prestocoordinator"} != 1 + for: 15s + labels: + # severity: 指定告警级别。有三种等级,分别为warning、critical和emergency。严重等级依次递增。 + severity: exception + clusterId: 1 + serviceRoleName: PrestoCoordinator + annotations: + # summary描述告警的概要信息 + # description用于描述告警的详细信息。 + summary: 重新启动 + description: "{{ $labels.job }}的{{ $labels.instance }}实例产生告警" + - alert: PrestoWorker进程存活 + expr: up{job="prestoworker"} != 1 + for: 15s + labels: + # severity: 指定告警级别。有三种等级,分别为warning、critical和emergency。严重等级依次递增。 + severity: exception + clusterId: 1 + serviceRoleName: PrestoWorker + annotations: + # summary描述告警的概要信息 + # description用于描述告警的详细信息。 + summary: 重新启动 + description: "{{ $labels.job }}的{{ $labels.instance }}实例产生告警" +``` +重启prometheus,可以在UI上看到已经添加了告警 + +![image](https://github.com/datavane/datasophon/assets/62798940/75709858-b641-425c-b87f-f838a5dea1fc) diff --git "a/docs/zh/\345\215\207\347\272\247flink1.15\345\210\260flink1.16.2.md" "b/docs/zh/\345\215\207\347\272\247flink1.15\345\210\260flink1.16.2.md" new file mode 100644 index 00000000..5efe15e3 --- /dev/null +++ "b/docs/zh/\345\215\207\347\272\247flink1.15\345\210\260flink1.16.2.md" @@ -0,0 +1,200 @@ +### 1、构建压缩包 +下载flink官方包 flink-1.16.2-bin-scala_2.12.tgz +```shell +tar -zxvf flink-1.16.2-bin-scala_2.12.tgz +tar czf flink-1.16.2.tar.gz flink-1.16.2 + +# 默认支持hudi +cp ./hudi-flink1.16-bundle-0.13.0.jar /flink-1.16.2/lib + +md5sum flink-1.16.2.tar.gz +echo '8d6c243ebc9bf58d3ee3e45e5c6509f4' > flink-1.16.2.tar.gz.md5 +cp ./flink-1.16.2.tar.gz ./flink-1.16.2.tar.gz.md5 /opt/datasophon/DDP/packages/ +``` +### 
2、修改service_ddl.json +```shell +vim /opt/apps/datasophon-manager-1.2.0/conf/meta/DDP-1.2.0/FLINK/service_ddl.json +``` +```shell +{ + "name": "FLINK", + "label": "Flink", + "description": "实时计算引擎", + "version": "1.16.2", + "sortNum": 6, + "dependencies":[], + "packageName": "flink-1.16.2.tar.gz", + "decompressPackageName": "flink-1.16.2", + "runAs":"root", + "roles": [ + { + "name": "FlinkClient", + "label": "FlinkClient", + "roleType": "client", + "cardinality": "1+", + "logFile": "logs/flink.log" + } + ], + "configWriter": { + "generators": [ + { + "filename": "flink-conf.yaml", + "configFormat": "custom", + "templateName": "properties3.ftl", + "outputDirectory": "conf", + "includeParams": [ + "jobmanager.memory.heap.size", + "taskmanager.memory.flink.size", + "high-availability", + "high-availability.storageDir", + "high-availability.zookeeper.quorum", + "high-availability.zookeeper.client.acl", + "high-availability.zookeeper.path.root", + "custom.flink.conf.yaml", + "classloader.check-leaked-classloader" + ] + } + ] + }, + "parameters": [ + { + "name": "jobmanager.memory.heap.size", + "label": "jobmanager堆内存大小", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "1600m" + }, + { + "name": "taskmanager.memory.flink.size", + "label": "taskmanager堆内存大小", + "description": "", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "1280m" + }, + { + "name": "enableJMHA", + "label": "开启JobManager高可用", + "description": "", + "required": true, + "type": "switch", + "value": false, + "configurableInWizard": true, + "hidden": false, + "defaultValue": false + }, + { + "name": "high-availability", + "label": "使用zookeeper搭建高可用", + "description": "使用zookeeper搭建高可用", + "configWithHA": true, + "required": false, + "type": "input", + "value": "zookeeper", + "configurableInWizard": true, + "hidden": true, + 
"defaultValue": "zookeeper" + },{ + "name": "high-availability.storageDir", + "label": "元数据存储HDFS目录", + "description": "存储JobManager的元数据到HDFS", + "configWithHA": true, + "required": false, + "type": "input", + "value": "hdfs://nameservice1/flink/ha/", + "configurableInWizard": true, + "hidden": true, + "defaultValue": "hdfs://nameservice1/flink/ha/" + },{ + "name": "high-availability.zookeeper.quorum", + "label": "ZK集群地址", + "description": "配置ZK集群地址", + "configWithHA": true, + "required": false, + "type": "input", + "value": "${zkUrls}", + "configurableInWizard": true, + "hidden": true, + "defaultValue": "" + }, + { + "name": "high-availability.zookeeper.path.root", + "label": "ZK元数据目录", + "description": "配置ZK元数据目录", + "configWithHA": true, + "required": false, + "type": "input", + "value": "/flink", + "configurableInWizard": true, + "hidden": true, + "defaultValue": "/flink" + }, + { + "name": "high-availability.zookeeper.client.acl", + "label": "high-availability.zookeeper.client.acl", + "description": "默认是 open,如果zookeeper security启用了更改成creator", + "configWithHA": true, + "required": false, + "type": "input", + "value": "open", + "configurableInWizard": true, + "hidden": true, + "defaultValue": "open" + }, + { + "name": "custom.flink.conf.yaml", + "label": "自定义配置flink-conf.yaml", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "classloader.check-leaked-classloader", + "label": "禁用classloader.check", + "description": "禁用classloader.check", + "required": true, + "type": "switch", + "value": false, + "configurableInWizard": true, + "hidden": false, + "defaultValue": false + } + ] +} +``` +### 3、修改环境变量 +```shell +vim /etc/profile.d/datasophon-env.sh +export FLINK_HOME=/opt/datasophon/flink-1.16.2 +export HADOOP_CLASSPATH=`hadoop classpath` +source /etc/profile.d/datasophon-env.sh +``` +各节点同样操作 +### 4、重启 
+各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker +``` +主节点重启api +```shell +sh /opt/apps/datasophon-manager-1.2.0/bin/datasophon-api.sh restart api +``` +### 5、测试 +```shell +flink run -d -t yarn-per-job $FLINK_HOME/examples/streaming/WordCount.jar +``` +```shell +flink run-application -t yarn-application $FLINK_HOME/examples/streaming/TopSpeedWindowing.jar +``` diff --git "a/docs/zh/\345\215\207\347\272\247spark3.1\345\210\2603.2.2.md" "b/docs/zh/\345\215\207\347\272\247spark3.1\345\210\2603.2.2.md" new file mode 100644 index 00000000..aaf4c767 --- /dev/null +++ "b/docs/zh/\345\215\207\347\272\247spark3.1\345\210\2603.2.2.md" @@ -0,0 +1,147 @@ +### 1、构建压缩包 +下载官方包 spark-3.2.2-bin-hadoop3.2.tgz +```shell +tar -zxvf spark-3.2.2-bin-hadoop3.2.tgz +mv spark-3.2.2-bin-hadoop3.2 spark-3.2.2 + +# 默认集成hudi +cp ./hudi-spark3.2-bundle_2.12-0.13.0.jar /spark-3.2.2/jars/ +chown hadoop:hadoop /spark-3.2.2/jars/hudi-spark3.2-bundle_2.12-0.13.0.jar + +tar czf spark-3.2.2.tar.gz spark-3.2.2 +md5sum spark-3.2.2.tar.gz +echo 'eadd4bb2ce5d809ce4c8631f1e865252' > spark-3.2.2.tar.gz.md5 +cp ./spark-3.2.2.tar.gz ./spark-3.2.2.tar.gz.md5 /opt/datasophon/DDP/packages/ +``` +### 2、修改servcie_ddl.json +```shell +{ + "name": "SPARK3", + "label": "Spark3", + "description": "分布式计算系统", + "version": "3.2.2", + "sortNum": 7, + "dependencies":[], + "packageName": "spark-3.2.2.tar.gz", + "decompressPackageName": "spark-3.2.2", + "roles": [ + { + "name": "SparkClient3", + "label": "SparkClient3", + "roleType": "client", + "cardinality": "1+", + "logFile": "logs/hadoop-${user}-datanode-${host}.log" + } + ], + "configWriter": { + "generators": [ + { + "filename": "spark-env.sh", + "configFormat": "custom", + "templateName": "spark-env.ftl", + "outputDirectory": "conf", + "includeParams": [ + "SPARK_DIST_CLASSPATH", + "HADOOP_CONF_DIR", + "YARN_CONF_DIR", + "custom.spark.env.sh" + ] + }, + { + "filename": "spark-defaults.conf", + "configFormat": 
"properties2", + "outputDirectory": "conf", + "includeParams": [ + "custom.spark.defaults.conf" + ] + } + ] + }, + "parameters": [ + { + "name": "SPARK_DIST_CLASSPATH", + "label": "spark加载Classpath路径", + "description": "", + "required": true, + "configType": "map", + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "$(${HADOOP_HOME}/bin/hadoop classpath)" + }, + { + "name": "HADOOP_CONF_DIR", + "label": "Hadoop配置文件目录", + "description": "", + "configType": "map", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${HADOOP_HOME}/etc/hadoop" + },{ + "name": "YARN_CONF_DIR", + "label": "Yarn配置文件目录", + "description": "", + "configType": "map", + "required": true, + "type": "input", + "value": "", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${HADOOP_HOME}/etc/hadoop" + }, + { + "name": "custom.spark.env.sh", + "label": "自定义配置spark-env.sh", + "description": "自定义配置spark-env.sh", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "custom.spark.defaults.conf", + "label": "自定义配置spark-defaults.conf", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + } + ] +} +``` +### 3、修改环境变量 +```shell +vim /etc/profile.d/datasophon-env.sh +export SPARK_HOME=/opt/datasophon/spark-3.2.2 +``` +各节点分发 +### 4、重启 +各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker +``` +主节点重启api +```shell +sh /opt/apps/datasophon-manager-1.2.0/bin/datasophon-api.sh restart api +``` +### 5、测试 +单机: +```shell +sh /opt/datasophon/spark-3.2.2/bin/spark-submit --class org.apache.spark.examples.SparkPi 
/opt/datasophon/spark-3.2.2/examples/jars/spark-examples_2.12-3.2.2.jar 12
+```
+yarn:
+```shell
+su - hdfs
+sh /opt/datasophon/spark-3.2.2/bin/spark-submit --master yarn --deploy-mode client --class org.apache.spark.examples.SparkPi /opt/datasophon/spark-3.2.2/examples/jars/spark-examples_2.12-3.2.2.jar 12
+```
From 2e560b0a26dc2189ba423b251eded8df9ec13e72 Mon Sep 17 00:00:00 2001
From: gaozhenfeng
Date: Mon, 6 Nov 2023 14:25:11 +0800
Subject: [PATCH 2/3] Integrated alluxio

---
 ...asophon\351\233\206\346\210\220alluxio.md" | 497 ++++++++++++++++++
 1 file changed, 497 insertions(+)
 create mode 100644 "docs/zh/datasophon\351\233\206\346\210\220alluxio.md"

diff --git "a/docs/zh/datasophon\351\233\206\346\210\220alluxio.md" "b/docs/zh/datasophon\351\233\206\346\210\220alluxio.md"
new file mode 100644
index 00000000..89bea444
--- /dev/null
+++ "b/docs/zh/datasophon\351\233\206\346\210\220alluxio.md"
@@ -0,0 +1,497 @@
+### 1、构建压缩包
+官方下载安装包 alluxio-2.9.3-bin.tar.gz
+```shell
+tar -zxvf alluxio-2.9.3-bin.tar.gz
+cd alluxio-2.9.3
+vim control_alluxio.sh
+cd ..
+tar czf alluxio-2.9.3.tar.gz alluxio-2.9.3
+md5sum alluxio-2.9.3.tar.gz
+echo 'bf0bf449ee28d0db8da56a5dba8ecee3' > alluxio-2.9.3.tar.gz.md5
+cp ./alluxio-2.9.3.tar.gz ./alluxio-2.9.3.tar.gz.md5 /opt/datasophon/DDP/packages
+```
+control_alluxio.sh:
+```shell
+#!/bin/bash
+
+operation=$1
+node_type=$2
+
+alluxio_start="./bin/alluxio-start.sh"
+alluxio_stop="./bin/alluxio-stop.sh"
+
+check_process() {
+    if ps -ef | grep -v grep | grep -q "$1"; then
+        return 0 # Process exists
+    else
+        return 1 # Process doesn't exist
+    fi
+}
+
+start_master() {
+    if ! check_process "AlluxioMaster"; then
+        $alluxio_start master
+    fi
+    if ! check_process "AlluxioJobMaster"; then
+        $alluxio_start job_master
+    fi
+    if ! check_process "AlluxioProxy"; then
+        $alluxio_start proxy
+    fi
+}
+
+start_worker() {
+    if ! check_process "AlluxioWorker"; then
+        $alluxio_start worker
+    fi
+    if ! check_process "AlluxioJobWorker"; then
+        $alluxio_start job_worker
+    fi
+    if ! check_process "AlluxioProxy"; then
+        $alluxio_start proxy
+    fi
+}
+
+stop_master() {
+    if check_process "AlluxioProxy"; then
+        $alluxio_stop proxy
+    fi
+    if check_process "AlluxioJobMaster"; then
+        $alluxio_stop job_master
+    fi
+    if check_process "AlluxioMaster"; then
+        $alluxio_stop master
+    fi
+}
+
+stop_worker() {
+    if check_process "AlluxioProxy"; then
+        $alluxio_stop proxy
+    fi
+    if check_process "AlluxioJobWorker"; then
+        $alluxio_stop job_worker
+    fi
+    if check_process "AlluxioWorker"; then
+        $alluxio_stop worker
+    fi
+}
+
+if [ "$operation" == "start" ]; then
+    case "$node_type" in
+        "master")
+            start_master
+            ;;
+        "worker")
+            start_worker
+            ;;
+        *)
+            echo "Invalid node type. Please use 'master' or 'worker'."
+            ;;
+    esac
+elif [ "$operation" == "stop" ]; then
+    case "$node_type" in
+        "master")
+            stop_master
+            ;;
+        "worker")
+            stop_worker
+            ;;
+        *)
+            echo "Invalid node type. Please use 'master' or 'worker'."
+            ;;
+    esac
+elif [ "$operation" == "status" ]; then
+    case "$node_type" in
+        "master")
+            if check_process "AlluxioMaster"; then
+                exit 0
+            else
+                exit 1
+            fi
+            ;;
+        "worker")
+            if check_process "AlluxioWorker"; then
+                exit 0
+            else
+                exit 1
+            fi
+            ;;
+        *)
+            echo "Invalid node type. Please use 'master' or 'worker'."
+            ;;
+    esac
+else
+    echo "Invalid operation. Please use 'start', 'stop', or 'status'."
+fi
+
+```
+### 2、配置元数据文件
+```shell
+cd /opt/apps/datasophon-manager-1.2.0/conf/meta/DDP-1.2.0
+mkdir ALLUXIO
+touch service_ddl.json
+touch properties_value.ftl
+```
+将下面两个文件放进去
+
+service_ddl.json:
+```shell
+{
+  "name": "ALLUXIO",
+  "label": "ALLUXIO",
+  "description": "分布式内存文件系统",
+  "version": "2.9.3",
+  "sortNum": 30,
+  "dependencies": [
+    "ZOOKEEPER"
+  ],
+  "packageName": "alluxio-2.9.3.tar.gz",
+  "decompressPackageName": "alluxio-2.9.3",
+  "roles": [
+    {
+      "name": "AlluxioMaster",
+      "label": "AlluxioMaster",
+      "roleType": "master",
+      "runAs": {},
+      "cardinality": "1+",
+      "sortNum": 2,
+      "logFile": "logs/master.log",
+      "jmxPort": "",
+      "startRunner": {
+        "timeout": "600",
+        "program": "control_alluxio.sh",
+        "args": [
+          "start",
+          "master"
+        ]
+      },
+      "stopRunner": {
+        "timeout": "600",
+        "program": "control_alluxio.sh",
+        "args": [
+          "stop",
+          "master"
+        ]
+      },
+      "statusRunner": {
+        "timeout": "60",
+        "program": "control_alluxio.sh",
+        "args": [
+          "status",
+          "master"
+        ]
+      },
+      "externalLink": {
+        "name": "master Ui",
+        "label": "master Ui",
+        "url": "http://${host}:19999"
+      }
+    },
+    {
+      "name": "AlluxioWorker",
+      "label": "AlluxioWorker",
+      "roleType": "worker",
+      "runAs": {},
+      "cardinality": "1+",
+      "sortNum": 1,
+      "logFile": "logs/worker.log",
+      "jmxPort": "",
+      "startRunner": {
+        "timeout": "60",
+        "program": "control_alluxio.sh",
+        "args": [
+          "start",
+          "worker"
+        ]
+      },
+      "stopRunner": {
+        "timeout": "600",
+        "program": "control_alluxio.sh",
+        "args": [
+          "stop",
+          "worker"
+        ]
+      },
+      "statusRunner": {
+        "timeout": "60",
+        "program": "control_alluxio.sh",
+        "args": [
+          "status",
+          "worker"
+        ]
+      }
+    }
+  ],
+  "configWriter": {
+    "generators": [
+      {
+        "filename": "alluxio-site.properties",
+        "configFormat": "properties",
+        "outputDirectory": "conf",
+        "includeParams": [
+          "alluxio.master.mount.table.root.ufs",
+          "alluxio.underfs.hdfs.configuration",
+          "alluxio.master.embedded.journal.addresses",
+          "alluxio.zookeeper.enabled",
+          
"alluxio.zookeeper.address", + "alluxio.master.journal.type", + "alluxio.master.journal.folder", + "alluxio.worker.block.heartbeat.timeout.ms", + "alluxio.zookeeper.session.timeout", + "custom.common.properties" + ] + }, + { + "filename": "masters", + "configFormat": "custom", + "outputDirectory": "conf", + "templateName": "properties_value.ftl", + "includeParams": [ + "masters" + ] + }, + { + "filename": "workers", + "configFormat": "custom", + "outputDirectory": "conf", + "templateName": "properties_value.ftl", + "includeParams": [ + "workers" + ] + } + ] + }, + "parameters": [ + { + "name": "alluxio.master.mount.table.root.ufs", + "label": "挂载到Alluxio根目录的底层存储URI", + "description": "挂载到Alluxio根目录的底层存储URI", + "required": true, + "type": "input", + "value": "${fs.defaultFS}/alluxio", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${fs.defaultFS}/alluxio" + }, + { + "name": "alluxio.underfs.hdfs.configuration", + "label": "hdfs配置文件路径", + "description": "hdfs配置文件路径", + "required": true, + "type": "input", + "value": "${HADOOP_HOME}/etc/hadoop/core-site.xml:${HADOOP_HOME}/etc/hadoop/hdfs-site.xml", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${HADOOP_HOME}/etc/hadoop/core-site.xml:${HADOOP_HOME}/etc/hadoop/hdfs-site.xml" + }, + { + "name": "alluxio.master.embedded.journal.addresses", + "label": "参加leading master选举的master节点集", + "description": "参加Alluxio leading master选举的master节点集", + "required": true, + "type": "input", + "value": "${host1}:19200,${host2}:19200,${host3}:19200", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "alluxio.zookeeper.enabled", + "label": "启用HA模式", + "description": "启用HA模式", + "required": true, + "type": "switch", + "value": true, + "configurableInWizard": true, + "hidden": false, + "defaultValue": true + }, + { + "name": "alluxio.zookeeper.address", + "label": "zookeeper地址", + "description": "zookeeper地址", + "required": true, + "type": "input", + 
"value": "${zkUrls}", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${zkUrls}" + }, + { + "name": "alluxio.master.journal.type", + "label": "", + "description": "", + "required": true, + "type": "input", + "value": "UFS", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "UFS" + }, + { + "name": "alluxio.master.journal.folder", + "label": "共享日志位置的URI", + "description": "共享日志位置的URI", + "required": true, + "type": "input", + "value": "${fs.defaultFS}/alluxio/journal/", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "${fs.defaultFS}/alluxio/journal/" + }, + { + "name": "alluxio.worker.block.heartbeat.timeout.ms", + "label": "Zookeeper服务器的最小/最大session timeout", + "description": "Zookeeper服务器的最小/最大session timeout", + "required": true, + "type": "input", + "value": "300000", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "300000" + }, + { + "name": "alluxio.zookeeper.session.timeout", + "label": "zookeeper连接超时时间", + "description": "zookeeper连接超时时间", + "required": true, + "type": "input", + "value": "120s", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "120s" + }, + { + "name": "custom.common.properties", + "label": "自定义配置common.properties", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "masters", + "label": "masters", + "description": "masters机器的IP", + "required": true, + "separator":"\n", + "type": "multiple", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + }, + { + "name": "workers", + "label": "workers", + "description": "workers机器的IP", + "required": true, + "separator":"\n", + "type": "multiple", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + } + ] +} +``` +properties_value.flt: +```shell +<#list itemList as item> 
+${item.value} + +``` +### 3、新增worker源码Handler +修改 com.datasophon.worker.handler.ConfigureServiceHandler +新增: +```shell +if ("AlluxioMaster".equals(serviceRoleName) && "alluxio-site.properties".equals(generators.getFilename())) { + ServiceConfig serviceConfig = new ServiceConfig(); + serviceConfig.setName("alluxio.master.hostname"); + serviceConfig.setValue(hostName); + customConfList.add(serviceConfig); +} +if ("AlluxioWorker".equals(serviceRoleName) && "alluxio-site.properties".equals(generators.getFilename())) { + File alluxioFile = + new File(Constants.INSTALL_PATH + File.separator + decompressPackageName, "conf/alluxio-site.properties"); + if (alluxioFile.exists()) { + continue; + } +} +``` + +![image](https://github.com/datavane/datasophon/assets/62798940/475ae77d-8865-457c-9699-dd4bff5e46f2) + + +修改 com.datasophon.worker.strategy.ServiceRoleStrategyContext: +```shell +map.put("AlluxioMaster", new AlluxioHandlerStrategy("ALLUXIO", "AlluxioMaster")); +``` + +创建:com.datasophon.worker.strategy.AlluxioHandlerStrategy +```shell +package com.datasophon.worker.strategy; + +import com.datasophon.common.Constants; +import com.datasophon.common.command.ServiceRoleOperateCommand; +import com.datasophon.common.enums.CommandType; +import com.datasophon.common.utils.ExecResult; +import com.datasophon.common.utils.ShellUtils; +import com.datasophon.worker.handler.ServiceHandler; + +import java.sql.SQLException; +import java.util.ArrayList; + +public class AlluxioHandlerStrategy extends AbstractHandlerStrategy implements ServiceRoleStrategy { + + public AlluxioHandlerStrategy(String serviceName, String serviceRoleName) { + super(serviceName, serviceRoleName); + } + + @Override + public ExecResult handler(ServiceRoleOperateCommand command) throws SQLException, ClassNotFoundException { + ServiceHandler serviceHandler = new ServiceHandler(command.getServiceName(), command.getServiceRoleName()); + String workPath = Constants.INSTALL_PATH + Constants.SLASH + 
command.getDecompressPackageName();
+
+        if (command.getCommandType().equals(CommandType.INSTALL_SERVICE)) {
+            ArrayList commands = new ArrayList<>();
+
+            logger.info("start format master");
+            commands.add(workPath + "/bin/alluxio");
+            commands.add("format");
+            ShellUtils.execWithStatus(workPath, commands, 300L, logger);
+            logger.info("alluxio master format success");
+
+            commands.clear();
+            commands.add(workPath + "/bin/alluxio-start.sh");
+            commands.add("all");
+            ExecResult execResult = ShellUtils.execWithStatus(workPath, commands, 300L, logger);
+            if (execResult.getExecResult()) {
+                logger.info("alluxio start all success");
+            }
+        }
+
+        ExecResult startResult = serviceHandler.start(command.getStartRunner(), command.getStatusRunner(),
+                command.getDecompressPackageName(), command.getRunAs());
+        return startResult;
+    }
+}
+
+```
+### 4、重启
+各节点worker重启
+```shell
+sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker debug
+```
+主节点重启api
+```shell
+sh /opt/apps/datasophon-manager-1.2.0/bin/datasophon-api.sh restart api debug
+```
+### 5、配置样例
+
+![image](https://github.com/datavane/datasophon/assets/62798940/bd626fec-c581-4c22-8f36-b582afbb7ea4)
From 992d23b7f22fdbfa8e649286d90f7ccc3cdb9459 Mon Sep 17 00:00:00 2001
From: gaozhenfeng
Date: Fri, 10 Nov 2023 14:48:48 +0800
Subject: [PATCH 3/3] Integrated seatunnel doc

---
 ...ophon\351\233\206\346\210\220seatunnel.md" | 232 ++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 "docs/zh/datasophon\351\233\206\346\210\220seatunnel.md"

diff --git "a/docs/zh/datasophon\351\233\206\346\210\220seatunnel.md" "b/docs/zh/datasophon\351\233\206\346\210\220seatunnel.md"
new file mode 100644
index 00000000..24bb5450
--- /dev/null
+++ "b/docs/zh/datasophon\351\233\206\346\210\220seatunnel.md"
@@ -0,0 +1,232 @@
+### 1、构建安装包
+下载安装包 
+[https://www.apache.org/dyn/closer.lua/seatunnel/2.3.3/apache-seatunnel-2.3.3-bin.tar.gz](https://www.apache.org/dyn/closer.lua/seatunnel/2.3.3/apache-seatunnel-2.3.3-bin.tar.gz) +```shell +tar -zxvf apache-seatunnel-2.3.3-bin.tar.gz +mv apache-seatunnel-2.3.3-bin seatunnel-2.3.3 + +cd seatunnel-2.3.3 +# 修改config/plugin_config文件只保留需要的连接器后安装连接器 +sh bin/install-plugin.sh 2.3.3 + +# bin目录新增status脚本 +touch ./bin/seatunnel-status.sh +chmod 755 ./bin/seatunnel-status.sh + +# 打包 +tar czf seatunnel-2.3.3.tar.gz seatunnel-2.3.3 +md5sum seatunnel-2.3.3.tar.gz +echo '27c821b7d7ead2f99a4db2d7503fc2b5' > seatunnel-2.3.3.tar.gz.md5 +``` +```shell +#!/bin/bash + +# 检查 SeaTunnelServer 进程是否存在 +if ps -ef | grep -q "[S]eaTunnelServer"; then + echo "SeaTunnelServer 进程正在运行." + exit 0 +else + echo "SeaTunnelServer 进程未找到." + exit 1 +fi +``` +### 2、元数据文件 +api节点新增: +```shell +cd /opt/apps/datasophon-manager-1.2.0/conf/meta/DDP-1.2.0 +mkdir SEATUNNEL +cd SEATUNNEL +touch service_ddl.json +``` +```shell +{ + "name": "SEATUNNEL", + "label": "Seatunnel", + "description": "数据同步工具", + "version": "2.3.3", + "sortNum": 32, + "dependencies": [], + "packageName": "seatunnel-2.3.3.tar.gz", + "decompressPackageName": "seatunnel-2.3.3", + "roles": [ + { + "name": "SeatunnelServer", + "label": "SeatunnelServer", + "roleType": "worker", + "cardinality": "1+", + "logFile": "logs/seatunnel-engine-server.log", + "startRunner": { + "timeout": "60", + "program": "bin/seatunnel-cluster.sh", + "args": [ + "-d" + ] + }, + "stopRunner": { + "timeout": "60", + "program": "bin/stop-seatunnel-cluster.sh", + "args": [] + }, + "statusRunner": { + "timeout": "60", + "program": "bin/seatunnel-status.sh", + "args": [] + } + } + ], + "configWriter": { + "generators": [ + { + "filename": "seatunnel.yaml", + "configFormat": "custom", + "templateName": "seatunnel-yml.flt", + "outputDirectory": "config", + "includeParams": [ + "backupCount", + "custom.checkPoint" + ] + }, + { + "filename": "hazelcast.yaml", + "configFormat": 
"custom", + "templateName": "hazelcast.flt", + "outputDirectory": "config", + "includeParams": [ + "hosts" + ] + }, + { + "filename": "hazelcast-client.yaml", + "configFormat": "custom", + "templateName": "hazelcast-client.flt", + "outputDirectory": "config", + "includeParams": [ + "hosts" + ] + } + ] + }, + "parameters": [ + { + "name": "backupCount", + "label": "同步备份的数量", + "description": "同步备份的数量", + "required": true, + "type": "input", + "configType": "map", + "value": "1", + "configurableInWizard": true, + "hidden": false, + "defaultValue": "1" + }, + { + "name": "custom.checkPoint", + "label": "自定义配置检查点存储", + "description": "自定义配置", + "configType": "custom", + "required": false, + "type": "multipleWithKey", + "value": [{"namespace":"/tmp/seatunnel/checkpoint_snapshot"},{"storage.type":"hdfs"},{"fs.defaultFS":"file:///tmp/"}], + "configurableInWizard": true, + "hidden": false, + "defaultValue": [{"namespace":"/tmp/seatunnel/checkpoint_snapshot"},{"storage.type":"hdfs"},{"fs.defaultFS":"file:///tmp/"}] + }, + { + "name": "hosts", + "label": "集群节点ip", + "description": "集群节点ip", + "required": true, + "type": "multiple", + "separator": ",", + "value": [], + "configurableInWizard": true, + "hidden": false, + "defaultValue": "" + } + ] +} + +``` +各worker节点新增: +```shell +cd /opt/datasophon/datasophon-worker/conf/templates +touch seatunnel-yml.flt +touch hazelcast.flt +touch hazelcast-client.flt +``` +```shell +hazelcast: + cluster-name: seatunnel + network: + rest-api: + enabled: true + endpoint-groups: + CLUSTER_WRITE: + enabled: true + DATA: + enabled: true + join: + tcp-ip: + enabled: true + member-list: +<#list itemList as item> + <#list item.value?split(",") as host> + - ${host} + + + port: + auto-increment: false + port: 5801 + properties: + hazelcast.invocation.max.retry.count: 20 + hazelcast.tcp.join.port.try.count: 30 + hazelcast.logging.type: log4j2 + hazelcast.operation.generic.thread.count: 50 +``` +```shell +hazelcast-client: + cluster-name: seatunnel + 
properties: + hazelcast.logging.type: log4j2 + network: + cluster-members: +<#list itemList as item> + <#list item.value?split(",") as host> + - ${host}:5801 + + +``` +```shell +seatunnel: + engine: + backup-count: ${backupCount} + queue-type: blockingqueue + print-execution-info-interval: 60 + print-job-metrics-info-interval: 60 + slot-service: + dynamic-slot: true + checkpoint: + interval: 10000 + timeout: 60000 + max-concurrent: 1 + tolerable-failure: 2 + storage: + type: hdfs + max-retained: 3 + plugin-config: + <#list itemList as item> + ${item.name}: ${item.value} + +``` +### 3、重启 +各节点worker重启 +```shell +sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker debug +``` +主节点重启api +```shell +sh /opt/apps/datasophon-manager-1.2.0/bin/datasophon-api.sh restart api debug +``` +### 4、页面配置样例 + +![image](https://github.com/datavane/datasophon/assets/62798940/e72af3f5-cbd2-41c4-9d30-988c3cfb36ee)