From 21fe8f36767da7e52e2f5d77d7648abddc913652 Mon Sep 17 00:00:00 2001 From: siaimes <34199488+siaimes@users.noreply.github.com> Date: Sun, 18 Jul 2021 18:19:04 +0800 Subject: [PATCH 01/17] Fix update docker cache error (#5539) Fix update docker cache error: [issue comment](https://github.com/microsoft/pai/issues/5445#issuecomment-826238676). If /etc/docker/daemon.json doesn't exist or is an empty file, the script will fail. --- .../install/files/add_docker_cache_config.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/contrib/kubespray/roles/docker-cache/install/files/add_docker_cache_config.py b/contrib/kubespray/roles/docker-cache/install/files/add_docker_cache_config.py index e5de7a742c..c672977349 100644 --- a/contrib/kubespray/roles/docker-cache/install/files/add_docker_cache_config.py +++ b/contrib/kubespray/roles/docker-cache/install/files/add_docker_cache_config.py @@ -17,14 +17,15 @@ def main(): backup_path = Path("/etc/docker/daemon.json.bk") folder_path.mkdir(parents=True, exist_ok=True) - target_path.touch(mode=0o666) - backup_path.touch(mode=0o666) - - with open(str(target_path)) as f: - current_config = json.load(f); - - with open(str(backup_path), 'w') as f: - json.dump(current_config, f) + if target_path.exists() and target_path.stat().st_size: + backup_path.touch(mode=0o666) + with open(str(target_path)) as f: + current_config = json.load(f) + with open(str(backup_path), 'w') as f: + json.dump(current_config, f) + else: + target_path.touch(mode=0o666) + current_config = {} docker_cache_mirror = "http://{}".format(args.host) if "registry-mirrors" in current_config: From 33087520e916d282a27d8aae6eaece6f9201bea2 Mon Sep 17 00:00:00 2001 From: Binyang2014 Date: Tue, 27 Jul 2021 17:18:22 +0800 Subject: [PATCH 02/17] Fix: change tail log to 16KB (#5575) --- src/rest-server/src/controllers/internal/tail-log.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rest-server/src/controllers/internal/tail-log.js b/src/rest-server/src/controllers/internal/tail-log.js index 3c0896b388..0106dd547d 100644 --- a/src/rest-server/src/controllers/internal/tail-log.js +++ b/src/rest-server/src/controllers/internal/tail-log.js @@ -26,7 +26,7 @@ const logger = require('@pai/config/logger'); const createError = require('@pai/utils/error'); const getTailLog = asyncHandler(async (req, res) => { - const tailLogSize = 16 * 1024 * 1024; // 16 KB + const tailLogSize = 16 * 1024; // 16 KB const logName = req.params.logName; const queryString = req.url.substring(req.url.indexOf('?') + 1); const account = launcherConfig.logAzureStorageAccount; From 99393743f98a4b6acaa219160e6750f6682a6dbc Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Wed, 28 Jul 2021 10:30:31 +0800 Subject: [PATCH 03/17] make enable_docker_cache effective (#5574) --- contrib/kubespray/docker-cache-config-distribute.yml | 8 +------- contrib/kubespray/quick-start-kubespray.sh | 2 +- .../install/tasks/add-docker-cache-config.yml | 5 +++++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/contrib/kubespray/docker-cache-config-distribute.yml b/contrib/kubespray/docker-cache-config-distribute.yml index 8f6ea95da0..81e8d22abe 100644 --- a/contrib/kubespray/docker-cache-config-distribute.yml +++ b/contrib/kubespray/docker-cache-config-distribute.yml @@ -5,10 +5,4 @@ roles: - role: '../roles/docker-cache/install' vars: - enable_docker_cache: true - docker_cache_host: "{{ hostvars[groups['kube-master'][0]]['ip'] }}:30500" - tasks: - - name: Restart service 
docker config from /etc/docker/daemon.json after update - ansible.builtin.systemd: - name: docker - state: restarted + docker_cache_host: "{{ hostvars[groups['kube-master'][0]]['ip'] }}:30500" \ No newline at end of file diff --git a/contrib/kubespray/quick-start-kubespray.sh b/contrib/kubespray/quick-start-kubespray.sh index 5fc5417239..f0aab526ba 100644 --- a/contrib/kubespray/quick-start-kubespray.sh +++ b/contrib/kubespray/quick-start-kubespray.sh @@ -57,7 +57,7 @@ echo "Performing pre-installation..." ansible-playbook -i ${HOME}/pai-deploy/cluster-cfg/hosts.yml pre-installation.yml || exit $? echo "Performing docker-cache config distribution..." -ansible-playbook -i ${HOME}/pai-deploy/cluster-cfg/hosts.yml docker-cache-config-distribute.yml || exit $? +ansible-playbook -i ${HOME}/pai-deploy/cluster-cfg/hosts.yml docker-cache-config-distribute.yml -e "@${CLUSTER_CONFIG}" || exit $? echo "Starting kubernetes..." /bin/bash script/kubernetes-boot.sh || exit $? diff --git a/contrib/kubespray/roles/docker-cache/install/tasks/add-docker-cache-config.yml b/contrib/kubespray/roles/docker-cache/install/tasks/add-docker-cache-config.yml index cfda3ae373..9837ae00dd 100644 --- a/contrib/kubespray/roles/docker-cache/install/tasks/add-docker-cache-config.yml +++ b/contrib/kubespray/roles/docker-cache/install/tasks/add-docker-cache-config.yml @@ -10,3 +10,8 @@ - python3 - /tmp/add_docker_cache_config.py - "{{ docker_cache_host }}" + +- name: Restart service docker config from /etc/docker/daemon.json after update + ansible.builtin.systemd: + name: docker + state: restarted From be38c00b92141bd8ef97ba3b45106d7700c03c69 Mon Sep 17 00:00:00 2001 From: siaimes <34199488+siaimes@users.noreply.github.com> Date: Thu, 29 Jul 2021 10:21:48 +0800 Subject: [PATCH 04/17] Use sed instead of pip to change ansible version (#5573) Signed-off-by: siaimes <34199488+siaimes@users.noreply.github.com> --- contrib/kubespray/script/environment.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/contrib/kubespray/script/environment.sh b/contrib/kubespray/script/environment.sh index 5ab5730fd7..bcac661a06 100644 --- a/contrib/kubespray/script/environment.sh +++ b/contrib/kubespray/script/environment.sh @@ -40,12 +40,11 @@ sudo python3 -m pip install -r script/requirements.txt echo "Install sshpass" sudo apt-get -y install sshpass -echo "Install kubespray's requirements and ansible is included" -sudo python3 -m pip install -r ${HOME}/pai-deploy/kubespray/requirements.txt - # ansible 2.7 doesn't support distribution info collection on Ubuntu 20.04 # Use ansible 2.9.7 as a workaround # Reference: https://stackoverflow.com/questions/61460151/ansible-not-reporting-distribution-info-on-ubuntu-20-04 # We can upgrade kubespray version to avoid this issue in the future. 
-sudo python3 -m pip install ansible==2.9.7 +sed -i 's/ansible==.*/ansible==2.9.7/' ${HOME}/pai-deploy/kubespray/requirements.txt +echo "Install kubespray's requirements and ansible is included" +sudo python3 -m pip install -r ${HOME}/pai-deploy/kubespray/requirements.txt From f31d612f57504a39e55da5bb84f2d094f3b1ba10 Mon Sep 17 00:00:00 2001 From: siaimes <34199488+siaimes@users.noreply.github.com> Date: Mon, 9 Aug 2021 10:46:15 +0800 Subject: [PATCH 05/17] fix missing `WEBPORTAL_URL` issue when installing services (#5538) [issue comment](https://github.com/microsoft/pai/issues/5445#issuecomment-827309308) --- contrib/kubespray/script/service-boot.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/kubespray/script/service-boot.sh b/contrib/kubespray/script/service-boot.sh index 0f6ea18153..0655707a18 100644 --- a/contrib/kubespray/script/service-boot.sh +++ b/contrib/kubespray/script/service-boot.sh @@ -45,7 +45,7 @@ echo "Starting OpenPAI service with dev-box..." sudo docker exec -w /mnt/pai dev-box-quick-start /bin/bash ./contrib/kubespray/script/start-service-in-dev-box.sh # print cluster info -WEBPORTAL_URL=http:$(kubectl config view -o jsonpath='{.clusters[].cluster.server}' | cut -d ":" -f 2) +WEBPORTAL_URL=http:$(sudo docker exec dev-box-quick-start kubectl config view -o jsonpath='{.clusters[].cluster.server}' | cut -d ":" -f 2) echo "" echo "OpenPAI is successfully deployed, please check the following information:" echo "Kubernetes cluster config : ~/pai-deploy/kube/config" From 4f54b78e666cb13aef1c2ec7a398997a3b614f26 Mon Sep 17 00:00:00 2001 From: Guoxin Date: Tue, 10 Aug 2021 14:26:31 +0800 Subject: [PATCH 06/17] Add Prometheus Pushgateway as an optional service (#5590) - Add an optional service Prometheus Pushagteway - add a container `metrics-cleaner` to clean Pushgateway metrics by fixed interval - add prometheus-pushgateway in job-exporter - set `honor_lables` as true in Prometheus --- .../services-configuration.yaml.template | 1 + src/job-exporter/src/collector.py | 1 + .../build/metrics-cleaner.common.dockerfile | 10 +++ .../prometheus-pushgateway.common.dockerfile | 4 + .../config/prometheus-pushgateway.yaml | 9 ++ .../config/prometheus_pushgateway.py | 35 ++++++++ src/prometheus-pushgateway/deploy/delete.sh | 11 +++ .../prometheus-pushgateway.yaml.template | 70 ++++++++++++++++ src/prometheus-pushgateway/deploy/refresh.sh | 12 +++ .../deploy/service.yaml | 25 ++++++ .../deploy/start.sh.template | 16 ++++ .../deploy/stop.sh.template | 15 ++++ .../src/metrics-cleaner/__init__.py | 0 .../src/metrics-cleaner/main.py | 83 +++++++++++++++++++ .../src/metrics-cleaner/pylintrc | 7 ++ .../src/metrics-cleaner/requirements.txt | 2 + .../deploy/prometheus-configmap.yaml.template | 1 + 17 files changed, 302 insertions(+) create mode 100644 src/prometheus-pushgateway/build/metrics-cleaner.common.dockerfile create mode 100644 src/prometheus-pushgateway/build/prometheus-pushgateway.common.dockerfile create mode 100644 src/prometheus-pushgateway/config/prometheus-pushgateway.yaml create mode 100644 src/prometheus-pushgateway/config/prometheus_pushgateway.py create mode 100644 src/prometheus-pushgateway/deploy/delete.sh create mode 100644 src/prometheus-pushgateway/deploy/prometheus-pushgateway.yaml.template create mode 100644 src/prometheus-pushgateway/deploy/refresh.sh create mode 100644 src/prometheus-pushgateway/deploy/service.yaml create mode 100644 src/prometheus-pushgateway/deploy/start.sh.template create mode 100644 
src/prometheus-pushgateway/deploy/stop.sh.template create mode 100644 src/prometheus-pushgateway/src/metrics-cleaner/__init__.py create mode 100644 src/prometheus-pushgateway/src/metrics-cleaner/main.py create mode 100644 src/prometheus-pushgateway/src/metrics-cleaner/pylintrc create mode 100644 src/prometheus-pushgateway/src/metrics-cleaner/requirements.txt diff --git a/contrib/kubespray/quick-start/services-configuration.yaml.template b/contrib/kubespray/quick-start/services-configuration.yaml.template index d521d0c445..82a228ded8 100644 --- a/contrib/kubespray/quick-start/services-configuration.yaml.template +++ b/contrib/kubespray/quick-start/services-configuration.yaml.template @@ -7,6 +7,7 @@ cluster: data-path: "/datastorage" qos-switch: "{{ env["cfg"]["qos-switch"] | default('false') }}" docker-data-root: "{{ env['cfg']['docker_data_root'] | default('/mnt/docker') }}" + prometheus-pushgateway: false marketplace: "{{ env["cfg"]["enable_marketplace"] | default('false') }}" # the docker registry to store docker images that contain system services like frameworklauncher, hadoop, etc. diff --git a/src/job-exporter/src/collector.py b/src/job-exporter/src/collector.py index 15884bda5f..f82250325f 100644 --- a/src/job-exporter/src/collector.py +++ b/src/job-exporter/src/collector.py @@ -542,6 +542,7 @@ class ContainerCollector(Collector): "pylon", "webportal", "grafana", + "prometheus-pushgateway", "prometheus", "alertmanager", "watchdog", diff --git a/src/prometheus-pushgateway/build/metrics-cleaner.common.dockerfile b/src/prometheus-pushgateway/build/metrics-cleaner.common.dockerfile new file mode 100644 index 0000000000..cbf4a168a8 --- /dev/null +++ b/src/prometheus-pushgateway/build/metrics-cleaner.common.dockerfile @@ -0,0 +1,10 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +FROM python:3.7 + +COPY ./src/metrics-cleaner . + +RUN python -m pip install --upgrade pip && python -m pip install -r requirements.txt + +ENTRYPOINT ["python3", "main.py"] diff --git a/src/prometheus-pushgateway/build/prometheus-pushgateway.common.dockerfile b/src/prometheus-pushgateway/build/prometheus-pushgateway.common.dockerfile new file mode 100644 index 0000000000..cd26cb94c2 --- /dev/null +++ b/src/prometheus-pushgateway/build/prometheus-pushgateway.common.dockerfile @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +FROM prom/pushgateway:v1.3.1 diff --git a/src/prometheus-pushgateway/config/prometheus-pushgateway.yaml b/src/prometheus-pushgateway/config/prometheus-pushgateway.yaml new file mode 100644 index 0000000000..292cd8a429 --- /dev/null +++ b/src/prometheus-pushgateway/config/prometheus-pushgateway.yaml @@ -0,0 +1,9 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +service_type: "common" + +port: 9097 +metrics-cleaner: + job-time-to-live: 30 # seconds + clean-interval: 15 # seconds diff --git a/src/prometheus-pushgateway/config/prometheus_pushgateway.py b/src/prometheus-pushgateway/config/prometheus_pushgateway.py new file mode 100644 index 0000000000..26783db1da --- /dev/null +++ b/src/prometheus-pushgateway/config/prometheus_pushgateway.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +import copy + +class PrometheusPushgateway(object): + def __init__(self, cluster_conf, service_conf, default_service_conf): + self.cluster_conf = cluster_conf + self.service_conf = service_conf + self.default_service_conf = default_service_conf + + def get_master_ip(self): + for host_conf in self.cluster_conf["machine-list"]: + if "pai-master" in host_conf and host_conf["pai-master"] == "true": + return host_conf["hostip"] + + def validation_pre(self): + return True, None + + def run(self): + result = copy.deepcopy(self.default_service_conf) + result.update(self.service_conf) + result["url"] = "http://{0}:{1}".format(self.get_master_ip(), result["port"]) + return result + + def validation_post(self, conf): + error_msg ="expect %s in prometheus to be int but get %s with type %s" + + port = conf["prometheus-pushgateway"].get("port") + if type(port) != int: + return False, error_msg % ("port", port, type(port)) + + return True, None diff --git a/src/prometheus-pushgateway/deploy/delete.sh b/src/prometheus-pushgateway/deploy/delete.sh new file mode 100644 index 0000000000..b545c22398 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/delete.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +pushd $(dirname "$0") > /dev/null + +echo "Call stop to stop service first" +/bin/bash stop.sh || exit $? + +popd > /dev/null diff --git a/src/prometheus-pushgateway/deploy/prometheus-pushgateway.yaml.template b/src/prometheus-pushgateway/deploy/prometheus-pushgateway.yaml.template new file mode 100644 index 0000000000..3ab95dd9c0 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/prometheus-pushgateway.yaml.template @@ -0,0 +1,70 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +{% set prometheus_pushgateway_port = cluster_cfg["prometheus-pushgateway"]["port"] %} + +{% if cluster_cfg["alert-manager"]["use-pylon"] %} +{% set external_url = cluster_cfg["pylon"]["uri"] %} +{% else %} +{% set external_url = cluster_cfg["prometheus-pushgateway"]["url"] %} +{% endif %} + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-pushgateway +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus-pushgateway + template: + metadata: + name: prometheus-pushgateway + labels: + app: prometheus-pushgateway + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: "/prometheus-pushgateway/metrics" + prometheus.io/port: "{{ prometheus_pushgateway_port }}" + spec: + containers: + - name: prometheus-pushgateway + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}prometheus-pushgateway:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + imagePullPolicy: Always + {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %} + resources: + limits: + memory: "2Gi" + cpu: "1000m" + requests: + memory: "512Mi" + cpu: "500m" + {%- endif %} + args: + - '--web.listen-address=0.0.0.0:8080' + - '--web.external-url={{ external_url }}/prometheus-pushgateway/' + - '--web.route-prefix=prometheus-pushgateway' + - '--web.enable-admin-api' + - '--web.enable-lifecycle' + ports: + - name: web + containerPort: 8080 + hostPort: {{ prometheus_pushgateway_port }} + - name: metrics-cleaner + image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}metrics-cleaner:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }} + imagePullPolicy: Always + env: + - name: PROMETHEUS_PUSHGATEWAY_URI + value: {{ cluster_cfg['prometheus-pushgateway']['url'] }} + - name: JOB_TIME_TO_LIVE + value: "{{ cluster_cfg['prometheus-pushgateway']['metrics-cleaner']['job-time-to-live'] }}" + - name: CLEAN_INTERVAL + value: "{{ cluster_cfg['prometheus-pushgateway']['metrics-cleaner']['clean-interval'] }}" + imagePullSecrets: + - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} + tolerations: + - key: node.kubernetes.io/memory-pressure + operator: "Exists" + - key: node.kubernetes.io/disk-pressure + operator: "Exists" diff --git a/src/prometheus-pushgateway/deploy/refresh.sh b/src/prometheus-pushgateway/deploy/refresh.sh new file mode 100644 index 0000000000..05adc8d986 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/refresh.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + + +pushd $(dirname "$0") > /dev/null + +bash stop.sh +bash start.sh + +popd > /dev/null diff --git a/src/prometheus-pushgateway/deploy/service.yaml b/src/prometheus-pushgateway/deploy/service.yaml new file mode 100644 index 0000000000..0c76df9756 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/service.yaml @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +cluster-type: + - yarn + - k8s + +prerequisite: + - cluster-configuration + - docker-cache + - prometheus + +template-list: + - prometheus-pushgateway.yaml + - start.sh + - stop.sh + +start-script: start.sh +stop-script: stop.sh +delete-script: delete.sh +refresh-script: refresh.sh +upgraded-script: upgraded.sh + +deploy-rules: + - in: pai-master diff --git a/src/prometheus-pushgateway/deploy/start.sh.template b/src/prometheus-pushgateway/deploy/start.sh.template new file mode 100644 index 0000000000..d223db0d04 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/start.sh.template @@ -0,0 +1,16 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +{%- if cluster_cfg['cluster']['common']['prometheus-pushgateway'] == 'true' %} +pushd $(dirname "$0") > /dev/null + +kubectl apply --overwrite=true -f prometheus-pushgateway.yaml || exit $? + +sleep 10 +# Wait until the service is ready. +PYTHONPATH="../../../deployment" python -m k8sPaiLibrary.monitorTool.check_pod_ready_status -w -k app -v prometheus-pushgateway || exit $? + +popd > /dev/null +{%- endif %} diff --git a/src/prometheus-pushgateway/deploy/stop.sh.template b/src/prometheus-pushgateway/deploy/stop.sh.template new file mode 100644 index 0000000000..4ba6bd6529 --- /dev/null +++ b/src/prometheus-pushgateway/deploy/stop.sh.template @@ -0,0 +1,15 @@ +#!/bin/bash + +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +{%- if cluster_cfg['cluster']['common']['prometheus-pushgateway'] == 'true' %} +INSTANCES=" +deployment/prometheus-pushgateway +" + +for instance in ${INSTANCES}; do + kubectl delete --ignore-not-found --now ${instance} || exit $? +done + +{%- endif %} diff --git a/src/prometheus-pushgateway/src/metrics-cleaner/__init__.py b/src/prometheus-pushgateway/src/metrics-cleaner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/prometheus-pushgateway/src/metrics-cleaner/main.py b/src/prometheus-pushgateway/src/metrics-cleaner/main.py new file mode 100644 index 0000000000..7a83c7a9f7 --- /dev/null +++ b/src/prometheus-pushgateway/src/metrics-cleaner/main.py @@ -0,0 +1,83 @@ +from datetime import timezone, datetime, timedelta +import os +import logging +import time +import requests +import pytz + + +def enable_request_debug_log(func): + def wrapper(*args, **kwargs): + requests_log = logging.getLogger("urllib3") + level = requests_log.level + requests_log.setLevel(logging.DEBUG) + requests_log.propagate = True + + try: + return func(*args, **kwargs) + finally: + requests_log.setLevel(level) + requests_log.propagate = False + + return wrapper + + +def getPushgatewayJobsToDelete(pushgateway_uri, seconds): + """ + Jobs have not been updated within the given seconds should be deleted + + Returns: + -------- + list of job names + """ + resp = requests.get("{}/prometheus-pushgateway/api/v1/metrics".format(pushgateway_uri)) + resp.raise_for_status() + + job_names = [] + for job in resp.json()["data"]: + # get job name + if "labels" not in job or "job" not in job["labels"]: + continue + job_name = job["labels"]["job"] + + # get last pushed time + if "push_time_seconds" not in job or "time_stamp" not in job["push_time_seconds"]: + continue + last_pushed_time = job["push_time_seconds"]["time_stamp"] + + # if the job has been updated within the interval, ignore it + last_pushed_time = datetime.strptime(last_pushed_time.split(".")[0], '%Y-%m-%dT%H:%M:%S').replace(tzinfo=pytz.UTC) + if last_pushed_time > datetime.now(timezone.utc) - 
timedelta(seconds=seconds): + continue + + job_names.append(job_name) + + return job_names + + +@enable_request_debug_log +def cleanPushgatewayJobs(): + PROMETHEUS_PUSHGATEWAY_URI = os.environ.get('PROMETHEUS_PUSHGATEWAY_URI') + JOB_TIME_TO_LIVE = int(os.environ.get('JOB_TIME_TO_LIVE')) + + logging.info("Getting Pushgateway jobs to delete...") + job_names = getPushgatewayJobsToDelete(PROMETHEUS_PUSHGATEWAY_URI, JOB_TIME_TO_LIVE) + logging.info("Pushgateway jobs to delete: %s", job_names) + + # delete related metrics from Prometheus Pushgateway by job + for job_name in job_names: + url = "{}/prometheus-pushgateway/metrics/job/{}".format(PROMETHEUS_PUSHGATEWAY_URI, job_name) + requests.delete(url) + + +if __name__ == "__main__": + logging.basicConfig( + format= + "%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", + level=logging.INFO, + ) + + CLEAN_INTERVAL = int(os.environ.get('CLEAN_INTERVAL')) + while True: + cleanPushgatewayJobs() + time.sleep(CLEAN_INTERVAL) diff --git a/src/prometheus-pushgateway/src/metrics-cleaner/pylintrc b/src/prometheus-pushgateway/src/metrics-cleaner/pylintrc new file mode 100644 index 0000000000..49e462d14e --- /dev/null +++ b/src/prometheus-pushgateway/src/metrics-cleaner/pylintrc @@ -0,0 +1,7 @@ +[SETTINGS] + +max-line-length=140 + +disable = + missing-docstring, + invalid-name, diff --git a/src/prometheus-pushgateway/src/metrics-cleaner/requirements.txt b/src/prometheus-pushgateway/src/metrics-cleaner/requirements.txt new file mode 100644 index 0000000000..3c2c305595 --- /dev/null +++ b/src/prometheus-pushgateway/src/metrics-cleaner/requirements.txt @@ -0,0 +1,2 @@ +requests==2.23.0 +pytz==2021.1 \ No newline at end of file diff --git a/src/prometheus/deploy/prometheus-configmap.yaml.template b/src/prometheus/deploy/prometheus-configmap.yaml.template index 95e21c7d17..5199eab203 100644 --- a/src/prometheus/deploy/prometheus-configmap.yaml.template +++ b/src/prometheus/deploy/prometheus-configmap.yaml.template @@ -29,6 +29,7 @@ data: - "/etc/prometheus-record/*.rules" scrape_configs: - job_name: 'pai_service_exporter' + honor_labels: true scrape_interval: {{ prom_info["scrape_interval"] }}s kubernetes_sd_configs: - role: pod From baa2805d17a956b5f8cce8212177e0234377f5c2 Mon Sep 17 00:00:00 2001 From: Guoxin Date: Tue, 10 Aug 2021 14:29:09 +0800 Subject: [PATCH 07/17] adjust grafana to fit more metrics (#5591) - support more metrics, including - node_memory_bytes with `type` label - node_disk_other_bytes_total, task_block_other_byte - get task cpu utilization with `task_cpu_seconds_total` - task_network_receive_bytes_total, task_network_transmit_bytes_total - avoid wrongly computed 100% cpu utilization by using `idelta` - use `irate` instead of `rate` for fast-moving metrics & change the computing interval - set `editable` as true in all the dashboards --- .../pai-clusterview-dashboard.json.template | 74 ++++++++++++++--- .../pai-jobview-dashboard.json.template | 38 ++++++++- .../pai-nodeview-dashboard.json.template | 80 ++++++++++++++++--- .../pai-serviceview-dashboard.json.template | 7 +- .../pai-taskroleview-dashboard.json.template | 38 ++++++++- .../pai-tasks-in-node-dashboard.json.template | 35 +++++++- .../pai-taskview-dashboard.json.template | 39 ++++++++- 7 files changed, 280 insertions(+), 31 deletions(-) diff --git a/src/grafana/deploy/grafana-configuration/pai-clusterview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-clusterview-dashboard.json.template index cb3d42b9ad..54841bdff0 100644 --- 
a/src/grafana/deploy/grafana-configuration/pai-clusterview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-clusterview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {"dashboard": { "annotations": { @@ -14,7 +14,7 @@ } ] }, - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": true, @@ -466,7 +466,7 @@ "steppedLine": false, "targets": [ { - "expr": "100 - avg (irate(node_cpu_seconds_total{mode=\"idle\"}[{{interval}}s])) * 100", + "expr": "avg ((sum by (instance) (idelta(node_cpu_seconds_total{}[{{interval}}s])) > bool 0) * (100 - (avg by (instance)(irate(node_cpu_seconds_total{mode=\"idle\"}[{{interval}}s])) * 100)))", "format": "time_series", "intervalFactor": 2, "legendFormat": "cpu utilization", @@ -553,14 +553,63 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "free", - "refId": "D" + "refId": "B" }, { "expr": "sum(node_memory_Buffers_bytes) + sum(node_memory_Cached_bytes)", "format": "time_series", "intervalFactor": 2, "legendFormat": "buff/cache", - "refId": "B" + "refId": "C" + }, + { + "expr": "sum(node_memory_bytes{type=\"physical_total\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "phisical total", + "refId": "D" + }, + { + "expr": "sum(node_memory_bytes{type=\"physical_available\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "phisical available", + "refId": "E" + }, + { + "expr": "sum(node_memory_bytes{type=\"committed\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "committed", + "refId": "F" + }, + { + "expr": "sum(node_memory_bytes{type=\"commit_limit\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "commit limit", + "refId": "G" + }, + { + "expr": "sum(node_memory_bytes{type=\"system_cache\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "system cache", + "refId": "H" + }, + { + "expr": "sum(node_memory_bytes{type=\"kernel_paged\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "kernel paged", + "refId": "I" + }, + { + "expr": "(node_memory_bytes{type=\"kernel_non_paged\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "kernel non paged", + "refId": "J" } ], "thresholds": [], @@ -631,14 +680,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo\"}[{{interval}}s]))", + "expr": "sum(irate(node_network_receive_bytes_total{device!~\"lo\"}[{{interval}}s]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "in", "refId": "A" }, { - "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo\"}[{{interval}}s]))", + "expr": "sum(irate(node_network_transmit_bytes_total{device!~\"lo\"}[{{interval}}s]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "out", @@ -725,18 +774,25 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_read_bytes_total[{{interval}}s]))", + "expr": "sum(irate(node_disk_read_bytes_total[{{interval}}s]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "read", "refId": "A" }, { - "expr": "sum(rate(node_disk_written_bytes_total[{{interval}}s]))", + "expr": "sum(irate(node_disk_written_bytes_total[{{interval}}s]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "write", "refId": "B" + }, + { + "expr": 
"sum(irate(node_disk_other_bytes_total[{{interval}}s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "other", + "refId": "C" } ], "thresholds": [], diff --git a/src/grafana/deploy/grafana-configuration/pai-jobview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-jobview-dashboard.json.template index 32e53ad716..fa446d44b5 100644 --- a/src/grafana/deploy/grafana-configuration/pai-jobview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-jobview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {% set url = cluster_cfg["grafana"]["url"] %} {"dashboard": { @@ -96,6 +96,13 @@ "instant": false, "intervalFactor": 2, "refId": "A" + }, + { + "expr": "avg by (job_name)(irate(task_cpu_seconds_total{job_name=~\"$job\"}[{{interval}}s])) * 100", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "B" } ], "thresholds": [ @@ -300,6 +307,26 @@ "legendFormat": "Network Outbound", "refId": "B", "step": 600 + }, + { + "expr": "avg by (job_name) (irate(task_network_receive_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Network Inbound", + "refId": "C", + "step": 600 + }, + { + "expr": "avg by (job_name) (irate(task_network_transmit_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Network Outbound", + "refId": "D", + "step": 600 } ], "thresholds": [], @@ -404,6 +431,13 @@ "intervalFactor": 2, "legendFormat": "Disk Write", "refId": "B" + }, + { + "expr": "avg by (job_name) (irate(task_block_other_byte{ job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Disk Other", + "refId": "C" } ], "thresholds": [], @@ -633,7 +667,7 @@ "multiFormat": "regex values", "name": "job", "options": [], - "query": "label_values(task_cpu_percent, job_name)", + "query": "label_values(task_mem_usage_byte, job_name)", "refresh": 1, "regex": "^(?!\\s*$).+", "sort": 1, diff --git a/src/grafana/deploy/grafana-configuration/pai-nodeview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-nodeview-dashboard.json.template index 1ff95a654f..f3f3c93280 100644 --- a/src/grafana/deploy/grafana-configuration/pai-nodeview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-nodeview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {% set url = cluster_cfg["grafana"]["url"] %} {"dashboard": { @@ -16,7 +16,7 @@ ] }, "description": "Dashboard to view multiple servers", - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": true, @@ -89,11 +89,11 @@ "steppedLine": false, "targets": [ { - "expr": "100 - (avg by (instance)(irate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s])) * 100)", + "expr": "(sum by (instance) (idelta(node_cpu_seconds_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s])) > bool 0) * (100 - (avg by (instance)(irate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s])) * 100))", "format": "time_series", "hide": 
false, "intervalFactor": 2, - "refId": "B" + "refId": "A" } ], "thresholds": [ @@ -206,7 +206,7 @@ "interval": "", "intervalFactor": 2, "legendFormat": "free", - "refId": "E", + "refId": "B", "step": 600 }, { @@ -214,7 +214,56 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "buff/cache", - "refId": "B" + "refId": "C" + }, + { + "expr": "node_memory_bytes{type=\"physical_total\",instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "phisical total", + "refId": "D" + }, + { + "expr": "node_memory_bytes{type=\"physical_available\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "phisical available", + "refId": "E" + }, + { + "expr": "node_memory_bytes{type=\"committed\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "committed", + "refId": "F" + }, + { + "expr": "node_memory_bytes{type=\"commit_limit\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "commit limit", + "refId": "G" + }, + { + "expr": "node_memory_bytes{type=\"system_cache\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "system cache", + "refId": "H" + }, + { + "expr": "node_memory_bytes{type=\"kernel_paged\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "kernel paged", + "refId": "I" + }, + { + "expr": "node_memory_bytes{type=\"kernel_non_paged\", instance=~'$node(:[0-9]*)?$'}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "kernel non paged", + "refId": "J" } ], "thresholds": [], @@ -295,7 +344,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", + "expr": "sum(irate(node_network_receive_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -306,9 +355,8 @@ "target": "" }, { - "expr": "sum(rate(node_network_transmit_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", + "expr": "sum(irate(node_network_transmit_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", "format": "time_series", - "hide": true, "interval": "", "intervalFactor": 2, "legendFormat": "out", @@ -402,7 +450,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_disk_read_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", + "expr": "sum(irate(node_disk_read_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", "format": "time_series", "interval": "", "intervalFactor": 2, @@ -413,12 +461,20 @@ "target": "" }, { - "expr": "sum(rate(node_disk_written_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", + "expr": "sum(irate(node_disk_written_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", "format": "time_series", "hide": false, "intervalFactor": 2, "legendFormat": "write", "refId": "B" + }, + { + "expr": "sum(irate(node_disk_other_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "other", + "refId": "C" } ], "thresholds": [], @@ -809,7 +865,7 @@ "multiFormat": "regex values", "name": "node", "options": [], - "query": "label_values(node_uname_info, instance)", + "query": "label_values(node_cpu_seconds_total, instance)", "refresh": 1, "regex": "/([^:]*)/", "sort": 1, diff --git 
a/src/grafana/deploy/grafana-configuration/pai-serviceview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-serviceview-dashboard.json.template index fbf1d07755..063a1e3234 100644 --- a/src/grafana/deploy/grafana-configuration/pai-serviceview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-serviceview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {"dashboard": { "annotations": { @@ -412,6 +412,11 @@ "text": "prometheus", "value": "prometheus" }, + { + "selected": false, + "text": "prometheus-pushgateway", + "value": "prometheus-pushgateway" + }, { "selected": false, "text": "pylon", diff --git a/src/grafana/deploy/grafana-configuration/pai-taskroleview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-taskroleview-dashboard.json.template index c01508d390..e709a817d0 100644 --- a/src/grafana/deploy/grafana-configuration/pai-taskroleview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-taskroleview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {% set url = cluster_cfg["grafana"]["url"] %} {"dashboard": { @@ -114,6 +114,13 @@ "intervalFactor": 2, "legendFormat": "{{'{{role_name}}'}}", "refId": "A" + }, + { + "expr": "avg by (job_name)(irate(task_cpu_seconds_total{job_name=~\"$job\"}[{{interval}}s])) * 100", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "refId": "B" } ], "thresholds": [ @@ -318,6 +325,26 @@ "legendFormat": "Net Out {{'{{role_name}}'}}", "refId": "B", "step": 600 + }, + { + "expr": "avg by (job_name, role_name) (irate(task_network_receive_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Net In {{'{{role_name}}'}}", + "refId": "C", + "step": 600 + }, + { + "expr": "avg by (job_name, role_name) (irate(task_network_transmit_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Net Out {{'{{role_name}}'}}", + "refId": "D", + "step": 600 } ], "thresholds": [], @@ -420,6 +447,13 @@ "intervalFactor": 2, "legendFormat": "Write {{'{{role_name}}'}}", "refId": "B" + }, + { + "expr": "avg by (job_name) (irate(task_block_other_byte{ job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Disk Other", + "refId": "C" } ], "thresholds": [], @@ -665,7 +699,7 @@ "multiFormat": "regex values", "name": "job", "options": [], - "query": "label_values(task_cpu_percent, job_name)", + "query": "label_values(task_mem_usage_byte, job_name)", "refresh": 1, "regex": "^(?!\\s*$).+", "sort": 1, diff --git a/src/grafana/deploy/grafana-configuration/pai-tasks-in-node-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-tasks-in-node-dashboard.json.template index 9337d61257..cb03ffe57d 100644 --- a/src/grafana/deploy/grafana-configuration/pai-tasks-in-node-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-tasks-in-node-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = 
cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {% set url = cluster_cfg["grafana"]["url"] %} {"dashboard": { @@ -16,7 +16,7 @@ ] }, "description": "Dashboard to view jobs in node", - "editable": false, + "editable": true, "gnetId": null, "graphTooltip": 0, "hideControls": true, @@ -86,6 +86,14 @@ "intervalFactor": 2, "legendFormat": "{{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", "refId": "A" + }, + { + "expr": "irate(task_cpu_seconds_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s]) * 100", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", + "refId": "B" } ], "thresholds": [], @@ -245,6 +253,20 @@ "intervalFactor": 2, "legendFormat": "Net Out {{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", "refId": "B" + }, + { + "expr": "irate(task_network_receive_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Net In {{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", + "refId": "C" + }, + { + "expr": "irate(task_network_transmit_bytes_total{instance=~\"$node(:[0-9]*)?$\"}[{{interval}}s])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Net Out {{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", + "refId": "D" } ], "thresholds": [], @@ -328,6 +350,13 @@ "intervalFactor": 2, "legendFormat": "Write {{'{{'}}job_name{{'}}'}}-{{'{{'}}role_name{{'}}'}}-{{'{{'}}task_index{{'}}'}}", "refId": "B" + }, + { + "expr": "avg by (job_name) (irate(task_block_other_byte{ job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Disk Other", + "refId": "C" } ], "thresholds": [], @@ -753,7 +782,7 @@ "multi": false, "name": "node", "options": [], - "query": "label_values(node_uname_info, instance)", + "query": "label_values(node_cpu_seconds_total, instance)", "refresh": 1, "regex": "/([^:]*)/", "sort": 1, diff --git a/src/grafana/deploy/grafana-configuration/pai-taskview-dashboard.json.template b/src/grafana/deploy/grafana-configuration/pai-taskview-dashboard.json.template index c586162128..c1b51ab7dd 100644 --- a/src/grafana/deploy/grafana-configuration/pai-taskview-dashboard.json.template +++ b/src/grafana/deploy/grafana-configuration/pai-taskview-dashboard.json.template @@ -1,4 +1,4 @@ -{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 10 %} +{% set interval = cluster_cfg["prometheus"]["scrape_interval"]|default(30) * 2 %} {% set url = cluster_cfg["grafana"]["url"] %} {"dashboard": { @@ -102,6 +102,14 @@ "intervalFactor": 2, "legendFormat": "{{'{{role_name}}'}}-{{'{{task_index}}'}}", "refId": "A" + }, + { + "expr": "avg by (job_name, role_name, task_index)(irate(task_cpu_seconds_total{job_name=~\"$job\"}[{{interval}}s])) * 100", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{'{{role_name}}'}}-{{'{{task_index}}'}}", + "refId": "B" } ], "thresholds": [ @@ -307,6 +315,26 @@ "legendFormat": "Net Out {{'{{role_name}}'}}-{{'{{task_index}}'}}", "refId": "B", "step": 600 + }, + { + "expr": "avg by (job_name, role_name, task_index) (irate(task_network_receive_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Net In 
{{'{{role_name}}'}}-{{'{{task_index}}'}}", + "refId": "C", + "step": 600 + }, + { + "expr": "avg by (job_name, role_name, task_index) (irate(task_network_transmit_bytes_total{job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Net Out {{'{{role_name}}'}}-{{'{{task_index}}'}}", + "refId": "D", + "step": 600 } ], "thresholds": [], @@ -411,6 +439,13 @@ "intervalFactor": 2, "legendFormat": "Write {{'{{role_name}}'}}-{{'{{task_index}}'}}", "refId": "B" + }, + { + "expr": "avg by (job_name) (irate(task_block_other_byte{ job_name=~\"$job\"}[{{interval}}s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Disk Other", + "refId": "C" } ], "thresholds": [], @@ -850,7 +885,7 @@ "multiFormat": "regex values", "name": "job", "options": [], - "query": "label_values(task_cpu_percent, job_name)", + "query": "label_values(task_mem_usage_byte, job_name)", "refresh": 1, "regex": "^(?!\\s*$).+", "sort": 1, From aa2b18b668313a94fb40e3b38b2b4b8590bbc9d3 Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Tue, 10 Aug 2021 16:02:12 +0800 Subject: [PATCH 08/17] fix doc related to china deployment (#5593) * fix * fix --- .../cluster-admin/configuration-for-china.md | 29 ------------------- .../installation-faqs-and-troubleshooting.md | 2 +- .../cluster-admin/installation-guide.md | 2 +- .../cluster-admin/configuration-for-china.md | 29 ------------------- .../installation-faqs-and-troubleshooting.md | 2 +- .../cluster-admin/installation-guide.md | 2 +- 6 files changed, 4 insertions(+), 62 deletions(-) delete mode 100644 docs/manual/cluster-admin/configuration-for-china.md delete mode 100644 docs_zh_CN/manual/cluster-admin/configuration-for-china.md diff --git a/docs/manual/cluster-admin/configuration-for-china.md b/docs/manual/cluster-admin/configuration-for-china.md deleted file mode 100644 index 58c5ed3b0c..0000000000 --- a/docs/manual/cluster-admin/configuration-for-china.md +++ /dev/null @@ -1,29 +0,0 @@ -如果您是中国用户,在[创建设置文件这一步](./installation-guide.md#create-configurations),请使用下面的`config`文件: - -###### `config` example - -```yaml -user: -password: -docker_image_tag: v1.5.0 - -gcr_image_repo: "gcr.azk8s.cn" -kube_image_repo: "gcr.azk8s.cn/google-containers" -kubeadm_download_url: "https://shaiictestblob01.blob.core.chinacloudapi.cn/share-all/kubeadm" -hyperkube_download_url: "https://shaiictestblob01.blob.core.chinacloudapi.cn/share-all/hyperkube" - -openpai_kubespray_extra_var: - pod_infra_image_repo: "gcr.azk8s.cn/google_containers/pause-{{ image_arch }}" - dnsautoscaler_image_repo: "gcr.azk8s.cn/google_containers/cluster-proportional-autoscaler-{{ image_arch }}" - tiller_image_repo: "gcr.azk8s.cn/kubernetes-helm/tiller" - registry_proxy_image_repo: "gcr.azk8s.cn/google_containers/kube-registry-proxy" - metrics_server_image_repo: "gcr.azk8s.cn/google_containers/metrics-server-amd64" - addon_resizer_image_repo: "gcr.azk8s.cn/google_containers/addon-resizer" - dashboard_image_repo: "gcr.azk8s.cn/google_containers/kubernetes-dashboard-{{ image_arch }}" -``` - -此文件中,请把`user`和`password`替换为您master和worker机器的SSH用户及密码;`docker_image_tag`请替换为想要安装的OpenPAI版本,例如如果想要安装`v1.5.0`版本,请将`docker_image_tag`替换为`v1.5.0`。另外,如果您在Azure China中搭建,请加入一行`openpai_kube_network_plugin: weave`,因为Azure暂时不支持默认的calico插件。 - -如果使用此`config`文件,会从我们合作伙伴[上海仪电创新院](https://www.shaiic.com/)提供的地址下载必要的`kubeadm`和`hyperkube`文件;此外会使用`gcr.azk8s.cn`作为`gcr.io`的镜像服务器。如果您的网络无法访问`gcr.azk8s.cn`,可以寻找别的`gcr.io`替代镜像,并对`config`文件作对应修改。 - 
-除了该`config`文件外,其他的步骤都和[Installation Guide](./installation-guide.md)一致。 diff --git a/docs/manual/cluster-admin/installation-faqs-and-troubleshooting.md b/docs/manual/cluster-admin/installation-faqs-and-troubleshooting.md index 5c71138bcd..8abd9eaa05 100644 --- a/docs/manual/cluster-admin/installation-faqs-and-troubleshooting.md +++ b/docs/manual/cluster-admin/installation-faqs-and-troubleshooting.md @@ -167,7 +167,7 @@ Sometimes it is not fixable even you have the `python3-apt` package installed. I #### Network-related Issues -If you are a China user, please refer to [here](./configuration-for-china.md). +If you are a China user, please refer to [this issue](https://github.com/microsoft/pai/issues/5592). **Cannot download kubeadm or hyperkube** diff --git a/docs/manual/cluster-admin/installation-guide.md b/docs/manual/cluster-admin/installation-guide.md index 212b5dddb8..3b1bc99752 100644 --- a/docs/manual/cluster-admin/installation-guide.md +++ b/docs/manual/cluster-admin/installation-guide.md @@ -170,7 +170,7 @@ Please edit `layout.yaml` and a `config.yaml` file under `/contrib These two files specify the cluster layout and the customized configuration, respectively. The following is the format and example of these 2 files. -**Tips for Chinese Users**: If you are in Mainland China, please refer to [here](./configuration-for-china.md) first before you edit these files. +**Tips for Chinese Users**: If you are in Mainland China, please read [this issue](https://github.com/microsoft/pai/issues/5592) first before you edit these files. #### `layout.yaml` format diff --git a/docs_zh_CN/manual/cluster-admin/configuration-for-china.md b/docs_zh_CN/manual/cluster-admin/configuration-for-china.md deleted file mode 100644 index 50de646a26..0000000000 --- a/docs_zh_CN/manual/cluster-admin/configuration-for-china.md +++ /dev/null @@ -1,29 +0,0 @@ -如果您是中国用户,在[创建设置文件这一步](./installation-guide.md#create-configurations),请使用下面的`config`文件: - -###### `config` 示例 - -```yaml -user: -password: -docker_image_tag: v1.5.0 - -gcr_image_repo: "gcr.azk8s.cn" -kube_image_repo: "gcr.azk8s.cn/google-containers" -kubeadm_download_url: "https://shaiictestblob01.blob.core.chinacloudapi.cn/share-all/kubeadm" -hyperkube_download_url: "https://shaiictestblob01.blob.core.chinacloudapi.cn/share-all/hyperkube" - -openpai_kubespray_extra_var: - pod_infra_image_repo: "gcr.azk8s.cn/google_containers/pause-{{ image_arch }}" - dnsautoscaler_image_repo: "gcr.azk8s.cn/google_containers/cluster-proportional-autoscaler-{{ image_arch }}" - tiller_image_repo: "gcr.azk8s.cn/kubernetes-helm/tiller" - registry_proxy_image_repo: "gcr.azk8s.cn/google_containers/kube-registry-proxy" - metrics_server_image_repo: "gcr.azk8s.cn/google_containers/metrics-server-amd64" - addon_resizer_image_repo: "gcr.azk8s.cn/google_containers/addon-resizer" - dashboard_image_repo: "gcr.azk8s.cn/google_containers/kubernetes-dashboard-{{ image_arch }}" -``` - -此文件中,请把`user`和`password`替换为您master和worker机器的SSH用户及密码;`docker_image_tag`请替换为想要安装的OpenPAI版本,例如如果想要安装`v1.5.0`版本,请将`docker_image_tag`替换为`v1.5.0`。另外,如果您在Azure China中搭建,请加入一行`openpai_kube_network_plugin: weave`,因为Azure暂时不支持默认的calico插件。 - -如果使用此`config`文件,会从我们合作伙伴[上海仪电创新院](https://www.shaiic.com/)提供的地址下载必要的`kubeadm`和`hyperkube`文件;此外会使用`gcr.azk8s.cn`作为`gcr.io`的镜像服务器。如果您的网络无法访问`gcr.azk8s.cn`,可以寻找别的`gcr.io`替代镜像,并对`config`文件作对应修改。 - -除了该`config`文件外,其他的步骤都和[Installation Guide](./installation-guide.md)一致。 diff --git a/docs_zh_CN/manual/cluster-admin/installation-faqs-and-troubleshooting.md 
b/docs_zh_CN/manual/cluster-admin/installation-faqs-and-troubleshooting.md index 2688952192..3ccb2652ca 100644 --- a/docs_zh_CN/manual/cluster-admin/installation-faqs-and-troubleshooting.md +++ b/docs_zh_CN/manual/cluster-admin/installation-faqs-and-troubleshooting.md @@ -163,7 +163,7 @@ sudo chmod 644 /etc/hosts #### 网络相关的问题 -如果您是中国用户,请先参考[这个文档](./configuration-for-china.md). +如果您是中国用户,请先参考[这个issue](https://github.com/microsoft/pai/issues/5592). **无法下载kubeadm或hyperkube二进制文件** diff --git a/docs_zh_CN/manual/cluster-admin/installation-guide.md b/docs_zh_CN/manual/cluster-admin/installation-guide.md index ce904b1b24..9b8432dace 100644 --- a/docs_zh_CN/manual/cluster-admin/installation-guide.md +++ b/docs_zh_CN/manual/cluster-admin/installation-guide.md @@ -171,7 +171,7 @@ git checkout v1.8.0 #### 关于中国用户的提示 -如果您是中国用户,在编辑这两个文件前,请先阅读[这个文档](./configuration-for-china.md)。 +在中国安装会有一些网络问题,在开始前,请先阅读[这个issue](https://github.com/microsoft/pai/issues/5592)。 ####
`layout.yaml` 格式示例
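
PATCH 01 above makes `add_docker_cache_config.py` parse `/etc/docker/daemon.json` only when the file exists and is non-empty; otherwise it creates the file and starts from an empty config, which is the case that previously made the script fail. The sketch below replays that branch in isolation against a throwaway path, so the empty-file case can be checked without touching a real host. The `merge_mirror` helper and the `10.0.0.1` master address are illustrative (the hunk does not show the script's full `registry-mirrors` handling); the `:30500` port comes from `docker_cache_host` in PATCH 03.

```python
import json
from pathlib import Path


def load_current_config(target_path: Path) -> dict:
    """Same branch as the patched script: parse daemon.json only if it exists and is non-empty."""
    if target_path.exists() and target_path.stat().st_size:
        with open(str(target_path)) as f:
            return json.load(f)
    target_path.touch(mode=0o666)
    return {}


def merge_mirror(config: dict, host: str) -> dict:
    """Illustrative merge step; the real script's registry-mirrors handling is not fully shown in the hunk."""
    mirror = "http://{}".format(host)
    mirrors = config.get("registry-mirrors", [])
    if mirror not in mirrors:
        mirrors.insert(0, mirror)
    config["registry-mirrors"] = mirrors
    return config


if __name__ == "__main__":
    # Replay the previously failing case: an existing but empty daemon.json.
    tmp = Path("/tmp/daemon.json")
    tmp.write_text("")
    cfg = merge_mirror(load_current_config(tmp), "10.0.0.1:30500")
    print(json.dumps(cfg))  # {"registry-mirrors": ["http://10.0.0.1:30500"]}
```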
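
PATCH 06 serves the Pushgateway behind the `/prometheus-pushgateway` route prefix and adds a `metrics-cleaner` sidecar that deletes any job group whose `push_time_seconds` is older than `job-time-to-live` (30 seconds by default, checked every 15 seconds). Below is a minimal sketch of the job side, pushing one gauge to the same `metrics/job/<name>` path that the cleaner later issues a DELETE against. The master IP, metric name, and job name are placeholders; the `9097` port is the service default from `config/prometheus-pushgateway.yaml`.

```python
import requests

# Placeholder address: <pai-master-ip> plus the default port 9097 from the service config.
PUSHGATEWAY = "http://10.0.0.1:9097"
JOB_NAME = "demo-training-job"  # illustrative; the cleaner groups and deletes metrics by this label

# The Pushgateway accepts the Prometheus text exposition format via PUT/POST on
# /metrics/job/<job>; with the route prefix set in the deployment that becomes:
url = "{}/prometheus-pushgateway/metrics/job/{}".format(PUSHGATEWAY, JOB_NAME)

payload = (
    "# TYPE demo_epoch_loss gauge\n"
    "demo_epoch_loss 0.42\n"
)
requests.put(url, data=payload).raise_for_status()

# If nothing pushes to this job group again within job-time-to-live seconds, the
# metrics-cleaner reads push_time_seconds from /api/v1/metrics and DELETEs this same URL.
```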
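
PATCH 07's cluster-view CPU panel now multiplies utilization by `(sum by (instance)(idelta(node_cpu_seconds_total[...])) > bool 0)`, so an instance whose CPU counters have stopped moving contributes 0 rather than a stale 100%, per the commit message. The new expression can be inspected outside Grafana through Prometheus' HTTP query API, as in the sketch below; the Prometheus address is a placeholder, and the 60 s window assumes the template default `scrape_interval` of 30 s multiplied by 2.

```python
import requests

# Placeholder; point this at wherever the cluster's Prometheus API is reachable.
PROMETHEUS = "http://10.0.0.1:9090"

# Expression taken from the updated cluster-view dashboard, with {{interval}} rendered
# to 60s (the template's default scrape_interval of 30s multiplied by 2).
EXPR = (
    'avg ((sum by (instance) (idelta(node_cpu_seconds_total{}[60s])) > bool 0) '
    '* (100 - (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[60s])) * 100)))'
)

resp = requests.get("{}/api/v1/query".format(PROMETHEUS), params={"query": EXPR})
resp.raise_for_status()

# The outer avg() collapses the result to a single sample; drop it to see per-instance
# values, where the `> bool 0` guard zeroes out instances with non-moving CPU counters.
for sample in resp.json()["data"]["result"]:
    print(sample["metric"], sample["value"])
```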
From 7cf6e7682943d37ad5e5bb25e08c07cc8343f4cd Mon Sep 17 00:00:00 2001 From: Binyang2014 Date: Wed, 11 Aug 2021 20:39:40 -0600 Subject: [PATCH 09/17] Bump runtime version (#5600) --- src/openpai-runtime/build/openpai-runtime.k8s.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openpai-runtime/build/openpai-runtime.k8s.dockerfile b/src/openpai-runtime/build/openpai-runtime.k8s.dockerfile index 34d247bf12..d9287d15e9 100644 --- a/src/openpai-runtime/build/openpai-runtime.k8s.dockerfile +++ b/src/openpai-runtime/build/openpai-runtime.k8s.dockerfile @@ -15,5 +15,5 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -FROM openpairuntime/openpai-runtime:v0.1.11 +FROM openpairuntime/openpai-runtime:v0.1.12 From bb7c394d5541f2f8a06a1f8828d980de110135f7 Mon Sep 17 00:00:00 2001 From: Zhiyuan He <362583303@qq.com> Date: Thu, 12 Aug 2021 11:17:52 +0800 Subject: [PATCH 10/17] fix link in readme (#5595) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b2a1b698fd..05dcda922b 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ OpenPAI provides end-to-end manuals for both cluster users and administrators. ### For cluster administrators -The [admin manual](https://openpai.readthedocs.io/en/latest/manual/cluster-admin/README.html) is a comprehensive guide for cluster administrators, it covers (but not limited to) the following contents: +The [admin manual](https://openpai.readthedocs.io/en/latest/manual/cluster-admin/index.html) is a comprehensive guide for cluster administrators, it covers (but not limited to) the following contents: - **Installation and upgrade**. The installation is based on Kubespray, and here is the [system requirements](https://openpai.readthedocs.io/en/latest/manual/cluster-admin/installation-guide.html#installation-requirements). OpenPAI provides an [installation guide](https://openpai.readthedocs.io/en/latest/manual/cluster-admin/installation-guide.html) to facilitate the installation. @@ -140,7 +140,7 @@ The [admin manual](https://openpai.readthedocs.io/en/latest/manual/cluster-admin ### For cluster users -The [user manual](https://openpai.readthedocs.io/en/latest/manual/cluster-user/README.html) is a guidance for cluster users, who could train and serve deep learning (and other) tasks on OpenPAI. +The [user manual](https://openpai.readthedocs.io/en/latest/manual/cluster-user/index.html) is a guidance for cluster users, who could train and serve deep learning (and other) tasks on OpenPAI. - **Job submission and monitoring**. The [quick start tutorial](https://openpai.readthedocs.io/en/latest/manual/cluster-user/quick-start.html) is a good start for learning how to train models on OpenPAI. And more examples and supports to multiple mainstream frameworks (out-of-the-box docker images) are in [here](https://openpai.readthedocs.io/en/latest/manual/cluster-user/docker-images-and-job-examples.html). OpenPAI also provides supports for [good debuggability](https://openpai.readthedocs.io/en/latest/manual/cluster-user/how-to-debug-jobs.html) and [advanced job functionalities](https://openpai.readthedocs.io/en/latest/manual/cluster-user/advanced-jobs.html). 
From 0d9760a404e8c6719c0dfad2ea6d2197abf28742 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:23:41 +0000 Subject: [PATCH 11/17] Bump merge-deep from 3.0.2 to 3.0.3 in /src/webportal (#5524) --- src/webportal/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/webportal/yarn.lock b/src/webportal/yarn.lock index f89009a49b..b587b0b775 100644 --- a/src/webportal/yarn.lock +++ b/src/webportal/yarn.lock @@ -7150,9 +7150,9 @@ merge-anything@^2.2.4: is-what "^3.2.3" merge-deep@^3.0.2: - version "3.0.2" - resolved "https://registry.yarnpkg.com/merge-deep/-/merge-deep-3.0.2.tgz#f39fa100a4f1bd34ff29f7d2bf4508fbb8d83ad2" - integrity sha512-T7qC8kg4Zoti1cFd8Cr0M+qaZfOwjlPDEdZIIPPB2JZctjaPM4fX+i7HOId69tAti2fvO6X5ldfYUONDODsrkA== + version "3.0.3" + resolved "https://registry.yarnpkg.com/merge-deep/-/merge-deep-3.0.3.tgz#1a2b2ae926da8b2ae93a0ac15d90cd1922766003" + integrity sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA== dependencies: arr-union "^3.1.0" clone-deep "^0.2.4" From d6f13772c1ff8c38a736330d2d93819f024dd72b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:24:55 +0000 Subject: [PATCH 12/17] Bump postcss from 7.0.17 to 7.0.36 in /src/webportal (#5531) --- src/webportal/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/webportal/yarn.lock b/src/webportal/yarn.lock index b587b0b775..7472aa32bc 100644 --- a/src/webportal/yarn.lock +++ b/src/webportal/yarn.lock @@ -8554,9 +8554,9 @@ postcss-value-parser@^3.0.0, postcss-value-parser@^3.2.3, postcss-value-parser@^ integrity sha512-pISE66AbVkp4fDQ7VHBwRNXzAAKJjw4Vw7nWI/+Q3vuly7SNfgYXvm6i5IgFylHGK5sP/xHAbB7N49OS4gWNyQ== postcss@^7.0.0, postcss@^7.0.1, postcss@^7.0.14, postcss@^7.0.16, postcss@^7.0.5, postcss@^7.0.6: - version "7.0.17" - resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.17.tgz#4da1bdff5322d4a0acaab4d87f3e782436bad31f" - integrity sha512-546ZowA+KZ3OasvQZHsbuEpysvwTZNGJv9EfyCQdsIDltPSWHAeTQ5fQy/Npi2ZDtLI3zs7Ps/p6wThErhm9fQ== + version "7.0.36" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.36.tgz#056f8cffa939662a8f5905950c07d5285644dfcb" + integrity sha512-BebJSIUMwJHRH0HAQoxN4u1CN86glsrwsW0q7T+/m44eXOUAxSNdHRkNZPYz5vVUbg17hFgOQDE7fZk7li3pZw== dependencies: chalk "^2.4.2" source-map "^0.6.1" From 6ad87f213e413a6719ec6f273a99f2fd235b7e19 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:24:58 +0000 Subject: [PATCH 13/17] Bump postcss from 7.0.14 to 7.0.36 in /contrib/submit-job-v2 (#5532) --- contrib/submit-job-v2/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/submit-job-v2/yarn.lock b/contrib/submit-job-v2/yarn.lock index 9855e74491..23b0211f7d 100644 --- a/contrib/submit-job-v2/yarn.lock +++ b/contrib/submit-job-v2/yarn.lock @@ -3871,9 +3871,9 @@ postcss-value-parser@^3.3.0, postcss-value-parser@^3.3.1: integrity sha512-pISE66AbVkp4fDQ7VHBwRNXzAAKJjw4Vw7nWI/+Q3vuly7SNfgYXvm6i5IgFylHGK5sP/xHAbB7N49OS4gWNyQ== postcss@^7.0.14, postcss@^7.0.5, postcss@^7.0.6: - version "7.0.14" - resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.14.tgz#4527ed6b1ca0d82c53ce5ec1a2041c2346bbd6e5" - integrity sha512-NsbD6XUUMZvBxtQAJuWDJeeC4QFsmWsfozWxCJPWf3M55K9iu2iMDaKqyoOdTJ1R4usBXuxlVFAIo8rZPQD4Bg== + version "7.0.36" + resolved 
"https://registry.yarnpkg.com/postcss/-/postcss-7.0.36.tgz#056f8cffa939662a8f5905950c07d5285644dfcb" + integrity sha512-BebJSIUMwJHRH0HAQoxN4u1CN86glsrwsW0q7T+/m44eXOUAxSNdHRkNZPYz5vVUbg17hFgOQDE7fZk7li3pZw== dependencies: chalk "^2.4.2" source-map "^0.6.1" From 3cfc50cfcd3a20824694bb53f89e59a49555b7e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:25:49 +0000 Subject: [PATCH 14/17] Bump path-parse from 1.0.6 to 1.0.7 in /src/rest-server (#5597) --- src/rest-server/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rest-server/yarn.lock b/src/rest-server/yarn.lock index dc094088c5..a4d3bfeb29 100644 --- a/src/rest-server/yarn.lock +++ b/src/rest-server/yarn.lock @@ -2820,9 +2820,9 @@ path-key@^3.1.0: integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-to-regexp@0.1.7: version "0.1.7" From 1aca8069a5fd20cb89e8777aa3c83c358e05a5d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:26:58 +0000 Subject: [PATCH 15/17] Bump color-string from 1.5.3 to 1.6.0 in /src/webportal (#5594) --- src/webportal/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/webportal/yarn.lock b/src/webportal/yarn.lock index 7472aa32bc..1f29338aa3 100644 --- a/src/webportal/yarn.lock +++ b/src/webportal/yarn.lock @@ -3351,9 +3351,9 @@ color-name@^1.0.0, color-name@~1.1.4: integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== color-string@^1.5.2: - version "1.5.3" - resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.5.3.tgz#c9bbc5f01b58b5492f3d6857459cb6590ce204cc" - integrity sha512-dC2C5qeWoYkxki5UAXapdjqO672AM4vZuPGRQfO8b5HKuKGBbKWpITyDYN7TOFKvRW7kOgAn3746clDBMDJyQw== + version "1.6.0" + resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.6.0.tgz#c3915f61fe267672cb7e1e064c9d692219f6c312" + integrity sha512-c/hGS+kRWJutUBEngKKmk4iH3sD59MBkoxVapS/0wgpCz2u7XsNloxknyvBhzwEs1IbV36D9PwqLPJ2DTu3vMA== dependencies: color-name "^1.0.0" simple-swizzle "^0.2.2" From 4a80d52d559601b267da6405e6a5879a7ea92270 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:31:17 +0000 Subject: [PATCH 16/17] Bump path-parse from 1.0.6 to 1.0.7 in /src/webportal (#5598) --- src/webportal/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/webportal/yarn.lock b/src/webportal/yarn.lock index 1f29338aa3..f9c7a02ae7 100644 --- a/src/webportal/yarn.lock +++ b/src/webportal/yarn.lock @@ -8098,9 +8098,9 @@ path-key@^2.0.0, path-key@^2.0.1: integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A= path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity 
sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-platform@~0.11.15: version "0.11.15" From 3950e50c4a07a261a9e1a6f37e460aee3fb7b849 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 08:31:42 +0000 Subject: [PATCH 17/17] Bump path-parse from 1.0.6 to 1.0.7 in /contrib/submit-job-v2 (#5599) --- contrib/submit-job-v2/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/submit-job-v2/yarn.lock b/contrib/submit-job-v2/yarn.lock index 23b0211f7d..612a0d00cc 100644 --- a/contrib/submit-job-v2/yarn.lock +++ b/contrib/submit-job-v2/yarn.lock @@ -3737,9 +3737,9 @@ path-key@^2.0.0, path-key@^2.0.1: integrity sha1-QRyttXTFoUDTpLGRDUDYDMn0C0A= path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-to-regexp@0.1.7: version "0.1.7"