diff --git a/ansible/playbooks/vars/default.yaml b/ansible/playbooks/vars/default.yaml index 3b5654dbfe..9644fe096b 100644 --- a/ansible/playbooks/vars/default.yaml +++ b/ansible/playbooks/vars/default.yaml @@ -12,7 +12,7 @@ # Overwrite by passing sd_user and/or sd_password variables when running the playbook seldon_admin_user: "admin" -seldon_random_gen_password: "{{ lookup('password', '/dev/null chars=digits length=6') }}" +seldon_random_gen_password: "r-{{ lookup('password', '/dev/null chars=digits length=6') }}" # KinD Configuration kind_local_mount: false diff --git a/ansible/roles/grafana/defaults/main.yaml b/ansible/roles/grafana/defaults/main.yaml index 50b00941e6..cb853caaa1 100644 --- a/ansible/roles/grafana/defaults/main.yaml +++ b/ansible/roles/grafana/defaults/main.yaml @@ -1,7 +1,9 @@ --- -grafana_chart_version: 7.3.9 -grafana_app_version: 10.4.2 +grafana_chart_version: 8.4.1 +grafana_app_version: 11.1.3 grafana_preloaded_dashboards: - name: mms json: "{{ lookup('file', playbook_dir + '/../../prometheus/dashboards/provisioning/seldon.json') }}" + - name: perf + json: "{{ lookup('file', playbook_dir + '/../../prometheus/dashboards/provisioning/perf_and_scaling.json') }}" diff --git a/prometheus/dashboards/perf_and_scaling.json b/prometheus/dashboards/perf_and_scaling.json new file mode 100644 index 0000000000..2af134d932 --- /dev/null +++ b/prometheus/dashboards/perf_and_scaling.json @@ -0,0 +1,1282 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.4.1" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Per-model metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 19, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 10, + "x": 0, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 150, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(model, pod) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by (model, pod) (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 10, + "x": 10, + "y": 1 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg((rate(seldon_model_infer_seconds_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"rest\"}[${rate_window}])/rate(seldon_model_infer_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"rest\"}[${rate_window}])) > 0) by (model, pod)", + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "REST average latency (agent -> inference server -> agent) by (model, pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 18 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(model) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by model (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 20, + "w": 10, + "x": 10, + "y": 18 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg((rate(seldon_model_infer_seconds_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"grpc\"}[${rate_window}])/rate(seldon_model_infer_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"grpc\"}[${rate_window}])) > 0) by (model, pod)", + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "gRPC average latency (agent -> inference server -> agent) by (model, pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 30 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(pod) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by inference pod, filtered by model(s) (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 8, + "panels": [], + "title": "Per inference pod metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Measures latency from the time the inference server agent receives the request and the time when it gets back the inference response.", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 39 + }, + "id": 2, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdYlBu", + "steps": 73 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(seldon_model_infer_api_seconds_bucket{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval])) by(le)", + "format": "heatmap", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "${rate_window}", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference latency (agent -> inference server -> agent)", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Pending response futures in worker processes, per inference server pod", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 10, + "y": 39 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by (pod) (increase(parallel_request_queue_sum{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference requests in-flight by inference server pod (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 51 + }, + "id": 4, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdYlBu", + "steps": 73 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(rest_server_request_duration_seconds_bucket{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval])) by(le)", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "${rate_window}", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference latency (inference-server <-> model)", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 10, + "y": 51 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(pod)(rate(container_cpu_usage_seconds_total{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}])) * 1000", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "CPU usage by inference pod (1000 = 1cpu, stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values({namespace=~\"$namespace\", pod=~\"mlserver.*|triton.*\"},pod)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "inference_pods", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({namespace=~\"$namespace\", pod=~\"mlserver.*|triton.*\"},pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values({namespace=~\"$namespace\", pod=~\"$inference_pods\"},model)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "model", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({namespace=~\"$namespace\", pod=~\"$inference_pods\"},model)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "1m", + "value": "1m" + }, + "hide": 0, + "name": "rate_window", + "options": [ + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + } + ], + "query": "1m,3m,5m,10m", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "2024-08-06T13:59:49.416Z", + "to": "2024-08-06T15:09:30.277Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Performance and scaling", + "uid": "edtwytaz6xvk0d", + "version": 24, + "weekStart": "" +} \ No newline at end of file diff --git a/prometheus/dashboards/provisioning/perf_and_scaling.json b/prometheus/dashboards/provisioning/perf_and_scaling.json new file mode 100644 index 0000000000..1be8866bb3 --- /dev/null +++ b/prometheus/dashboards/provisioning/perf_and_scaling.json @@ -0,0 +1,1270 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.4.1" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Per-model metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 19, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 17, + "w": 10, + "x": 0, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 150, + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(model, pod) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by (model, pod) (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 10, + "x": 10, + "y": 1 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "expr": "avg((rate(seldon_model_infer_seconds_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"rest\"}[${rate_window}])/rate(seldon_model_infer_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"rest\"}[${rate_window}])) > 0) by (model, pod)", + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "REST average latency (agent -> inference server -> agent) by (model, pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 18 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(model) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by model (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 20, + "w": 10, + "x": 10, + "y": 18 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "editorMode": "code", + "expr": "avg((rate(seldon_model_infer_seconds_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"grpc\"}[${rate_window}])/rate(seldon_model_infer_total{container=\"agent\", namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\", model=~\"${model:regex}\", method_type=\"grpc\"}[${rate_window}])) > 0) by (model, pod)", + "instant": false, + "legendFormat": "{{model}}, {{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "gRPC average latency (agent -> inference server -> agent) by (model, pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 0, + "y": 30 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(pod) (rate(seldon_model_infer_total{namespace=~\"${namespace:regex}\", model=~\"${model:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Response throughput by inference pod, filtered by model(s) (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 8, + "panels": [], + "title": "Per inference pod metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "Measures latency from the time the inference server agent receives the request and the time when it gets back the inference response.", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 39 + }, + "id": 2, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdYlBu", + "steps": 73 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(seldon_model_infer_api_seconds_bucket{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval])) by(le)", + "format": "heatmap", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "interval": "${rate_window}", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference latency (agent -> inference server -> agent)", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "Pending response futures in worker processes, per inference server pod", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 10, + "y": 39 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by (pod) (increase(parallel_request_queue_sum{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval]))", + "format": "time_series", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference requests in-flight by inference server pod (stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 0, + "y": 51 + }, + "id": 4, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "RdYlBu", + "steps": 73 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(rest_server_request_duration_seconds_bucket{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[$__rate_interval])) by(le)", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "${rate_window}", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Inference latency (inference-server <-> model)", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.stacking", + "value": { + "group": "A", + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 10, + "y": 51 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(pod)(rate(container_cpu_usage_seconds_total{namespace=~\"${namespace:regex}\", pod=~\"${inference_pods:regex}\"}[${rate_window}])) * 1000", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "CPU usage by inference pod (1000 = 1cpu, stacked)", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "definition": "label_values({namespace=~\"$namespace\", pod=~\"mlserver.*|triton.*\"},pod)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "inference_pods", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({namespace=~\"$namespace\", pod=~\"mlserver.*|triton.*\"},pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "definition": "label_values({namespace=~\"$namespace\", pod=~\"$inference_pods\"},model)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "model", + "options": [], + "query": { + "qryType": 1, + "query": "label_values({namespace=~\"$namespace\", pod=~\"$inference_pods\"},model)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "1m", + "value": "1m" + }, + "hide": 0, + "name": "rate_window", + "options": [ + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + } + ], + "query": "1m,3m,5m,10m", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "2024-08-06T13:59:49.416Z", + "to": "2024-08-06T15:09:30.277Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Performance and scaling", + "uid": "edtwytaz6xvk0d", + "version": 24, + "weekStart": "" +}