Merge pull request #225 from ashley-o0o/kserveTemp1

Added kserve dashboard metrics reconciler
opendatahub-io · Jun 24, 2024 · 0e78e2e · 0e78e2e
2 parents 5165fb5 + 126af31
commit 0e78e2e
Show file tree

Hide file tree

Showing 14 changed files with 581 additions and 237 deletions.
diff --git a/Containerfile b/Containerfile
@@ -13,6 +13,10 @@ RUN go mod download
 COPY main.go main.go
 #COPY api/ api/
 COPY controllers/ controllers/
+COPY controllers/constants/ovms-metrics.json metrics_dashboards/ovms-metrics.json
+COPY controllers/constants/tgis-metrics.json metrics_dashboards/tgis-metrics.json
+COPY controllers/constants/vllm-metrics.json metrics_dashboards/vllm-metrics.json
+COPY controllers/constants/caikit-metrics.json metrics_dashboards/caikit-metrics.json
 
 # Build
 USER root
@@ -23,6 +27,10 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -o manager main.go
 FROM registry.access.redhat.com/ubi8/ubi-minimal:8.6
 WORKDIR /
 COPY --from=builder /workspace/manager .
+COPY --from=builder /workspace/metrics_dashboards/ovms-metrics.json .
+COPY --from=builder /workspace/metrics_dashboards/tgis-metrics.json .
+COPY --from=builder /workspace/metrics_dashboards/vllm-metrics.json .
+COPY --from=builder /workspace/metrics_dashboards/caikit-metrics.json .
 USER 65532:65532
 
 ENTRYPOINT ["/manager"]
diff --git a/controllers/comparators/configmap_comparator.go b/controllers/comparators/configmap_comparator.go
@@ -0,0 +1,29 @@
+/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package comparators
+
+import (
+	"reflect"
+
+	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+func GetConfigMapComparator() ResourceComparator {
+	return func(deployed client.Object, requested client.Object) bool {
+		deployedConfigMap := deployed.(*corev1.ConfigMap)
+		requestedConfigMap := requested.(*corev1.ConfigMap)
+		return reflect.DeepEqual(deployedConfigMap.Data, requestedConfigMap.Data) &&
+			reflect.DeepEqual(deployedConfigMap.Labels, requestedConfigMap.Labels)
+	}
+}
diff --git a/controllers/constants/caikit-metrics.json b/controllers/constants/caikit-metrics.json
@@ -1,55 +1,52 @@
 {
-    "metrics": {
-        "supported": "true",
-        "config": [
-            {
-                "title": "Number of requests",
-                "type": "REQUEST_COUNT",
-                "queries": [
-                    {
-                        "title": "Number of successful incoming requests",
-                        "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    },
-                    {
-                        "title": "Number of failed incoming requests",
-                        "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "Average response time (ms)",
-                "type": "MEAN_LATENCY",
-                "queries": [
-                    {
-                        "title": "Average inference latency",
-                        "query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    },
-                    {
-                        "title": "Average e2e latency",
-                        "query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "CPU utilization %",
-                "type": "CPU_USAGE",
-                "queries": [
-                    {
-                        "title": "CPU usage",
-                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
-                    }
-                ]
-            },
-            {
-                "title": "Memory utilization %",
-                "type": "MEMORY_USAGE",
-                "queries": [
-                    {
-                        "title": "Memory usage",
-                        "query":  "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
-                    }
-                ]
-            }
-        ]
-    }
+    "config": [
+        {
+            "title": "Number of requests",
+            "type": "REQUEST_COUNT",
+            "queries": [
+                {
+                    "title": "Number of successful incoming requests",
+                    "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Number of failed incoming requests",
+                    "query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "Average response time (ms)",
+            "type": "MEAN_LATENCY",
+            "queries": [
+                {
+                    "title": "Average inference latency",
+                    "query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Average e2e latency",
+                    "query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "CPU utilization %",
+            "type": "CPU_USAGE",
+            "queries": [
+                {
+                    "title": "CPU usage",
+                    "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                }
+            ]
+        },
+        {
+            "title": "Memory utilization %",
+            "type": "MEMORY_USAGE",
+            "queries": [
+                {
+                    "title": "Memory usage",
+                    "query":  "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                }
+            ]
+        }
+    ]
 }
diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go
@@ -53,4 +53,9 @@ const (
 
 const (
 	DefaultStorageConfig = "storage-config"
+	IntervalValue        = "1m"
+	OvmsImageName        = "openvino_model_server"
+	TgisImageName        = "text-generation-inference"
+	VllmImageName        = "vllm"
+	CaikitImageName      = "caikit-nlp"
 )
diff --git a/controllers/constants/ovms-metrics.json b/controllers/constants/ovms-metrics.json
@@ -1,55 +1,52 @@
 {
-    "metrics": {
-        "supported": "true",
-        "config": [
-            {
-                "title": "Number of requests",
-                "type": "REQUEST_COUNT",
-                "queries": [
-                    {
-                        "title": "Number of successful incoming requests",
-                        "query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    },
-                    {
-                        "title": "Number of failed incoming requests",
-                        "query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "Average response time (ms)",
-                "type": "MEAN_LATENCY",
-                "queries": [
-                    {
-                        "title": "Average inference latency",
-                        "query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[{RATE_INTERVAL}]))"
-                    },
-                    {
-                        "title": "Average e2e latency",
-                        "query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[{RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "CPU utilization %",
-                "type": "CPU_USAGE",
-                "queries": [
-                    {
-                        "title": "CPU usage",
-                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
-                    }
-                ]
-            },
-            {
-                "title": "Memory utilization %",
-                "type": "MEMORY_USAGE",
-                "queries": [
-                    {
-                        "title": "Memory usage",
-                        "query": "sum(container_memory_working_set_bytes{namespace='$(MODEL_NAMESPACE)', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
-                    }
-                ]
-            }
-        ]
-    }
+    "config": [
+        {
+            "title": "Number of requests",
+            "type": "REQUEST_COUNT",
+            "queries": [
+                {
+                    "title": "Number of successful incoming requests",
+                    "query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Number of failed incoming requests",
+                    "query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "Average response time (ms)",
+            "type": "MEAN_LATENCY",
+            "queries": [
+                {
+                    "title": "Average inference latency",
+                    "query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Average e2e latency",
+                    "query": "sum by (name) (rate(ovms_request_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (name) (rate(ovms_request_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "CPU utilization %",
+            "type": "CPU_USAGE",
+            "queries": [
+                {
+                    "title": "CPU usage",
+                    "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                }
+            ]
+        },
+        {
+            "title": "Memory utilization %",
+            "type": "MEMORY_USAGE",
+            "queries": [
+                {
+                    "title": "Memory usage",
+                    "query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                }
+            ]
+        }
+    ]
 }
diff --git a/controllers/constants/tgis-metrics.json b/controllers/constants/tgis-metrics.json
@@ -1,55 +1,52 @@
 {
-    "metrics": {
-        "supported": "true",
-        "config": [
-            {
-                "title": "Number of requests",
-                "type": "REQUEST_COUNT",
-                "queries": [
-                    {
-                        "title": "Number of successful incoming requests",
-                        "query": "sum(increase(tgi_request_success{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
-                    },
-                    {
-                        "title": "Number of failed incoming requests",
-                        "query": "sum(increase(tgi_request_failure{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "Average response time (ms)",
-                "type": "MEAN_LATENCY",
-                "queries": [
-                    {
-                        "title": "Average inference latency",
-                        "query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))  "
-                    },
-                    {
-                        "title": "Average e2e latency",
-                        "query": "sum by (pod) (rate(tgi_request_duration_sum{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
-                    }
-                ]
-            },
-            {
-                "title": "CPU utilization %",
-                "type": "CPU_USAGE",
-                "queries": [
-                    {
-                        "title": "CPU usage",
-                        "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
-                    }
-                ]
-            },
-            {
-                "title": "Memory utilization %",
-                "type": "MEMORY_USAGE",
-                "queries": [
-                    {
-                        "title": "Memory usage",
-                        "query": "sum(container_memory_working_set_bytes{namespace='$(MODEL_NAMESPACE)', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
-                    }
-                ]
-            }
-        ]
-    }
+    "config": [
+        {
+            "title": "Number of requests",
+            "type": "REQUEST_COUNT",
+            "queries": [
+                {
+                    "title": "Number of successful incoming requests",
+                    "query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Number of failed incoming requests",
+                    "query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "Average response time (ms)",
+            "type": "MEAN_LATENCY",
+            "queries": [
+                {
+                    "title": "Average inference latency",
+                    "query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                },
+                {
+                    "title": "Average e2e latency",
+                    "query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
+                }
+            ]
+        },
+        {
+            "title": "CPU utilization %",
+            "type": "CPU_USAGE",
+            "queries": [
+                {
+                    "title": "CPU usage",
+                    "query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
+                }
+            ]
+        },
+        {
+            "title": "Memory utilization %",
+            "type": "MEMORY_USAGE",
+            "queries": [
+                {
+                    "title": "Memory usage",
+                    "query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
+                }
+            ]
+        }
+    ]
 }