Skip to content

Commit

Permalink
Merge pull request #225 from ashley-o0o/kserveTemp1
Browse files Browse the repository at this point in the history
Added kserve dashboard metrics reconciler
  • Loading branch information
openshift-merge-bot[bot] authored Jun 24, 2024
2 parents 5165fb5 + 126af31 commit 0e78e2e
Show file tree
Hide file tree
Showing 14 changed files with 581 additions and 237 deletions.
8 changes: 8 additions & 0 deletions Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ RUN go mod download
COPY main.go main.go
#COPY api/ api/
COPY controllers/ controllers/
COPY controllers/constants/ovms-metrics.json metrics_dashboards/ovms-metrics.json
COPY controllers/constants/tgis-metrics.json metrics_dashboards/tgis-metrics.json
COPY controllers/constants/vllm-metrics.json metrics_dashboards/vllm-metrics.json
COPY controllers/constants/caikit-metrics.json metrics_dashboards/caikit-metrics.json

# Build
USER root
Expand All @@ -23,6 +27,10 @@ RUN CGO_ENABLED=0 GOOS=linux go build -a -o manager main.go
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.6
WORKDIR /
COPY --from=builder /workspace/manager .
COPY --from=builder /workspace/metrics_dashboards/ovms-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/tgis-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/vllm-metrics.json .
COPY --from=builder /workspace/metrics_dashboards/caikit-metrics.json .
USER 65532:65532

ENTRYPOINT ["/manager"]
29 changes: 29 additions & 0 deletions controllers/comparators/configmap_comparator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package comparators

import (
"reflect"

corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// GetConfigMapComparator returns a ResourceComparator that reports whether a
// deployed ConfigMap matches the requested one. Only the Data and Labels maps
// are compared; every other field of the ConfigMap is ignored.
//
// A nil map and an empty map are treated as equal. reflect.DeepEqual alone
// reports them as different, which would flag a ConfigMap as changed when one
// side carries a nil map and the other an empty one, even though both hold no
// entries.
//
// Both arguments must be *corev1.ConfigMap; any other concrete type panics on
// the type assertion, exactly as before.
func GetConfigMapComparator() ResourceComparator {
	// sameStringMap compares two string maps, considering two maps without
	// entries equal regardless of whether either is nil.
	sameStringMap := func(a, b map[string]string) bool {
		if len(a) == 0 && len(b) == 0 {
			return true
		}
		return reflect.DeepEqual(a, b)
	}

	return func(deployed client.Object, requested client.Object) bool {
		deployedConfigMap := deployed.(*corev1.ConfigMap)
		requestedConfigMap := requested.(*corev1.ConfigMap)
		return sameStringMap(deployedConfigMap.Data, requestedConfigMap.Data) &&
			sameStringMap(deployedConfigMap.Labels, requestedConfigMap.Labels)
	}
}
103 changes: 50 additions & 53 deletions controllers/constants/caikit-metrics.json
Original file line number Diff line number Diff line change
@@ -1,55 +1,52 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[1m])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(predict_rpc_count_total{namespace='${NAMESPACE}',code!='OK',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (model_id) (rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (model_id) (rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (model_id) (rate(caikit_core_load_model_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_sum{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (model_id) (rate(caikit_core_load_model_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]) + rate(predict_caikit_library_duration_seconds_count{namespace='${NAMESPACE}',model_id='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
5 changes: 5 additions & 0 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,9 @@ const (

// Default names and values shared by the controllers.
const (
// DefaultStorageConfig is the fixed name "storage-config".
// NOTE(review): presumably the name of the storage configuration resource — confirm against callers.
DefaultStorageConfig = "storage-config"
// IntervalValue is the duration string "1m".
// NOTE(review): presumably substituted for the ${RATE_INTERVAL} placeholder in the metrics dashboard queries — confirm against the reconciler.
IntervalValue = "1m"
// The *ImageName constants are name fragments for the supported serving runtimes.
// NOTE(review): presumably matched against a runtime's container image to pick the metrics dashboard — confirm against callers.
OvmsImageName = "openvino_model_server"
TgisImageName = "text-generation-inference"
VllmImageName = "vllm"
CaikitImageName = "caikit-nlp"
)
103 changes: 50 additions & 53 deletions controllers/constants/ovms-metrics.json
Original file line number Diff line number Diff line change
@@ -1,55 +1,52 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[{RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[1m])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[{RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='$(MODEL_NAMESPACE)', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(ovms_requests_success{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(ovms_requests_fail{namespace='${NAMESPACE}',name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (name) (rate(ovms_inference_time_us_sum{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (name) (rate(ovms_inference_time_us_count{namespace='${NAMESPACE}', name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (name) (rate(ovms_request_time_us_sum{name='${MODEL_NAME}'}[${RATE_INTERVAL}])) / sum by (name) (rate(ovms_request_time_us_count{name='${MODEL_NAME}'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
103 changes: 50 additions & 53 deletions controllers/constants/tgis-metrics.json
Original file line number Diff line number Diff line change
@@ -1,55 +1,52 @@
{
"metrics": {
"supported": "true",
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(tgi_request_success{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(tgi_request_failure{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) "
},
{
"title": "Average e2e latency",
"query": "sum by (pod) (rate(tgi_request_duration_sum{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace=${NAMESPACE}, pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'}* on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='$(MODEL_NAMESPACE)', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
"config": [
{
"title": "Number of requests",
"type": "REQUEST_COUNT",
"queries": [
{
"title": "Number of successful incoming requests",
"query": "sum(increase(tgi_request_success{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Number of failed incoming requests",
"query": "sum(increase(tgi_request_failure{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "Average response time (ms)",
"type": "MEAN_LATENCY",
"queries": [
{
"title": "Average inference latency",
"query": "sum by (pod) (rate(tgi_request_inference_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_inference_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
},
{
"title": "Average e2e latency",
"query": "sum by (pod) (rate(tgi_request_duration_sum{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}])) / sum by (pod) (rate(tgi_request_duration_count{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}[${RATE_INTERVAL}]))"
}
]
},
{
"title": "CPU utilization %",
"type": "CPU_USAGE",
"queries": [
{
"title": "CPU usage",
"query": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace='${NAMESPACE}'} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace='${NAMESPACE}', workload=~'${MODEL_NAME}-predictor-.*', workload_type=~'deployment'}) by (pod)"
}
]
},
{
"title": "Memory utilization %",
"type": "MEMORY_USAGE",
"queries": [
{
"title": "Memory usage",
"query": "sum(container_memory_working_set_bytes{namespace='${NAMESPACE}', pod=~'${MODEL_NAME}-predictor-.*'}) by (pod)"
}
]
}
]
}
Loading

0 comments on commit 0e78e2e

Please sign in to comment.