Skip to content

Commit

Permalink
Merge pull request #37 from harrytucker/grpc-alerts
Browse files Browse the repository at this point in the history
add grpc alerting
  • Loading branch information
adam-rummer-hpe committed Nov 14, 2023
2 parents 5a329fc + e080fd4 commit 12f2de9
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@

# intellij editors
.idea

kubeconfig.yaml
37 changes: 36 additions & 1 deletion example-custom-resource.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,45 @@ spec:
gRPC:
errorPercent:
- operation: MoreThan
value: 10 # %
value: 10
for: 3m
withLabels:
severity: warning
trafficPerSecond:
- operation: MoreThan
value: 1000
for: 1m
withLabels:
severity: warning
latencyMillisecondsP50:
- operation: MoreThan
value: 10
for: 2m
withLabels:
severity: warning
latencyMillisecondsP90:
- operation: MoreThan
value: 15
for: 2m
withLabels:
severity: warning
latencyMillisecondsP95:
- operation: MoreThan
value: 20
for: 2m
withLabels:
severity: warning
latencyMillisecondsP99:
- operation: MoreThan
value: 25
for: 5m
withLabels:
severity: warning
- operation: MoreThan
value: 50
for: 2m
withLabels:
severity: critical
replica:
count:
- operation: LessThan
Expand Down
13 changes: 13 additions & 0 deletions src/crd/service_alert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,19 @@ pub enum NetworkAlert {
LatencyMillisecondsP99,
}

impl Display for NetworkAlert {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
NetworkAlert::ErrorPercent => write!(f, "Error %"),
NetworkAlert::TrafficPerSecond => write!(f, "Traffic /sec"),
NetworkAlert::LatencyMillisecondsP50 => write!(f, "Latency P50 (ms)"),
NetworkAlert::LatencyMillisecondsP90 => write!(f, "Latency P90 (ms)"),
NetworkAlert::LatencyMillisecondsP95 => write!(f, "Latency P95 (ms)"),
NetworkAlert::LatencyMillisecondsP99 => write!(f, "Latency P99 (ms)"),
}
}
}

#[derive(Debug, Serialize, Deserialize, Clone, JsonSchema, PartialEq, Eq, Hash)]
#[serde(rename_all = "camelCase")]
pub enum ReplicaAlert {
Expand Down
8 changes: 4 additions & 4 deletions src/crd/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn test_serialisation_happy_path() -> color_eyre::Result<()> {
NetworkAlert::ErrorPercent,
vec![AlertConfig {
operation: Operation::MoreThan,
value: 10 as f32,
value: 10_f32,
for_: String::from("3m"),
with_labels: HashMap::from([(
String::from("severity"),
Expand All @@ -70,7 +70,7 @@ fn test_serialisation_happy_path() -> color_eyre::Result<()> {
vec![
AlertConfig {
operation: Operation::MoreThan,
value: 20 as f32,
value: 20_f32,
for_: String::from("5m"),
with_labels: HashMap::from([(
String::from("severity"),
Expand All @@ -79,7 +79,7 @@ fn test_serialisation_happy_path() -> color_eyre::Result<()> {
},
AlertConfig {
operation: Operation::MoreThan,
value: 50 as f32,
value: 50_f32,
for_: String::from("2m"),
with_labels: HashMap::from([(
String::from("severity"),
Expand All @@ -93,7 +93,7 @@ fn test_serialisation_happy_path() -> color_eyre::Result<()> {
vec![
AlertConfig {
operation: Operation::LessThan,
value: 3 as f32,
value: 3_f32,
for_: String::from("5m"),
with_labels: HashMap::from([(
String::from("severity"),
Expand Down
2 changes: 1 addition & 1 deletion src/http/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ mod test {
)?;

registry.register(Box::new(counter.clone()))?;
let expected_metric_count = 5 as f64;
let expected_metric_count = 5_f64;
counter.inc_by(expected_metric_count);

let router = test_router(registry, counter);
Expand Down
10 changes: 9 additions & 1 deletion src/prometheus/alert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ use serde::{Deserialize, Serialize};

use crate::{
crd::{ReplicaAlert, ServiceAlertSpec},
prometheus::{http_alerts::http_rules, replica_alerts::replica_count_rules},
prometheus::{
grpc_alerts::grpc_alert_rules, http_alerts::http_rules, replica_alerts::replica_count_rules,
},
};

#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -92,6 +94,12 @@ impl TryFrom<ServiceAlertSpec> for PromAlerts {
alerts.groups.push(http_rules(&spec))
}

if let Some(grpc_alerts) = &spec.alerts.grpc {
grpc_alerts
.iter()
.for_each(|(key, val)| alerts.groups.push(grpc_alert_rules(key, val, &spec)));
}

Ok(alerts)
}
}
Expand Down
102 changes: 102 additions & 0 deletions src/prometheus/grpc_alerts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use super::alert::{AlertGroup, AlertRules, Annotations, Labels, PrometheusSeverity};
use crate::crd::{AlertConfig, NetworkAlert, ServiceAlertSpec};

/// Builds the Prometheus [`AlertGroup`] for one kind of gRPC network alert.
///
/// One [`AlertRules`] entry is produced per element of `alert_configs`, in
/// the same order. Each rule takes its name, expression, summary and
/// description from `network_alert` and the config, and its `source` /
/// `owner` labels from `spec.common_labels`.
///
/// # Panics
///
/// Propagates any panic from `grpc_promql`, which at the time of writing
/// still contains `todo!()` arms for some [`NetworkAlert`] variants.
///
/// NOTE(review): every call returns a group named `"gRPC Alerts"`, so a spec
/// with several gRPC alert kinds yields several groups sharing one name.
/// Prometheus appears to require unique group names per rule file — confirm
/// whether the caller should merge these groups or the name should vary.
pub fn grpc_alert_rules(
    network_alert: &NetworkAlert,
    alert_configs: &[AlertConfig],
    spec: &ServiceAlertSpec,
) -> AlertGroup {
    let grpc_rules = alert_configs
        .iter()
        // `conf` is already the element we need; no index bookkeeping required.
        .map(|conf| AlertRules {
            alert: format!("{0} {1} {2}", network_alert, conf.operation, conf.value),
            expr: grpc_promql(network_alert, conf, spec),
            for_: conf.for_.clone(),
            labels: Labels {
                severity: PrometheusSeverity::from(&conf.with_labels),
                source: spec.common_labels.origin.clone(),
                owner: spec.common_labels.owner.clone(),
            },
            annotations: Annotations {
                summary: grpc_summary(network_alert, conf),
                description: grpc_description(network_alert, conf),
            },
        })
        .collect();

    AlertGroup {
        name: String::from("gRPC Alerts"),
        rules: grpc_rules,
    }
}

/// Produces the PromQL expression string for a gRPC network alert.
///
/// Only [`NetworkAlert::LatencyMillisecondsP99`] is implemented: it yields a
/// `histogram_quantile(0.99, ...)` expression over
/// `istio_request_duration_milliseconds` with an empty label selector and
/// `alert_config.for_` as the range. `_spec` is currently unused.
///
/// # Panics
///
/// Panics with `todo!()` for every other [`NetworkAlert`] variant.
///
/// NOTE(review): `histogram_quantile` is normally applied to a `rate(...)`
/// over the `_bucket` series rather than a raw range selector — confirm the
/// intended query before relying on this expression.
fn grpc_promql(
    network_alert: &NetworkAlert,
    alert_config: &AlertConfig,
    _spec: &ServiceAlertSpec,
) -> String {
    match network_alert {
        // Not implemented yet; listed explicitly so the match stays exhaustive.
        NetworkAlert::ErrorPercent
        | NetworkAlert::TrafficPerSecond
        | NetworkAlert::LatencyMillisecondsP50
        | NetworkAlert::LatencyMillisecondsP90
        | NetworkAlert::LatencyMillisecondsP95 => todo!(),
        NetworkAlert::LatencyMillisecondsP99 => format!(
            // `{{}}` emits the literal PromQL `{}` selector; a bare `{}` would
            // be consumed by `format!` as a placeholder for `for_`.
            "histogram_quantile(0.99, istio_request_duration_milliseconds{{}}[{0}])",
            alert_config.for_
        ),
    }
}

fn grpc_summary(network_alert: &NetworkAlert, alert_config: &AlertConfig) -> String {
match { network_alert } {
NetworkAlert::ErrorPercent => format!(
"error rate {0} {1}% for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
NetworkAlert::TrafficPerSecond => format!(
"traffic {0} {1}/sec for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
NetworkAlert::LatencyMillisecondsP50 => format!(
"latency P(50) {0} {1} ms for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
NetworkAlert::LatencyMillisecondsP90 => format!(
"latency P(90) {0} {1} ms for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
NetworkAlert::LatencyMillisecondsP95 => format!(
"latency P(95) {0} {1} ms for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
NetworkAlert::LatencyMillisecondsP99 => format!(
"latency P(99) {0} {1} ms for {2}",
alert_config.operation, alert_config.value, alert_config.for_
),
}
}

/// Returns the description annotation for a gRPC alert rule.
///
/// Every variant currently maps to the same placeholder text, and
/// `_alert_config` is not consulted.
fn grpc_description(network_alert: &NetworkAlert, _alert_config: &AlertConfig) -> String {
    // The match stays exhaustive (no `_` arm) so that adding a new variant
    // forces a decision about its description.
    let text = match network_alert {
        NetworkAlert::ErrorPercent
        | NetworkAlert::TrafficPerSecond
        | NetworkAlert::LatencyMillisecondsP50
        | NetworkAlert::LatencyMillisecondsP90
        | NetworkAlert::LatencyMillisecondsP95
        | NetworkAlert::LatencyMillisecondsP99 => "this is a placeholder description",
    };

    text.to_owned()
}
1 change: 1 addition & 0 deletions src/prometheus/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! Prometheus alert that Cactuar can produce as a Kubernetes `ConfigMap`.

pub mod alert;
pub mod grpc_alerts;
pub mod http_alerts;
pub mod replica_alerts;

Expand Down

0 comments on commit 12f2de9

Please sign in to comment.