diff --git a/.changesets/fix_garypen_3443_fix_prom.md b/.changesets/fix_garypen_3443_fix_prom.md new file mode 100644 index 0000000000..4eec703272 --- /dev/null +++ b/.changesets/fix_garypen_3443_fix_prom.md @@ -0,0 +1,7 @@ +### Fix Prometheus statistics issues with _total_total names ([Issue #3443](https://github.com/apollographql/router/issues/3443)) + +When producing Prometheus statistics, the otel crate (0.19.0) now automatically appends "_total", which is unhelpful. + +This fix removes the duplicated "_total_total" from our statistics. + +By [@garypen](https://github.com/garypen) in https://github.com/apollographql/router/pull/3471 \ No newline at end of file diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index e5c60e32d8..310ed2dc6e 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -155,11 +155,15 @@ impl Service for PrometheusService { let encoder = TextEncoder::new(); let mut result = Vec::new(); encoder.encode(&metric_families, &mut result)?; + // otel 0.19.0 started adding "_total" onto various statistics. + // Let's remove any problems they may have created for us. 
+ let stats = String::from_utf8_lossy(&result); + let modified_stats = stats.replace("_total_total{", "_total{"); Ok(router::Response { response: http::Response::builder() .status(StatusCode::OK) .header(http::header::CONTENT_TYPE, "text/plain; version=0.0.4") - .body::(result.into()) + .body::(modified_stats.into()) .map_err(BoxError::from)?, context: req.context, }) diff --git a/apollo-router/tests/common.rs b/apollo-router/tests/common.rs index d40082a293..3cf3d59e27 100644 --- a/apollo-router/tests/common.rs +++ b/apollo-router/tests/common.rs @@ -517,6 +517,21 @@ impl IntegrationTest { panic!("'{text}' not detected in metrics\n{last_metrics}"); } + #[allow(dead_code)] + pub async fn assert_metrics_does_not_contain(&self, text: &str) { + if let Ok(metrics) = self + .get_metrics_response() + .await + .expect("failed to fetch metrics") + .text() + .await + { + if metrics.contains(text) { + panic!("'{text}' detected in metrics\n{metrics}"); + } + } + } + #[allow(dead_code)] pub async fn assert_shutdown(&mut self) { let router = self.router.as_mut().expect("router must have been started"); diff --git a/apollo-router/tests/metrics_tests.rs b/apollo-router/tests/metrics_tests.rs index 2dca27af51..be58864d1e 100644 --- a/apollo-router/tests/metrics_tests.rs +++ b/apollo-router/tests/metrics_tests.rs @@ -58,6 +58,9 @@ async fn test_metrics_reloading() -> Result<(), BoxError> { router .assert_metrics_contains(r#"custom_header="test_custom""#, None) .await; + router + .assert_metrics_does_not_contain(r#"_total_total{"#) + .await; if std::env::var("APOLLO_KEY").is_ok() && std::env::var("APOLLO_GRAPH_REF").is_ok() { router.assert_metrics_contains(r#"apollo_router_uplink_fetch_duration_seconds_count{kind="unchanged",query="License",service_name="apollo-router",url="https://uplink.api.apollographql.com/",otel_scope_name="apollo/router",otel_scope_version=""}"#, Some(Duration::from_secs(120))).await;