From 5d57ee9d6272a7c12c327e8f33c8eb75a8cab3d0 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Mon, 7 Aug 2023 11:57:46 +0100 Subject: [PATCH 1/9] add a warning if we think istio-proxy injection is causing problems We have encountered situations where the injection of istio-proxy in a router pod (executing in kubernetes) causes strange networking errors during uplink retrieval. The root cause of the issue is that the router is executing and attempting to retrieve uplink schemas whilst the istio-proxy is modifying network configuration at the same time. This warning message will direct users to information which should help them to configure their cluster or pod to avoid this problem. fixes: #3533 --- apollo-router/src/uplink/mod.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/apollo-router/src/uplink/mod.rs b/apollo-router/src/uplink/mod.rs index f9afb7ce49..8f10a44c28 100644 --- a/apollo-router/src/uplink/mod.rs +++ b/apollo-router/src/uplink/mod.rs @@ -1,3 +1,4 @@ +use std::error::Error as stdError; use std::fmt::Debug; use std::time::Duration; use std::time::Instant; @@ -359,7 +360,27 @@ where Query: graphql_client::GraphQLQuery, { let client = reqwest::Client::builder().timeout(timeout).build()?; - let res = client.post(url).json(request_body).send().await?; + // It is possible that istio-proxy is re-configuring networking beneath us. If it is, we'll see an error something like this: + // level: "ERROR" + // message: "fetch failed from all endpoints" + // target: "apollo_router::router::event::schema" + // timestamp: "2023-08-01T10:40:28.831196Z" + // That's deeply confusing and very hard to debug. 
Let's try to help by printing out a helpful error message here + let res = client + .post(url) + .json(request_body) + .send() + .await + .map_err(|e| { + if let Some(hyper_err) = e.source() { + if let Some(os_err) = hyper_err.source() { + if os_err.to_string().contains("tcp connect error: Cannot assign requested address (os error 99)") { + tracing::warn!("If your router is executing within a kubernetes pod, this failure may be caused by istio-proxy injection. See https://github.com/apollographql/router/issues/3533 for more details about how to solve this"); + } + } + } + e + })?; tracing::debug!("uplink response {:?}", res); let response_body: graphql_client::Response = res.json().await?; Ok(response_body) From 3c418ca24c4c51fd84058b08d3e9831a2d4ee7bc Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Mon, 7 Aug 2023 12:43:04 +0100 Subject: [PATCH 2/9] add some documentation for the problem and a changeset --- docs/source/containerization/kubernetes.mdx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/containerization/kubernetes.mdx b/docs/source/containerization/kubernetes.mdx index b0481dbb8d..df7872edea 100644 --- a/docs/source/containerization/kubernetes.mdx +++ b/docs/source/containerization/kubernetes.mdx @@ -254,3 +254,8 @@ If you had a router running on your localhost, with default health-check configu curl "http://localhost:8088/health" +## Using `istio` with the router + +The [istio service mesh](https://istio.io/) is a very popular choice for enhanced traffic routing within kubernetes. + +We have encountered issues with `istio-proxy` pod injection. It is possible for the router to start executing at the same time that istio is reconfiguring networking for the router pod. 
This is an issue with `istio`, not the router, and the fix is to follow the istio advice documented [here](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready) From 1db4a0959f9ad2195c829b484f7df22463d7e684 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Mon, 7 Aug 2023 12:43:36 +0100 Subject: [PATCH 3/9] remember to include the changeset in the commit... --- .changesets/maint_garypen_3533_istio_warn.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .changesets/maint_garypen_3533_istio_warn.md diff --git a/.changesets/maint_garypen_3533_istio_warn.md b/.changesets/maint_garypen_3533_istio_warn.md new file mode 100644 index 0000000000..7c65cd03df --- /dev/null +++ b/.changesets/maint_garypen_3533_istio_warn.md @@ -0,0 +1,9 @@ +### Add a warning if we think istio-proxy injection is causing problems ([Issue #3533](https://github.com/apollographql/router/issues/3533)) + +We have encountered situations where the injection of istio-proxy in a router pod (executing in kubernetes) causes strange networking errors during uplink retrieval. + +The root cause of the issue is that the router is executing and attempting to retrieve uplink schemas whilst the istio-proxy is modifying network configuration at the same time. + +This new warning message will direct users to information which should help them to configure their kubernetes cluster or pod to avoid this problem. 
+ +By [@garypen](https://github.com/garypen) in https://github.com/apollographql/router/pull/3545 \ No newline at end of file From c18c6c7f8492b098851881076bf1f8f92a342698 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Mon, 7 Aug 2023 12:46:29 +0100 Subject: [PATCH 4/9] tidy up the doc entry a little --- docs/source/containerization/kubernetes.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/containerization/kubernetes.mdx b/docs/source/containerization/kubernetes.mdx index df7872edea..c41eb20c73 100644 --- a/docs/source/containerization/kubernetes.mdx +++ b/docs/source/containerization/kubernetes.mdx @@ -258,4 +258,4 @@ curl "http://localhost:8088/health" The [istio service mesh](https://istio.io/) is a very popular choice for enhanced traffic routing within kubernetes. -We have encountered issues with `istio-proxy` pod injection. It is possible for the router to start executing at the same time that istio is reconfiguring networking for the router pod. This is an issue with `istio`, not the router, and the fix is to follow the istio advice documented [here](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready) +We have encountered an [issue](https://github.com/apollographql/router/issues/3533) with `istio-proxy` pod injection. It is possible for the router to start executing at the same time that istio is reconfiguring networking for the router pod. This is an issue with `istio`, not the router, and the fix is to follow the istio advice documented [here](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready). 
From a84c91adbe1dc71780dc68468b83401d3d5e111b Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Wed, 9 Aug 2023 08:35:21 +0100 Subject: [PATCH 5/9] Update .changesets/maint_garypen_3533_istio_warn.md Co-authored-by: Maria Elisabeth Schreiber --- .changesets/maint_garypen_3533_istio_warn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changesets/maint_garypen_3533_istio_warn.md b/.changesets/maint_garypen_3533_istio_warn.md index 7c65cd03df..eafa2e275b 100644 --- a/.changesets/maint_garypen_3533_istio_warn.md +++ b/.changesets/maint_garypen_3533_istio_warn.md @@ -1,6 +1,6 @@ ### Add a warning if we think istio-proxy injection is causing problems ([Issue #3533](https://github.com/apollographql/router/issues/3533)) -We have encountered situations where the injection of istio-proxy in a router pod (executing in kubernetes) causes strange networking errors during uplink retrieval. +We have encountered situations where the injection of istio-proxy in a router pod (executing in Kubernetes) causes networking errors during uplink retrieval. The root cause of the issue is that the router is executing and attempting to retrieve uplink schemas whilst the istio-proxy is modifying network configuration at the same time. 
From 58d47ba140c276563554c6b03d5e132adf06d27c Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Wed, 9 Aug 2023 08:35:35 +0100 Subject: [PATCH 6/9] Update .changesets/maint_garypen_3533_istio_warn.md Co-authored-by: Maria Elisabeth Schreiber --- .changesets/maint_garypen_3533_istio_warn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changesets/maint_garypen_3533_istio_warn.md b/.changesets/maint_garypen_3533_istio_warn.md index eafa2e275b..38a89c3178 100644 --- a/.changesets/maint_garypen_3533_istio_warn.md +++ b/.changesets/maint_garypen_3533_istio_warn.md @@ -2,7 +2,7 @@ We have encountered situations where the injection of istio-proxy in a router pod (executing in Kubernetes) causes networking errors during uplink retrieval. -The root cause of the issue is that the router is executing and attempting to retrieve uplink schemas whilst the istio-proxy is modifying network configuration at the same time. +The root cause is that the router is executing and attempting to retrieve uplink schemas while the istio-proxy is simultaneously modifying network configuration. This new warning message will direct users to information which should help them to configure their kubernetes cluster or pod to avoid this problem. 
From 5011619436ddae85ea5b2dd770d58e76e52570e1 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Wed, 9 Aug 2023 08:35:49 +0100 Subject: [PATCH 7/9] Update .changesets/maint_garypen_3533_istio_warn.md Co-authored-by: Maria Elisabeth Schreiber --- .changesets/maint_garypen_3533_istio_warn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changesets/maint_garypen_3533_istio_warn.md b/.changesets/maint_garypen_3533_istio_warn.md index 38a89c3178..916457ce2d 100644 --- a/.changesets/maint_garypen_3533_istio_warn.md +++ b/.changesets/maint_garypen_3533_istio_warn.md @@ -4,6 +4,6 @@ We have encountered situations where the injection of istio-proxy in a router po The root cause is that the router is executing and attempting to retrieve uplink schemas while the istio-proxy is simultaneously modifying network configuration. -This new warning message will direct users to information which should help them to configure their kubernetes cluster or pod to avoid this problem. +This new warning message directs users to information which should help them to configure their Kubernetes cluster or pod to avoid this problem. 
By [@garypen](https://github.com/garypen) in https://github.com/apollographql/router/pull/3545 \ No newline at end of file From ec72e052c0eb5f5b63095e53f1d340f0255e7fd6 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Wed, 9 Aug 2023 08:36:00 +0100 Subject: [PATCH 8/9] Update docs/source/containerization/kubernetes.mdx Co-authored-by: Maria Elisabeth Schreiber --- docs/source/containerization/kubernetes.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/containerization/kubernetes.mdx b/docs/source/containerization/kubernetes.mdx index c41eb20c73..7d0ae9d90d 100644 --- a/docs/source/containerization/kubernetes.mdx +++ b/docs/source/containerization/kubernetes.mdx @@ -256,6 +256,6 @@ curl "http://localhost:8088/health" ## Using `istio` with the router -The [istio service mesh](https://istio.io/) is a very popular choice for enhanced traffic routing within kubernetes. +The [istio service mesh](https://istio.io/) is a very popular choice for enhanced traffic routing within Kubernetes. We have encountered an [issue](https://github.com/apollographql/router/issues/3533) with `istio-proxy` pod injection. It is possible for the router to start executing at the same time that istio is reconfiguring networking for the router pod. This is an issue with `istio`, not the router, and the fix is to follow the istio advice documented [here](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready). 
From b075be36de0fe638d4cff2c341a36f84f27e4347 Mon Sep 17 00:00:00 2001 From: Gary Pennington Date: Wed, 9 Aug 2023 08:36:36 +0100 Subject: [PATCH 9/9] Update docs/source/containerization/kubernetes.mdx Co-authored-by: Maria Elisabeth Schreiber --- docs/source/containerization/kubernetes.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/containerization/kubernetes.mdx b/docs/source/containerization/kubernetes.mdx index 7d0ae9d90d..46c4b30187 100644 --- a/docs/source/containerization/kubernetes.mdx +++ b/docs/source/containerization/kubernetes.mdx @@ -258,4 +258,4 @@ curl "http://localhost:8088/health" The [istio service mesh](https://istio.io/) is a very popular choice for enhanced traffic routing within Kubernetes. -We have encountered an [issue](https://github.com/apollographql/router/issues/3533) with `istio-proxy` pod injection. It is possible for the router to start executing at the same time that istio is reconfiguring networking for the router pod. This is an issue with `istio`, not the router, and the fix is to follow the istio advice documented [here](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready). +`istio-proxy` pod injection can cause an [issue](https://github.com/apollographql/router/issues/3533) in the router. The router may start executing at the same time that istio is reconfiguring networking for the router pod. This is an issue with `istio`, not the router, and you can resolve it by following the advice in [istio's injection documentation](https://istio.io/latest/docs/ops/common-problems/injection/#pod-or-containers-start-with-network-issues-if-istio-proxy-is-not-ready).