From aebff4791208ab8f2df0787f9147196ab392c91d Mon Sep 17 00:00:00 2001 From: Sean Vong-Karlage Date: Fri, 12 May 2023 21:10:23 -0700 Subject: [PATCH] hyper client: Implement NO_PROXY support Summary: Implements support for NO_PROXY definitions for proxies specified by HTTP_PROXY / HTTPS_PROXY. This takes [Curl's definition of NO_PROXY](https://everything.curl.dev/usingcurl/proxies/env#no-proxy), e.g * NO_PROXY=".facebook.com" will not proxy connections to www.facebook.com * NO_PROXY="facebook.com" will _also_ not proxy connections to www.facebook.com * Supports defining NO_PROXY in terms of domain names, IP addresses, and IP network definitions (in CIDR format). The implementation initially started off heavily inspired by [`request::NoProxy`](https://github.com/seanmonstar/reqwest/blob/master/src/proxy.rs#L414) but ended up changing a decent amount because of how `hyper::Proxy::Custom` works. Reviewed By: krallin Differential Revision: D45804045 fbshipit-source-id: 569c605e458eeda3ca605d176ca64e3865e3f70a --- Cargo.toml | 1 + app/buck2_common/BUCK | 1 + app/buck2_common/Cargo.toml | 1 + app/buck2_common/src/http/mod.rs | 128 ++++++----- app/buck2_common/src/http/proxy.rs | 326 +++++++++++++++++++++++++++++ shim/third-party/rust/Cargo.toml | 1 + 6 files changed, 400 insertions(+), 58 deletions(-) create mode 100644 app/buck2_common/src/http/proxy.rs diff --git a/Cargo.toml b/Cargo.toml index f5336490640e..b215b5290eb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -152,6 +152,7 @@ hyper = { version = "0.14.7", features = ["client", "http1", "http2"] } hyper-proxy = { git = "https://github.com/get9/hyper-proxy", rev = "0a5aa97a3cd00ca5be1d21559dcf8ef29dc63e08", features = ["rustls"], default_features = false } # branch = tokio-rustls-0.23 Many PRs to bump versions (#28, #30, #31) are several years old, possibly abandoned crate. This fork contains changes from #28. hyper-rustls = { version = "0.23.0", features = ["http2"] } hyper-unix-connector = "0.2" +ipnetwork = "0.15" indent_write = "2.2.0" indenter = "0.3.3" indexmap = { version = "1.9.1", features = ["serde-1"] } diff --git a/app/buck2_common/BUCK b/app/buck2_common/BUCK index 61f7d601728b..ad815dd59048 100644 --- a/app/buck2_common/BUCK +++ b/app/buck2_common/BUCK @@ -60,6 +60,7 @@ rust_library( "fbsource//third-party/rust:hyper-proxy", "fbsource//third-party/rust:hyper-rustls", "fbsource//third-party/rust:indexmap", + "fbsource//third-party/rust:ipnetwork", "fbsource//third-party/rust:itertools", "fbsource//third-party/rust:libc", "fbsource//third-party/rust:num_enum", diff --git a/app/buck2_common/Cargo.toml b/app/buck2_common/Cargo.toml index 630b5d1b6693..4e07e86c1bc5 100644 --- a/app/buck2_common/Cargo.toml +++ b/app/buck2_common/Cargo.toml @@ -22,6 +22,7 @@ hyper = { workspace = true } hyper-proxy = { workspace = true } hyper-rustls = { workspace = true } indexmap = { workspace = true } +ipnetwork = { workspace = true } itertools = { workspace = true } libc = { workspace = true } once_cell = { workspace = true } diff --git a/app/buck2_common/src/http/mod.rs b/app/buck2_common/src/http/mod.rs index bbc489d6df3e..94a25f532b4f 100644 --- a/app/buck2_common/src/http/mod.rs +++ b/app/buck2_common/src/http/mod.rs @@ -10,7 +10,6 @@ use std::fs::File; use std::io::BufReader; use std::path::Path; -use std::str::FromStr; use std::sync::Arc; use allocative::Allocative; @@ -19,11 +18,7 @@ use buck2_core::is_open_source; use dice::UserComputationData; use dupe::Dupe; use gazebo::prelude::VecExt; -use http::uri::InvalidUri; -use http::uri::PathAndQuery; -use http::uri::Scheme; use http::Method; -use http::Uri; use hyper::body; use hyper::client::connect::Connect; use hyper::client::ResponseFuture; @@ -31,7 +26,6 @@ use hyper::Body; use hyper::Request; use hyper::Response; use hyper::StatusCode; -use hyper_proxy::Intercept; use hyper_proxy::Proxy; use hyper_proxy::ProxyConnector; use hyper_rustls::HttpsConnectorBuilder; @@ -42,7 +36,10 @@ use rustls::RootCertStore; use thiserror::Error; use tokio_rustls::TlsConnector; +mod proxy; mod redirect; +use proxy::http_proxy_from_env; +use proxy::https_proxy_from_env; use redirect::PendingRequest; use redirect::RedirectEngine; @@ -71,10 +68,10 @@ pub fn http_client_for_oss() -> anyhow::Result> { // Add standard proxy variables if defined. // Ignores values that cannot be turned into valid URIs. let mut proxies = Vec::new(); - if let Some(proxy) = https_proxy_from_env() { + if let Some(proxy) = https_proxy_from_env()? { proxies.push(proxy); } - if let Some(proxy) = http_proxy_from_env() { + if let Some(proxy) = http_proxy_from_env()? { proxies.push(proxy); } @@ -140,53 +137,6 @@ impl SetHttpClient for UserComputationData { } } -/// Lookup environment variable and return string value. Checks first for uppercase -/// and falls back to lowercase if unset. -fn env_to_string(env: &'static str) -> Option { - std::env::var_os(env) - .or_else(|| std::env::var_os(env.to_lowercase())) - .and_then(|s| s.into_string().ok()) -} - -fn https_proxy_from_env() -> Option { - env_to_string("HTTPS_PROXY") - .and_then(|https_proxy| https_proxy.parse::().ok()) - .map(|uri| Proxy::new(Intercept::Https, uri.into())) -} - -fn http_proxy_from_env() -> Option { - env_to_string("HTTP_PROXY") - .and_then(|http_proxy| http_proxy.parse::().ok()) - .map(|uri| Proxy::new(Intercept::Http, uri.into())) -} - -/// A wrapped Uri that handles inserting a default scheme (http) if one is not present. -/// -/// See https://everything.curl.dev/usingcurl/proxies/type for more information about -/// how curl treats default schemes for e.g. proxy env vars. -struct DefaultSchemeUri(Uri); - -impl FromStr for DefaultSchemeUri { - type Err = InvalidUri; - - fn from_str(s: &str) -> Result { - s.parse::().map(Self) - } -} - -impl From for Uri { - fn from(default_scheme_uri: DefaultSchemeUri) -> Self { - let mut parts = default_scheme_uri.0.into_parts(); - if parts.scheme.is_none() { - parts.scheme = Some(Scheme::HTTP); - } - if parts.path_and_query.is_none() { - parts.path_and_query = Some(PathAndQuery::from_static("/")); - } - Uri::from_parts(parts).expect("Got invalid uri from formerly valid uri") - } -} - /// Load the system root certificates into rustls cert store. fn load_system_root_certs() -> anyhow::Result { let mut roots = rustls::RootCertStore::empty(); @@ -518,8 +468,8 @@ mod tests { }) } - fn uri(&self) -> anyhow::Result { - Uri::builder() + fn uri(&self) -> anyhow::Result { + http::Uri::builder() .scheme("http") .authority(self.addr.to_string().as_str()) .path_and_query("/") @@ -717,7 +667,69 @@ mod tests { println!("proxy_uri: {}", proxy_uri); let client = SecureProxiedClient::with_proxies([Proxy::new( hyper_proxy::Intercept::Http, - DefaultSchemeUri(proxy_uri.try_into()?).into(), + crate::http::proxy::DefaultSchemeUri(proxy_uri.try_into()?).into(), + )])?; + let resp = client.get(&test_server.url_str("/foo")).await?; + assert_eq!(200, resp.status().as_u16()); + + Ok(()) + } + + #[tokio::test] + #[cfg(any(fbcode_build, cargo_internal_build))] // TODO(@akozhevnikov): Debug why this fails on CircleCI + async fn test_does_not_proxy_when_no_proxy_matches() -> anyhow::Result<()> { + let test_server = httptest::Server::run(); + test_server.expect( + Expectation::matching(all_of![request::method_path("GET", "/foo")]) + .times(1) + .respond_with(responders::status_code(200)), + ); + + let proxy_server = ProxyServer::new().await?; + println!("proxy_server uri: {}", proxy_server.uri()?); + + let test_server_host = test_server + .url("/") + .authority() + .unwrap() + .clone() + .host() + .to_owned(); + let no_proxy = crate::http::proxy::NoProxy::new(http::uri::Scheme::HTTP, test_server_host); + + // Don't proxy connections to test_server. + let client = SecureProxiedClient::with_proxies([Proxy::new( + no_proxy.into_proxy_intercept(), + proxy_server.uri()?, + )])?; + let resp = client.get(&test_server.url_str("/foo")).await?; + assert_eq!(200, resp.status().as_u16()); + + Ok(()) + } + + #[tokio::test] + #[cfg(any(fbcode_build, cargo_internal_build))] // TODO(@akozhevnikov): Debug why this fails on CircleCI + async fn test_proxies_when_no_proxy_does_not_match() -> anyhow::Result<()> { + let test_server = httptest::Server::run(); + test_server.expect( + Expectation::matching(all_of![ + request::method_path("GET", "/foo"), + request::headers(contains(("via", "testing-proxy-server"))) + ]) + .times(1) + .respond_with(responders::status_code(200)), + ); + + let proxy_server = ProxyServer::new().await?; + println!("proxy_server uri: {}", proxy_server.uri()?); + + // Don't proxy HTTPS connections to *.foobar.com + let no_proxy = crate::http::proxy::NoProxy::new(http::uri::Scheme::HTTP, ".foobar.com"); + + let client = SecureProxiedClient::with_proxies([Proxy::new( + no_proxy.into_proxy_intercept(), + proxy_server.uri()?, )])?; let resp = client.get(&test_server.url_str("/foo")).await?; assert_eq!(200, resp.status().as_u16()); diff --git a/app/buck2_common/src/http/proxy.rs b/app/buck2_common/src/http/proxy.rs new file mode 100644 index 000000000000..2342b2e34491 --- /dev/null +++ b/app/buck2_common/src/http/proxy.rs @@ -0,0 +1,326 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under both the MIT license found in the + * LICENSE-MIT file in the root directory of this source tree and the Apache + * License, Version 2.0 found in the LICENSE-APACHE file in the root directory + * of this source tree. + */ + +use std::net::IpAddr; +use std::str::FromStr; + +use anyhow::Context; +use http::uri::InvalidUri; +use http::uri::PathAndQuery; +use http::uri::Scheme; +use http::Uri; +use hyper_proxy::Intercept; +use hyper_proxy::Proxy; +use ipnetwork::IpNetwork; + +/// Lookup environment variable and return string value. Checks first for uppercase +/// and falls back to lowercase if unset. +fn env_to_string(env: &'static str) -> anyhow::Result> { + std::env::var_os(env) + .or_else(|| std::env::var_os(env.to_lowercase())) + .map(|s| s.into_string()) + .transpose() + .map_err(|original| anyhow::anyhow!("Invalid utf8 string: '{:?}'", original)) +} + +fn noproxy_from_env(scheme: Scheme) -> anyhow::Result> { + Ok(env_to_string("NO_PROXY")?.map(|no_proxy| NoProxy::new(scheme, no_proxy))) +} + +/// Returns a hyper_proxy::Proxy struct that proxies connections to the uri at +/// $HTTPS_PROXY (or $https_proxy if the former is unset). Respects $NO_PROXY. +pub(super) fn https_proxy_from_env() -> anyhow::Result> { + if let Some(https_proxy) = env_to_string("HTTPS_PROXY")? { + let uri: DefaultSchemeUri = https_proxy + .parse() + .with_context(|| format!("Invalid HTTPS_PROXY uri: {}", https_proxy))?; + if let Some(no_proxy) = noproxy_from_env(Scheme::HTTPS)? { + Ok(Some(Proxy::new( + no_proxy.into_proxy_intercept(), + uri.into(), + ))) + } else { + Ok(Some(Proxy::new(Intercept::Https, uri.into()))) + } + } else { + Ok(None) + } +} + +/// Returns a hyper_proxy::Proxy struct that proxies connections to the uri at +/// $HTTP_PROXY (or $http_proxy if the former is unset). Respects $NO_PROXY. +pub(super) fn http_proxy_from_env() -> anyhow::Result> { + if let Some(http_proxy) = env_to_string("HTTP_PROXY")? { + let uri: DefaultSchemeUri = http_proxy + .parse() + .with_context(|| format!("Invalid HTTP_PROXY uri: {}", http_proxy))?; + if let Some(no_proxy) = noproxy_from_env(Scheme::HTTP)? { + Ok(Some(Proxy::new( + no_proxy.into_proxy_intercept(), + uri.into(), + ))) + } else { + Ok(Some(Proxy::new(Intercept::Http, uri.into()))) + } + } else { + Ok(None) + } +} + +/// A wrapped Uri that handles inserting a default scheme (http) if one is not present. +/// +/// See https://everything.curl.dev/usingcurl/proxies/type for more information about +/// how curl treats default schemes for e.g. proxy env vars. +pub(super) struct DefaultSchemeUri(pub(super) Uri); + +impl FromStr for DefaultSchemeUri { + type Err = InvalidUri; + + fn from_str(s: &str) -> Result { + s.parse::().map(Self) + } +} + +impl From for Uri { + fn from(default_scheme_uri: DefaultSchemeUri) -> Self { + let mut parts = default_scheme_uri.0.into_parts(); + if parts.scheme.is_none() { + parts.scheme = Some(Scheme::HTTP); + } + if parts.path_and_query.is_none() { + parts.path_and_query = Some(PathAndQuery::from_static("/")); + } + Uri::from_parts(parts).expect("Got invalid uri from formerly valid uri") + } +} + +#[derive(Debug)] +struct Domain(String); + +impl Domain { + /// Returns whether this domain "matches" candidate according to Curl's rules + /// for NO_PROXY. + /// + /// See https://github.com/curl/curl/issues/1208 for a bit of discussion about + /// some of the particulars of subdomain matching. + fn is_match>(&self, candidate: S) -> bool { + let candidate = candidate.as_ref(); + // * unambiguously matches all domains. + self.0 == "*" + // Exact match + || self.0 == candidate + // . matches all subdomains, look for exact match + || self.0.trim_start_matches('.') == candidate + // Candidate suffixed by domain, only match if candidate is a subdomain of domain + // Ex: domain=".facebook.com" matches "images.facebook.com" but not "www.thefacebook.com" + || candidate.trim_end_matches(self.0.as_str().trim_start_matches('.')).ends_with('.') + } +} + +/// Wrapper for the parsed version of Curl's "no proxy" format from the standard +/// NO_PROXY / no_proxy environment variables. +/// +/// Uses hyper-proxy's Intercept::Custom to drive matching logic for whether Uris +/// should be proxied or not. +/// +/// Incorporates a scheme to proxy connections as well. Connections matching this +/// (scheme, no_proxy_spec) pair will *not* be proxied. +/// +/// Matching logic derived from reqwest::NoProxy (e.g. [here](https://github.com/seanmonstar/reqwest/blob/master/src/proxy.rs#L467)). +pub(super) struct NoProxy { + addresses: Vec, + networks: Vec, + domains: Vec, + proxy_scheme: Scheme, +} + +impl NoProxy { + /// Constructs a new NoProxy struct. Connections made to a host matching the + /// no_proxy spec with a scheme of `proxy_scheme` WILL NOT be proxied (e.g. + /// requests to these hosts with `proxy_scheme` will go directly to the dest). + pub(super) fn new>(proxy_scheme: Scheme, s: S) -> Self { + let mut addresses = Vec::new(); + let mut networks = Vec::new(); + let mut domains = Vec::new(); + let s = s.as_ref(); + for entity in s.split(',').map(str::trim) { + if let Ok(network) = entity.parse::() { + networks.push(network); + } else if let Ok(address) = entity.parse::() { + addresses.push(address); + } else { + domains.push(Domain(entity.to_owned())); + } + } + + Self { + addresses, + networks, + domains, + proxy_scheme, + } + } + + /// Returns whether `host` matches any of the NO_PROXY entities - i.e. that we + /// should *not* proxy requests to this host. + fn should_bypass_proxy_for_host>(&self, host: S) -> bool { + let host = host.as_ref(); + if let Ok(host_address) = host.parse::() { + self.addresses + .iter() + .any(|address| host_address == *address) + || self + .networks + .iter() + .any(|network| network.contains(host_address)) + } else { + self.domains.iter().any(|domain| domain.is_match(host)) + } + } + + /// Converts this NoProxy spec into a hyper_proxy::Intercept::Custom closure + /// so it can be used to build a new hyper_proxy::Proxy. + /// + /// Note: There's a tricky bit of logic below. We explicitly *negate* the return + /// condition of the closure because of the way hyper_proxy::Intercept::Custom's + /// closure works; if it returns `true`, the connection is proxied. + /// + /// For NoProxy, we want to *negate* this logic - if a (scheme, host) pair + /// match our NoProxy instance, we _don't want to proxy_. Therefore we negate + /// the return conditions below. + /// + /// Some examples to clarify: + /// + /// NO_PROXY=".facebook.com" for HTTPS + /// does not proxy https://www.facebook.com + /// does not proxy https://images.facebook.com + /// does proxy https://www.thefacebook.com + /// does proxy http://www.thefacebook.com + /// + /// NO_PROXY="192.168.0.1" for HTTP + /// does not proxy http://192.168.0.1 + /// does proxy https://192.168.0.1 + /// does proxy http://192.168.0.2 + pub(super) fn into_proxy_intercept(self) -> Intercept { + let should_proxy = move |scheme: Option<&str>, host: Option<&str>, _port: Option| { + // IPv6 addresses are wrapped in [ ] so remove those for equality checks. + let host = host.map(|h| h.trim_start_matches('[').trim_end_matches(']')); + let should_bypass_proxy = + host.map_or(false, |host| self.should_bypass_proxy_for_host(host)); + + // Negation happens here - true means we're going to proxy the connection. + !should_bypass_proxy + && self.proxy_scheme.as_str() == scheme.unwrap_or(Scheme::HTTP.as_str()) + }; + should_proxy.into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn uri(s: &'static str) -> Uri { + s.parse().unwrap() + } + + #[test] + fn test_domain_match() { + let d = Domain(".facebook.com".to_owned()); + assert!(d.is_match("www.facebook.com")); + assert!(!d.is_match("boofacebook.com")); + + let d = Domain("facebook.com".to_owned()); + assert!(d.is_match("facebook.com")); + assert!(d.is_match("www.facebook.com")); + + let d = Domain("photos.facebook.com".to_owned()); + assert!(!d.is_match("facebook.com")); + assert!(d.is_match("jpg.photos.facebook.com")); + + let d = Domain("*".to_owned()); + assert!(d.is_match("www.facebook.com")); + assert!(d.is_match("facebook.com")); + } + + #[test] + fn test_noproxy_empty_string_does_not_match() { + let noproxy = NoProxy::new(Scheme::HTTP, ""); + assert!(!noproxy.should_bypass_proxy_for_host("facebook.com")); + } + + #[test] + fn test_noproxy_matches_ip_address() { + let noproxy = NoProxy::new(Scheme::HTTP, "192.168.0.1"); + assert!(noproxy.should_bypass_proxy_for_host("192.168.0.1")); + } + + #[test] + fn test_noproxy_matches_ip_network() { + let noproxy = NoProxy::new(Scheme::HTTP, "192.168.0.0/16"); + assert!(noproxy.should_bypass_proxy_for_host("192.168.0.1")); + } + + #[test] + fn test_noproxy_matches_subdomain() { + let noproxy = NoProxy::new(Scheme::HTTP, ".facebook.com"); + assert!(noproxy.should_bypass_proxy_for_host("images.facebook.com")); + } + + #[test] + fn test_noproxy_matches_multiple() { + let noproxy = NoProxy::new(Scheme::HTTP, ".facebook.com, 192.168.0.0/24, 28.0.0.1"); + assert!(noproxy.should_bypass_proxy_for_host("images.facebook.com")); + assert!(noproxy.should_bypass_proxy_for_host("192.168.0.1")); + assert!(!noproxy.should_bypass_proxy_for_host("28.0.0.2")); + } + + #[test] + fn test_noproxy_intercept_does_not_proxy_for_ip_addr_match() { + let noproxy = NoProxy::new(Scheme::HTTPS, "192.168.0.1"); + let intercept = noproxy.into_proxy_intercept(); + // DON'T proxy https connections to 192.168.0.1 because it's an IP match + assert!(!intercept.matches(&uri("https://192.168.0.1/foo"))); + // DON'T proxy http connections to 192.168.0.1 because it's a different scheme + assert!(!intercept.matches(&uri("http://192.168.0.1/foo"))); + // DO proxy https connections to 192.168.0.2 because no IP match and schemes match + assert!(intercept.matches(&uri("https://192.168.0.2/bar"))); + } + + #[test] + fn test_noproxy_intercept_does_not_proxy_for_ip_net_match() { + let noproxy = NoProxy::new(Scheme::HTTPS, "192.168.0.0/24"); + let intercept = noproxy.into_proxy_intercept(); + // DON'T proxy https to 192.168.0.1 because IP and scheme match + assert!(!intercept.matches(&uri("https://192.168.0.1/foo"))); + // DO proxy https to 192.168.1.1 because IP mismatch and scheme match + assert!(intercept.matches(&uri("https://192.168.1.1/foo"))); + // DON'T proxy http to 192.168.0.1 because scheme mismatch + assert!(!intercept.matches(&uri("http://192.168.0.1/foo"))); + } + + #[test] + fn test_noproxy_intercept_does_not_proxy_for_domain_match() { + let noproxy = NoProxy::new(Scheme::HTTPS, ".facebook.com"); + let intercept = noproxy.into_proxy_intercept(); + // DON'T proxy because scheme matches and is subdomain. + assert!(!intercept.matches(&uri("https://www.facebook.com/foo/bar"))); + // DO proxy because scheme matches but domain is different. + assert!(intercept.matches(&uri("https://www.thefacebook.com/foo/bar"))); + // DON'T proxy because scheme mismatch + assert!(!intercept.matches(&uri("http://www.facebook.com/foo/bar"))); + } + + #[test] + fn test_noproxy_intercept_does_not_proxy_for_scheme_mismatch() { + let noproxy = NoProxy::new(Scheme::HTTP, ".facebook.com"); + let intercept = noproxy.into_proxy_intercept(); + assert!(!intercept.matches(&uri("https://www.facebook.com/foo/bar"))); + } +} diff --git a/shim/third-party/rust/Cargo.toml b/shim/third-party/rust/Cargo.toml index bec4b84a758b..16e1dca46729 100644 --- a/shim/third-party/rust/Cargo.toml +++ b/shim/third-party/rust/Cargo.toml @@ -97,6 +97,7 @@ indoc = "1.0.3" inferno = { version = "0.11.11", default-features = false } internment = { version = "0.7", features = ["arc"] } inventory = "0.1.9" +ipnetwork = "0.15" is_proc_translated = "0.1.1" itertools = "0.10.3" jemallocator = { version = "0.5.0", features = ["profiling"] }