From 67a63949855490cc50712dcc71a12f5d768eb51d Mon Sep 17 00:00:00 2001 From: Matthew Keeler Date: Wed, 7 Aug 2024 12:46:25 -0400 Subject: [PATCH] feat: `NO_PROXY` environment variable can be used to override `HTTP(S)_PROXY` values When determining if a proxy should be used, the SDK would: 1. Check the `config.http_config.http_proxy` value. If that is set, use that value without further consideration. 2. If the target URI is `https`, use the value from the `HTTPS_PROXY` environment variable. 3. If the target is `http`, use `HTTP_PROXY` instead. The SDK will now support another environment variable -- `NO_PROXY`. This variable can be set to a comma-separated list of hosts to exclude from proxy support, or the special case '*' meaning to ignore all hosts. The `NO_PROXY` variable will only take effect if the SDK isn't explicitly configured to use a proxy as specified in #1 above. --- ldclient/impl/http.py | 70 ++++++++++++++++++++++++++--- ldclient/testing/impl/test_http.py | 57 +++++++++++++++++++++++ ldclient/testing/proxy_test_util.py | 22 ++++----- 3 files changed, 132 insertions(+), 17 deletions(-) create mode 100644 ldclient/testing/impl/test_http.py diff --git a/ldclient/impl/http.py b/ldclient/impl/http.py index 5fea123..bb7a262 100644 --- a/ldclient/impl/http.py +++ b/ldclient/impl/http.py @@ -2,6 +2,9 @@ import certifi from os import environ import urllib3 +from urllib.parse import urlparse +from typing import Tuple + def _application_header_value(application: dict) -> str: parts = [] @@ -34,9 +37,11 @@ def _base_headers(config): return headers + def _http_factory(config): return HTTPFactory(_base_headers(config), config.http) + class HTTPFactory: def __init__(self, base_headers, http_config, override_read_timeout=None): self.__base_headers = base_headers @@ -73,26 +78,77 @@ def create_pool_manager(self, num_pools, target_base_uri): num_pools=num_pools, cert_reqs=cert_reqs, ca_certs=ca_certs - ) + ) else: # Get proxy authentication, if provided url = 
urllib3.util.parse_url(proxy_url) proxy_headers = None - if url.auth != None: + if url.auth is not None: proxy_headers = urllib3.util.make_headers(proxy_basic_auth=url.auth) # Create a proxied connection return urllib3.ProxyManager( proxy_url, num_pools=num_pools, cert_reqs=cert_reqs, - ca_certs = ca_certs, + ca_certs=ca_certs, proxy_headers=proxy_headers ) + def _get_proxy_url(target_base_uri): + """ + Determine the proxy URL to use for a given target URI, based on the + environment variables http_proxy, https_proxy, and no_proxy. + + If the target URI is an https URL, the proxy will be determined from the HTTPS_PROXY variable. + If the target URI is not https, the proxy will be determined from the HTTP_PROXY variable. + + In either of the above instances, if the NO_PROXY variable contains either + the target domain or '*', no proxy will be used. + """ if target_base_uri is None: return None - is_https = target_base_uri.startswith('https:') - if is_https: - return environ.get('https_proxy') - return environ.get('http_proxy') + + target_host, target_port, is_https = _get_target_host_and_port(target_base_uri) + + proxy_url = environ.get('https_proxy') if is_https else environ.get('http_proxy') + no_proxy = environ.get('no_proxy', '').strip() + + if proxy_url is None or no_proxy == '*': + return None + elif no_proxy == '': + return proxy_url + + for no_proxy_entry in no_proxy.split(','): + parts = no_proxy_entry.strip().split(':') + if len(parts) == 1: + if target_host.endswith(no_proxy_entry): + return None + continue + + if target_host.endswith(parts[0]) and target_port == int(parts[1]): + return None + + return proxy_url + + +def _get_target_host_and_port(uri: str) -> Tuple[str, int, bool]: + """ + Given a URL, return the effective hostname, port, and whether it is considered a secure scheme. + + If a scheme is not supplied, the port is assumed to be 80 and the connection unsecure. + If a scheme and port is provided, the port will be parsed from the URI. 
+ If only a scheme is provided, the port will be 443 if the scheme is 'https', otherwise 80. + """ + if '//' not in uri: + parts = uri.split(':') + return (parts[0], int(parts[1]) if len(parts) > 1 else 80, False) + + parsed = urlparse(uri) + is_https = parsed.scheme == 'https' + + port = parsed.port + if port is None: + port = 443 if is_https else 80 + + return parsed.hostname or "", port, is_https diff --git a/ldclient/testing/impl/test_http.py b/ldclient/testing/impl/test_http.py new file mode 100644 index 0000000..f506c89 --- /dev/null +++ b/ldclient/testing/impl/test_http.py @@ -0,0 +1,57 @@ +import pytest + +from typing import Optional +from ldclient.impl.http import _get_proxy_url + + +@pytest.mark.parametrize( + 'target_uri, no_proxy, expected', + [ + ('https://secure.example.com', '', 'https://secure.proxy:1234'), + ('http://insecure.example.com', '', 'http://insecure.proxy:6789'), + + ('https://secure.example.com', 'secure.example.com', None), + ('https://secure.example.com', 'secure.example.com:443', None), + ('https://secure.example.com', 'secure.example.com:80', 'https://secure.proxy:1234'), + ('https://secure.example.com', 'wrong.example.com', 'https://secure.proxy:1234'), + ('https://secure.example.com:8080', 'secure.example.com', None), + ('https://secure.example.com:8080', 'secure.example.com:443', 'https://secure.proxy:1234'), + + ('https://secure.example.com', 'example.com', None), + ('https://secure.example.com', 'example.com:443', None), + ('https://secure.example.com', 'example.com:80', 'https://secure.proxy:1234'), + + ('http://insecure.example.com', 'insecure.example.com', None), + ('http://insecure.example.com', 'insecure.example.com:443', 'http://insecure.proxy:6789'), + ('http://insecure.example.com', 'insecure.example.com:80', None), + ('http://insecure.example.com', 'wrong.example.com', 'http://insecure.proxy:6789'), + ('http://insecure.example.com:8080', 'secure.example.com', None), + ('http://insecure.example.com:8080', 
'secure.example.com:443', 'http://insecure.proxy:6789'), + + ('http://insecure.example.com', 'example.com', None), + ('http://insecure.example.com', 'example.com:443', 'http://insecure.proxy:6789'), + ('http://insecure.example.com', 'example.com:80', None), + + ('secure.example.com', 'secure.example.com', None), + ('secure.example.com', 'secure.example.com:443', 'http://insecure.proxy:6789'), + ('secure.example.com', 'secure.example.com:80', None), + ('secure.example.com', 'wrong.example.com', 'http://insecure.proxy:6789'), + ('secure.example.com:8080', 'secure.example.com', None), + ('secure.example.com:8080', 'secure.example.com:80', 'http://insecure.proxy:6789'), + + ('https://secure.example.com', '*', None), + ('https://secure.example.com:8080', '*', None), + ('http://insecure.example.com', '*', None), + ('http://insecure.example.com:8080', '*', None), + ('secure.example.com:443', '*', None), + ('insecure.example.com:8080', '*', None), + ] +) +def test_honors_no_proxy(target_uri: str, no_proxy: str, expected: Optional[str], monkeypatch): + monkeypatch.setenv('https_proxy', 'https://secure.proxy:1234') + monkeypatch.setenv('http_proxy', 'http://insecure.proxy:6789') + monkeypatch.setenv('no_proxy', no_proxy) + + proxy_url = _get_proxy_url(target_uri) + + assert proxy_url == expected diff --git a/ldclient/testing/proxy_test_util.py b/ldclient/testing/proxy_test_util.py index dab075d..b9d1e85 100644 --- a/ldclient/testing/proxy_test_util.py +++ b/ldclient/testing/proxy_test_util.py @@ -1,5 +1,6 @@ from ldclient.config import Config, HTTPConfig -from ldclient.testing.http_util import start_server, BasicResponse, JsonResponse +from ldclient.testing.http_util import start_server + # Runs tests of all of our supported proxy server configurations: secure or insecure, configured # by Config.http_proxy or by an environment variable, with or without authentication. 
The action @@ -16,7 +17,8 @@ def do_proxy_tests(action, action_method, monkeypatch): (False, True, False), (True, False, False), (True, False, True), - (True, True, False)]: + (True, True, False) + ]: test_desc = "%s, %s, %s" % ( "using env vars" if use_env_vars else "using Config", "secure" if secure else "insecure", @@ -27,15 +29,15 @@ def do_proxy_tests(action, action_method, monkeypatch): if use_env_vars: monkeypatch.setenv('https_proxy' if secure else 'http_proxy', proxy_uri) config = Config( - sdk_key = 'sdk_key', - base_uri = target_uri, - events_uri = target_uri, - stream_uri = target_uri, - http = HTTPConfig(http_proxy=proxy_uri), - diagnostic_opt_out = True) + sdk_key='sdk_key', + base_uri=target_uri, + events_uri=target_uri, + stream_uri=target_uri, + http=HTTPConfig(http_proxy=proxy_uri), + diagnostic_opt_out=True) try: action(server, config, secure) - except: + except Exception: print("test action failed (%s)" % test_desc) raise # For an insecure proxy request, our stub server behaves enough like the real thing to satisfy the @@ -43,7 +45,7 @@ def do_proxy_tests(action, action_method, monkeypatch): # actually be an absolute URI for a proxy request. try: req = server.require_request() - except: + except Exception: print("server did not receive a request (%s)" % test_desc) raise expected_method = 'CONNECT' if secure else action_method