diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index d1908b2..a6b6498 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -26,6 +26,7 @@ jobs: GEN: Ninja VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + HTTP_PROXY_RUNNING: '1' steps: - name: Install required ubuntu packages @@ -56,7 +57,7 @@ jobs: - name: install Azure test service if: ${{ matrix.arch == 'linux_amd64_gcc4' }} run: | - yum install -y nodejs npm + yum install -y nodejs npm squid npm install -g azurite echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo yum install -y azure-cli @@ -65,9 +66,10 @@ jobs: if: ${{ matrix.arch == 'linux_amd64' }} run: | curl -fsSL https://deb.nodesource.com/setup_16.x | bash - apt-get install -y -qq nodejs + apt-get install -y -qq nodejs squid node -v npm -v + squid --version npm install -g azurite curl -sL https://aka.ms/InstallAzureCLIDeb | bash @@ -75,6 +77,8 @@ jobs: if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_amd64_gcc4' }} run: | azurite > azurite_log.txt 2>&1 & + ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & + ./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth & sleep 10 ./scripts/upload_test_files_to_azurite.sh @@ -121,4 +125,11 @@ jobs: if: always() && matrix.arch == 'linux_amd64_gcc4' shell: bash run: | - cat azurite_log.txt \ No newline at end of file + echo "## azurite" + cat azurite_log.txt + + echo "## squid" + cat 
squid_logs/* + + echo "## squid auth" + cat squid_auth_logs/* diff --git a/.github/workflows/MacOS.yml b/.github/workflows/MacOS.yml index ba91c61..eff4300 100644 --- a/.github/workflows/MacOS.yml +++ b/.github/workflows/MacOS.yml @@ -25,6 +25,7 @@ jobs: GEN: Ninja VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + HTTP_PROXY_RUNNING: '1' steps: - uses: actions/checkout@v3 @@ -59,7 +60,10 @@ jobs: - name: Launch & populate Azure test service if: ${{ matrix.osx_build_arch == 'x86_64'}} run: | + brew install squid npm install -g azurite + ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & + ./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth & azurite > azurite_log.txt 2>&1 & sleep 10 ./scripts/upload_test_files_to_azurite.sh @@ -74,4 +78,11 @@ jobs: if: always() && matrix.osx_build_arch == 'x86_64' shell: bash run: | - cat azurite_log.txt \ No newline at end of file + echo "## azurite" + cat azurite_log.txt + + echo "## squid" + cat squid_logs/* + + echo "## squid with auth" + cat squid_auth_logs/* diff --git a/.gitignore b/.gitignore index b9f264b..72d2124 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,23 @@ build .idea +.vscode cmake-build-debug duckdb_unittest_tempdir/ .DS_Store testext test/python/__pycache__/ .Rhistory + +# Azurite +__blobstorage__/ +__queuestorage__/ +__azurite_db_*__.json + +# squid +squid_logs +squid_auth_logs +squid_users +squid.pid +squid_auth.pid +squid.conf +squid_auth.conf diff --git a/scripts/run_squid.sh b/scripts/run_squid.sh new file mode 100755 index 0000000..4964af0 --- /dev/null +++ b/scripts/run_squid.sh @@ -0,0 +1,92 @@ 
+#!/bin/bash +# +# Write a squid configuration file in the current directory and run squid with it +# without daemonizing (-N). CI uses it to provide the HTTP proxies the tests go through. + 
+help() { + echo "Usage: ${0} [-p|--port PORT] [--log_dir DIR] [--auth]" + echo "  -p, --port PORT  Port number for squid to listen on (default: 3128)" + echo "  --log_dir DIR    Directory where squid writes its logs (default: squid_logs)" + echo "  --auth           Require basic user authentication (otherwise no authentication is required)" + echo "  -h, --help       Show this help and exit" + exit 0 +} + +port='3128' +auth='false' +log_dir="squid_logs" +conf_file="squid.conf" +# NOTE(review): single quotes keep ${service_name} literal in the generated file; +# presumably squid expands it itself when reading the configuration - confirm. +pid_file='${service_name}.pid' + +while [[ $# -gt 0 ]]; do + case "${1}" in + -h|--help) + help + ;; + -p|--port) + port="${2}" + shift # past argument + shift # past value + ;; + --auth) + auth='true' + conf_file="squid_auth.conf" + pid_file='${service_name}_auth.pid' + shift # past argument + ;; + --log_dir) + log_dir="${2}" + shift # past argument + shift # past value + ;; + *) + echo "Unknown option ${1}" + exit 1 + ;; + esac +done + +# -p: do not fail when the log directory is left over from a previous run +mkdir -p "${log_dir}" +touch "${log_dir}/daemon.log" +chmod -R 777 "${log_dir}" + +echo "http_port 127.0.0.1:${port}" >"${conf_file}" +echo "pid_filename ${pid_file}" >>"${conf_file}" + +echo 'logfile_rotate 0' >>"${conf_file}" +echo "logfile_daemon ${log_dir}/daemon.log" >>"${conf_file}" +echo "access_log ${log_dir}/access.log" >>"${conf_file}" +echo "cache_log ${log_dir}/cache.log" >>"${conf_file}" +echo "cache_store_log ${log_dir}/cache_store.log" >>"${conf_file}" + + +if [[ "${auth}" == "true" ]]; then + # User 'john' with password 'doe' + echo 'john:$apr1$dalj9e7s$AhqY28Hvl3EcNblNJMiXa0' >squid_users + + squid_version="$(squid -v | head -n1 | grep -o 'Version [^ ]*' | cut -d ' ' -f 2)" + if [[ "$(uname)" == "Darwin" ]]; then + auth_basic_program="/usr/local/Cellar/squid/${squid_version}/libexec/basic_ncsa_auth" + else + if [[ -e '/usr/lib64/squid/basic_ncsa_auth' ]]; then + auth_basic_program="/usr/lib64/squid/basic_ncsa_auth" + else + auth_basic_program="/usr/lib/squid/basic_ncsa_auth" + fi + fi + + echo '# Add authentification options' >>"${conf_file}" + echo "auth_param basic program ${auth_basic_program} squid_users" >>"${conf_file}" + echo 'auth_param basic children 3' >>"${conf_file}" + echo 'auth_param 
basic realm Squid BA' >>"${conf_file}" + echo 'acl auth_users proxy_auth REQUIRED' >>"${conf_file}" + echo 'http_access allow auth_users' >>"${conf_file}" + echo 'http_access deny all' >>"${conf_file}" +else + echo 'http_access allow localhost' >>"${conf_file}" +fi + +exec squid -N -f "${conf_file}" diff --git a/src/azure_extension.cpp b/src/azure_extension.cpp index df2cda1..a801c5f 100644 --- a/src/azure_extension.cpp +++ b/src/azure_extension.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace duckdb { @@ -56,19 +57,22 @@ static void Log(Logger::Level level, std::string const &message) { } static Azure::Identity::ChainedTokenCredential::Sources -CreateCredentialChainFromSetting(const string &credential_chain) { +CreateCredentialChainFromSetting(const string &credential_chain, + const Azure::Core::Http::Policies::TransportOptions &transport_options) { auto chain_list = StringUtil::Split(credential_chain, ';'); Azure::Identity::ChainedTokenCredential::Sources result; + Azure::Core::Credentials::TokenCredentialOptions options; + options.Transport = transport_options; for (const auto &item : chain_list) { if (item == "cli") { - result.push_back(std::make_shared()); + result.push_back(std::make_shared(options)); } else if (item == "managed_identity") { - result.push_back(std::make_shared()); + result.push_back(std::make_shared(options)); } else if (item == "env") { - result.push_back(std::make_shared()); + result.push_back(std::make_shared(options)); } else if (item == "default") { - result.push_back(std::make_shared()); + result.push_back(std::make_shared(options)); } else if (item != "none") { throw InvalidInputException("Unknown credential provider found: " + item); } @@ -114,6 +118,22 @@ static AzureAuthentication ParseAzureAuthSettings(FileOpener *opener, const stri } } + // Load proxy options; a NULL setting is skipped so it is never stringified into a proxy value + Value http_proxy; + if (FileOpener::TryGetCurrentSetting(opener, "azure_http_proxy", http_proxy) && !http_proxy.IsNull()) { + auth.proxy_options.http_proxy = 
http_proxy.ToString(); + } + + Value http_proxy_user_name; + if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_user_name", http_proxy_user_name) && !http_proxy_user_name.IsNull()) { + auth.proxy_options.user_name = http_proxy_user_name.ToString(); + } + + Value http_proxy_password; + if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_password", http_proxy_password) && !http_proxy_password.IsNull()) { + auth.proxy_options.password = http_proxy_password.ToString(); + } + return auth; } @@ -138,6 +158,47 @@ static AzureReadOptions ParseAzureReadOptions(FileOpener *opener) { return options; } +static Azure::Core::Http::Policies::TransportOptions GetTransportOptions(AzureAuthentication &auth) { + Azure::Core::Http::Policies::TransportOptions options; + if (auth.secret) { + auto http_proxy = auth.secret->TryGetValue("http_proxy"); + if (!http_proxy.IsNull()) { + options.HttpProxy = http_proxy.ToString(); + } else { + // Keep honoring the env variable if present + auto *http_proxy_env = std::getenv("HTTP_PROXY"); + if (http_proxy_env != nullptr) { + options.HttpProxy = http_proxy_env; + } + } + + auto http_proxy_user_name = auth.secret->TryGetValue("proxy_user_name"); + if (!http_proxy_user_name.IsNull()) { + options.ProxyUserName = http_proxy_user_name.ToString(); + } + + auto http_proxy_password = auth.secret->TryGetValue("proxy_password"); + if (!http_proxy_password.IsNull()) { + options.ProxyPassword = http_proxy_password.ToString(); + } + } else { + const auto &proxy_options = auth.proxy_options; + if (!proxy_options.http_proxy.empty()) { + options.HttpProxy = proxy_options.http_proxy; + } + + if (!proxy_options.user_name.empty()) { + options.ProxyUserName = proxy_options.user_name; + } + + if (!proxy_options.password.empty()) { + options.ProxyPassword = proxy_options.password; + } + } + + return options; +} + static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthentication &auth, AzureParsedUrl &url) { string connection_string; bool use_secret = false; @@ -145,13 +206,17 @@ static 
Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen string account_name; string endpoint; + auto transport_options = GetTransportOptions(auth); + Azure::Storage::Blobs::BlobClientOptions options; + options.Transport = transport_options; + // Firstly, try to use the auth from the secret if (auth.secret) { // If connection string, we're done heres auto connection_string_value = auth.secret->TryGetValue("connection_string"); if (!connection_string_value.IsNull()) { return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString( - connection_string_value.ToString(), url.container); + connection_string_value.ToString(), url.container, options); } // Account_name can be used both for unauthenticated @@ -181,7 +246,7 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen if (!auth.connection_string.empty()) { return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(auth.connection_string, - url.container); + url.container, options); } } @@ -192,17 +257,17 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen // Build credential chain, from last to first Azure::Identity::ChainedTokenCredential::Sources credential_chain; if (!chain.empty()) { - credential_chain = CreateCredentialChainFromSetting(chain); + credential_chain = CreateCredentialChainFromSetting(chain, transport_options); } auto accountURL = "https://" + account_name + "." 
+ endpoint; if (!credential_chain.empty()) { // A set of credentials providers was passed auto chainedTokenCredential = std::make_shared(credential_chain); - Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential); + Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential, options); return blob_service_client.GetBlobContainerClient(url.container); } else if (!account_name.empty()) { - return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container); + return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container, options); } else { throw InvalidInputException("No valid Azure credentials found!"); } @@ -361,6 +426,17 @@ static void LoadInternal(DatabaseInstance &instance) { "Size of the read buffer. It is recommended that this is evenly divisible by " "azure_read_transfer_chunk_size.", LogicalType::UBIGINT, Value::UBIGINT(default_read_options.buffer_size)); + + auto *http_proxy = std::getenv("HTTP_PROXY"); + Value default_http_value = http_proxy ? Value(http_proxy) : Value(nullptr); + config.AddExtensionOption("azure_http_proxy", + "Proxy to use when login & performing request to azure. 
" + "By default it will use the HTTP_PROXY environment variable if set.", + LogicalType::VARCHAR, default_http_value); + config.AddExtensionOption("azure_proxy_user_name", "Http proxy user name if needed.", LogicalType::VARCHAR, + Value(nullptr)); + config.AddExtensionOption("azure_proxy_password", "Http proxy password if needed.", LogicalType::VARCHAR, + Value(nullptr)); } int64_t AzureStorageFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) { diff --git a/src/azure_secret.cpp b/src/azure_secret.cpp index 46dc108..499d300 100644 --- a/src/azure_secret.cpp +++ b/src/azure_secret.cpp @@ -1,5 +1,7 @@ #include "azure_secret.hpp" +#include "duckdb/common/unique_ptr.hpp" #include "duckdb/main/extension_util.hpp" +#include "duckdb/main/secret/secret.hpp" #include #include #include @@ -18,6 +20,26 @@ static string TryGetStringParam(CreateSecretInput &input, const string &param_na } } +static void FillWithAzureProxyInfo(ClientContext &context, CreateSecretInput &input, KeyValueSecret &result) { + string http_proxy = TryGetStringParam(input, "http_proxy"); + string proxy_user_name = TryGetStringParam(input, "proxy_user_name"); + string proxy_password = TryGetStringParam(input, "proxy_password"); + + // Proxy info + if (!http_proxy.empty()) { + result.secret_map["http_proxy"] = http_proxy; + } + if (!proxy_user_name.empty()) { + result.secret_map["proxy_user_name"] = proxy_user_name; + } + if (!proxy_password.empty()) { + result.secret_map["proxy_password"] = proxy_password; + } + + // Register the proxy password among the keys to redact + result.redact_keys.insert("proxy_password"); +} + static unique_ptr CreateAzureSecretFromConfig(ClientContext &context, CreateSecretInput &input) { string connection_string = TryGetStringParam(input, "connection_string"); string account_name = TryGetStringParam(input, "account_name"); @@ -30,6 +52,8 @@ static unique_ptr CreateAzureSecretFromConfig(ClientContext &context auto result = make_uniq(scope, input.type, input.provider, input.name); + 
FillWithAzureProxyInfo(context, input, *result); + //! Add connection string if (!connection_string.empty()) { result->secret_map["connection_string"] = connection_string; @@ -59,6 +83,8 @@ static unique_ptr CreateAzureSecretFromCredentialChain(ClientContext auto result = make_uniq(scope, input.type, input.provider, input.name); + FillWithAzureProxyInfo(context, input, *result); + // Add config to kv secret if (input.options.find("chain") != input.options.end()) { result->secret_map["chain"] = TryGetStringParam(input, "chain"); @@ -73,6 +99,16 @@ static unique_ptr CreateAzureSecretFromCredentialChain(ClientContext return std::move(result); } +static void RegisterCommonSecretParameters(CreateSecretFunction &function) { + // Register azure common parameters + function.named_parameters["account_name"] = LogicalType::VARCHAR; + + // Register proxy parameters + function.named_parameters["http_proxy"] = LogicalType::VARCHAR; + function.named_parameters["proxy_user_name"] = LogicalType::VARCHAR; + function.named_parameters["proxy_password"] = LogicalType::VARCHAR; +} + void CreateAzureSecretFunctions::Register(DatabaseInstance &instance) { string type = "azure"; @@ -86,14 +122,14 @@ void CreateAzureSecretFunctions::Register(DatabaseInstance &instance) { // Register the connection string secret provider CreateSecretFunction connection_string_function = {type, "config", CreateAzureSecretFromConfig}; connection_string_function.named_parameters["connection_string"] = LogicalType::VARCHAR; - connection_string_function.named_parameters["account_name"] = LogicalType::VARCHAR; + RegisterCommonSecretParameters(connection_string_function); ExtensionUtil::RegisterFunction(instance, connection_string_function); // Register the credential_chain secret provider CreateSecretFunction cred_chain_function = {type, "credential_chain", CreateAzureSecretFromCredentialChain}; cred_chain_function.named_parameters["chain"] = LogicalType::VARCHAR; - 
cred_chain_function.named_parameters["account_name"] = LogicalType::VARCHAR; cred_chain_function.named_parameters["azure_endpoint"] = LogicalType::VARCHAR; + RegisterCommonSecretParameters(cred_chain_function); ExtensionUtil::RegisterFunction(instance, cred_chain_function); } diff --git a/src/include/azure_extension.hpp b/src/include/azure_extension.hpp index 9db01a9..d124d01 100644 --- a/src/include/azure_extension.hpp +++ b/src/include/azure_extension.hpp @@ -21,12 +21,19 @@ class AzureExtension : public Extension { std::string Name() override; }; +struct AzureProxyOptions { + string http_proxy; + string user_name; + string password; +}; + struct AzureAuthentication { //! Main Auth method: through secret optional_ptr secret; //! Auth method #1: setting the connection string string connection_string; + AzureProxyOptions proxy_options; //! Auth method #2: setting account name + defining a credential chain. string account_name; diff --git a/test/sql/azure_proxy.test b/test/sql/azure_proxy.test new file mode 100644 index 0000000..0c804c9 --- /dev/null +++ b/test/sql/azure_proxy.test @@ -0,0 +1,26 @@ +# name: test/sql/azure_proxy.test +# description: test azure extension with a simple proxy +# group: [azure] + +require azure + +require parquet + +require-env AZURE_STORAGE_CONNECTION_STRING + +require-env HTTP_PROXY_RUNNING + +foreach prefix azure:// az:// + +statement ok +SET azure_storage_connection_string = '${AZURE_STORAGE_CONNECTION_STRING}'; + +statement ok +SET azure_http_proxy = 'http://localhost:3128'; + +query I +SELECT count(*) FROM '${prefix}testing-private/l.csv'; +---- +60175 + +endloop diff --git a/test/sql/azure_proxy_auth.test b/test/sql/azure_proxy_auth.test new file mode 100644 index 0000000..10235d0 --- /dev/null +++ b/test/sql/azure_proxy_auth.test @@ -0,0 +1,32 @@ +# name: test/sql/azure_proxy_auth.test +# description: test azure extension with a proxy requiring an authentication +# group: [azure] + +require azure + +require parquet + +require-env 
AZURE_STORAGE_CONNECTION_STRING + +require-env HTTP_PROXY_RUNNING + +foreach prefix azure:// az:// + +statement ok +SET azure_storage_connection_string = '${AZURE_STORAGE_CONNECTION_STRING}'; + +statement ok +SET azure_http_proxy = 'http://localhost:3129'; + +statement ok +SET azure_proxy_user_name = 'john'; + +statement ok +SET azure_proxy_password = 'doe'; + +query I +SELECT count(*) FROM '${prefix}testing-private/l.csv'; +---- +60175 + +endloop diff --git a/test/sql/azure_proxy_auth_secrets.test b/test/sql/azure_proxy_auth_secrets.test new file mode 100644 index 0000000..49c3abf --- /dev/null +++ b/test/sql/azure_proxy_auth_secrets.test @@ -0,0 +1,34 @@ +# name: test/sql/azure_proxy_auth_secrets.test +# description: test azure extension with a proxy requiring an authentication but configured with DuckDB secret +# group: [azure] + +require azure + +require parquet + +require-env AZURE_STORAGE_CONNECTION_STRING + +require-env HTTP_PROXY_RUNNING + +foreach prefix azure:// az:// + +# Start with default provider +statement ok +CREATE SECRET s1 ( + TYPE AZURE, + CONNECTION_STRING '${AZURE_STORAGE_CONNECTION_STRING}', + HTTP_PROXY 'http://localhost:3129', + PROXY_USER_NAME 'john', + PROXY_PASSWORD 'doe' +) + +query I +SELECT count(*) FROM '${prefix}testing-private/l.csv'; +---- +60175 + +# Remove secret +statement ok +DROP SECRET s1 + +endloop diff --git a/test/sql/azure_proxy_secrets.test b/test/sql/azure_proxy_secrets.test new file mode 100644 index 0000000..55ce32d --- /dev/null +++ b/test/sql/azure_proxy_secrets.test @@ -0,0 +1,32 @@ +# name: test/sql/azure_proxy_secrets.test +# description: test azure extension with a simple proxy but configured with DuckDB secret +# group: [azure] + +require azure + +require parquet + +require-env AZURE_STORAGE_CONNECTION_STRING + +require-env HTTP_PROXY_RUNNING + +foreach prefix azure:// az:// + +# Start with default provider +statement ok +CREATE SECRET s1 ( + TYPE AZURE, + CONNECTION_STRING 
'${AZURE_STORAGE_CONNECTION_STRING}', + http_proxy 'http://localhost:3128' +) + +query I +SELECT count(*) FROM '${prefix}testing-private/l.csv'; +---- +60175 + +# Remove secret +statement ok +DROP SECRET s1 + +endloop