Skip to content

Commit

Permalink
Merge pull request #31 from quentingodeau/feature/proxy
Browse files Browse the repository at this point in the history
Add proxy configuration support
  • Loading branch information
samansmink authored Jan 29, 2024
2 parents 03a0878 + f8db505 commit fb2799b
Show file tree
Hide file tree
Showing 11 changed files with 379 additions and 16 deletions.
17 changes: 14 additions & 3 deletions .github/workflows/Linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
GEN: Ninja
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
HTTP_PROXY_RUNNING: '1'

steps:
- name: Install required ubuntu packages
Expand Down Expand Up @@ -56,7 +57,7 @@ jobs:
- name: install Azure test service
if: ${{ matrix.arch == 'linux_amd64_gcc4' }}
run: |
yum install -y nodejs npm
yum install -y nodejs npm squid
npm install -g azurite
echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo
yum install -y azure-cli
Expand All @@ -65,16 +66,19 @@ jobs:
if: ${{ matrix.arch == 'linux_amd64' }}
run: |
curl -fsSL https://deb.nodesource.com/setup_16.x | bash
apt-get install -y -qq nodejs
apt-get install -y -qq nodejs squid
node -v
npm -v
squid --version
npm install -g azurite
curl -sL https://aka.ms/InstallAzureCLIDeb | bash
- name: Launch & populate Azure test service
if: ${{ matrix.arch == 'linux_amd64' || matrix.arch == 'linux_amd64_gcc4' }}
run: |
azurite > azurite_log.txt 2>&1 &
./scripts/run_squid.sh --port 3128 --log_dir squid_logs &
./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth &
sleep 10
./scripts/upload_test_files_to_azurite.sh
Expand Down Expand Up @@ -121,4 +125,11 @@ jobs:
if: always() && matrix.arch == 'linux_amd64_gcc4'
shell: bash
run: |
cat azurite_log.txt
echo "## azurite"
cat azurite_log.txt
echo "## squid"
cat squid_logs/*
echo "## squid auth"
cat squid_auth_logs/*
13 changes: 12 additions & 1 deletion .github/workflows/MacOS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
GEN: Ninja
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
AZURE_STORAGE_CONNECTION_STRING: 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'
HTTP_PROXY_RUNNING: '1'

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -59,7 +60,10 @@ jobs:
- name: Launch & populate Azure test service
if: ${{ matrix.osx_build_arch == 'x86_64'}}
run: |
brew install squid
npm install -g azurite
./scripts/run_squid.sh --port 3128 --log_dir squid_logs &
./scripts/run_squid.sh --port 3129 --log_dir squid_auth_logs --auth &
azurite > azurite_log.txt 2>&1 &
sleep 10
./scripts/upload_test_files_to_azurite.sh
Expand All @@ -74,4 +78,11 @@ jobs:
if: always() && matrix.osx_build_arch == 'x86_64'
shell: bash
run: |
cat azurite_log.txt
echo "## azurite"
cat azurite_log.txt
echo "## squid"
cat squid_logs/*
echo "## squid with auth"
cat squid_auth_logs/*
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
build
.idea
.vscode
cmake-build-debug
duckdb_unittest_tempdir/
.DS_Store
testext
test/python/__pycache__/
.Rhistory

# Azurite
__blobstorage__/
__queuestorage__/
__azurite_db_*__.json

# squid
squid_logs
squid_users
squid.pid
squid_auth.pid
squid.conf
squid_auth.conf
84 changes: 84 additions & 0 deletions scripts/run_squid.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash

# Print command-line usage and exit with status 0.
# The text lists the options accepted by the argument parser further down
# in this script (-h/--help, -p/--port, --log_dir, --auth).
help() {
    echo "Usage: ${0} [-h|--help] [-p|--port PORT] [--log_dir DIR] [--auth]"
    echo "  -h, --help       Show this help and exit"
    echo "  -p, --port PORT  Port number for squid to listen on (default: 3128)"
    echo "  --log_dir DIR    Directory where squid writes its logs (default: squid_logs)"
    echo "  --auth           Require basic user authentication (otherwise no authentication is required)"
    exit 0
}

# Defaults; each one can be overridden from the command line.
port='3128'
auth='false'
log_dir="squid_logs"
conf_file="squid.conf"
# Single quotes are deliberate: the text ${service_name} is written verbatim
# into the generated config instead of being expanded by the shell.
# NOTE(review): presumably squid expands this macro itself - confirm against
# the squid.conf documentation.
pid_file='${service_name}.pid'

# Parse command-line options.
while [[ $# -gt 0 ]]; do
    case "${1}" in
        -h|--help)
            help
            ;;
        -p|--port)
            # Refuse a trailing option without a value instead of writing an
            # empty port into the config.
            if [[ $# -lt 2 ]]; then
                echo "Missing value for ${1}" >&2
                exit 1
            fi
            port="${2}"
            shift 2 # past argument and value
            ;;
        --auth)
            # The authenticated instance gets its own config and pid file names.
            auth='true'
            conf_file="squid_auth.conf"
            pid_file='${service_name}_auth.pid'
            shift # past argument
            ;;
        --log_dir)
            if [[ $# -lt 2 ]]; then
                echo "Missing value for ${1}" >&2
                exit 1
            fi
            log_dir="${2}"
            shift 2 # past argument and value
            ;;
        *)
            echo "Unknown option ${1}"
            exit 1
            ;;
    esac
done

# Prepare the log directory; -p keeps a re-run from failing when it already exists.
mkdir -p "${log_dir}"
touch "${log_dir}/daemon.log"
chmod -R 777 "${log_dir}"

# Write the base configuration (this truncates any previous config file).
{
    echo "http_port 127.0.0.1:${port}"
    echo "pid_filename ${pid_file}"
    echo
    echo 'logfile_rotate 0'
    echo "logfile_daemon ${log_dir}/daemon.log"
    echo "access_log ${log_dir}/access.log"
    echo "cache_log ${log_dir}/cache.log"
    echo "cache_store_log ${log_dir}/cache_store.log"
} >"${conf_file}"

if [[ "${auth}" == "true" ]]; then
    # User 'john' with password 'doe'
    echo 'john:$apr1$dalj9e7s$AhqY28Hvl3EcNblNJMiXa0' >squid_users

    # Locate the basic_ncsa_auth helper; its install path differs per platform.
    if [[ "$(uname)" == "Darwin" ]]; then
        # The Cellar path embeds the installed squid version, so it is only
        # queried on this platform.
        squid_version="$(squid -v | head -n1 | grep -o 'Version [^ ]*' | cut -d ' ' -f 2)"
        auth_basic_program="/usr/local/Cellar/squid/${squid_version}/libexec/basic_ncsa_auth"
    elif [[ -e '/usr/lib64/squid/basic_ncsa_auth' ]]; then
        auth_basic_program="/usr/lib64/squid/basic_ncsa_auth"
    else
        auth_basic_program="/usr/lib/squid/basic_ncsa_auth"
    fi

    # Append the authentication section: only authenticated users are allowed.
    {
        echo '# Add authentication options'
        echo "auth_param basic program ${auth_basic_program} squid_users"
        echo 'auth_param basic children 3'
        echo 'auth_param basic realm Squid BA'
        echo 'acl auth_users proxy_auth REQUIRED'
        echo 'http_access allow auth_users'
        echo 'http_access deny all'
    } >>"${conf_file}"
else
    echo 'http_access allow localhost' >>"${conf_file}"
fi

# Replace this shell with squid, run with -N and the generated config file.
exec squid -N -f "${conf_file}"
96 changes: 86 additions & 10 deletions src/azure_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <azure/storage/blobs.hpp>
#include <duckdb/parser/parsed_data/create_scalar_function_info.hpp>
#include <iostream>
#include <cstdlib>

namespace duckdb {

Expand Down Expand Up @@ -56,19 +57,22 @@ static void Log(Logger::Level level, std::string const &message) {
}

static Azure::Identity::ChainedTokenCredential::Sources
CreateCredentialChainFromSetting(const string &credential_chain) {
CreateCredentialChainFromSetting(const string &credential_chain,
const Azure::Core::Http::Policies::TransportOptions &transport_options) {
auto chain_list = StringUtil::Split(credential_chain, ';');
Azure::Identity::ChainedTokenCredential::Sources result;

Azure::Core::Credentials::TokenCredentialOptions options;
options.Transport = transport_options;
for (const auto &item : chain_list) {
if (item == "cli") {
result.push_back(std::make_shared<Azure::Identity::AzureCliCredential>());
result.push_back(std::make_shared<Azure::Identity::AzureCliCredential>(options));
} else if (item == "managed_identity") {
result.push_back(std::make_shared<Azure::Identity::ManagedIdentityCredential>());
result.push_back(std::make_shared<Azure::Identity::ManagedIdentityCredential>(options));
} else if (item == "env") {
result.push_back(std::make_shared<Azure::Identity::EnvironmentCredential>());
result.push_back(std::make_shared<Azure::Identity::EnvironmentCredential>(options));
} else if (item == "default") {
result.push_back(std::make_shared<Azure::Identity::DefaultAzureCredential>());
result.push_back(std::make_shared<Azure::Identity::DefaultAzureCredential>(options));
} else if (item != "none") {
throw InvalidInputException("Unknown credential provider found: " + item);
}
Expand Down Expand Up @@ -114,6 +118,22 @@ static AzureAuthentication ParseAzureAuthSettings(FileOpener *opener, const stri
}
}

// Load proxy options
Value http_proxy;
if (FileOpener::TryGetCurrentSetting(opener, "azure_http_proxy", http_proxy)) {
auth.proxy_options.http_proxy = http_proxy.ToString();
}

Value http_proxy_user_name;
if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_user_name", http_proxy_user_name)) {
auth.proxy_options.user_name = http_proxy_user_name.ToString();
}

Value http_proxy_password;
if (FileOpener::TryGetCurrentSetting(opener, "azure_proxy_password", http_proxy_password)) {
auth.proxy_options.password = http_proxy_password.ToString();
}

return auth;
}

Expand All @@ -138,20 +158,65 @@ static AzureReadOptions ParseAzureReadOptions(FileOpener *opener) {
return options;
}

// Builds the Azure SDK transport options (proxy URL, proxy user name and proxy
// password) to use for the given authentication settings.
//
// - When `auth.secret` is set, the values come from the secret keys
//   "http_proxy", "proxy_user_name" and "proxy_password". If the secret has no
//   "http_proxy" value, the HTTP_PROXY environment variable is used when present.
// - Otherwise, every non-empty field of `auth.proxy_options` is copied over.
//
// Fields that have no value in the selected source are left untouched on the
// returned options object.
static Azure::Core::Http::Policies::TransportOptions GetTransportOptions(AzureAuthentication &auth) {
	Azure::Core::Http::Policies::TransportOptions transport;

	// No secret: rely solely on the proxy options carried by `auth`.
	if (!auth.secret) {
		const auto &configured = auth.proxy_options;
		if (!configured.http_proxy.empty()) {
			transport.HttpProxy = configured.http_proxy;
		}
		if (!configured.user_name.empty()) {
			transport.ProxyUserName = configured.user_name;
		}
		if (!configured.password.empty()) {
			transport.ProxyPassword = configured.password;
		}
		return transport;
	}

	// Secret present: the proxy URL comes from the secret, else from the environment.
	auto proxy_url = auth.secret->TryGetValue("http_proxy");
	if (proxy_url.IsNull()) {
		// Keep honoring the env variable if present
		auto *env_proxy = std::getenv("HTTP_PROXY");
		if (env_proxy != nullptr) {
			transport.HttpProxy = env_proxy;
		}
	} else {
		transport.HttpProxy = proxy_url.ToString();
	}

	auto proxy_user = auth.secret->TryGetValue("proxy_user_name");
	if (!proxy_user.IsNull()) {
		transport.ProxyUserName = proxy_user.ToString();
	}

	auto proxy_pass = auth.secret->TryGetValue("proxy_password");
	if (!proxy_pass.IsNull()) {
		transport.ProxyPassword = proxy_pass.ToString();
	}

	return transport;
}

static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthentication &auth, AzureParsedUrl &url) {
string connection_string;
bool use_secret = false;
string chain;
string account_name;
string endpoint;

auto transport_options = GetTransportOptions(auth);
Azure::Storage::Blobs::BlobClientOptions options;
options.Transport = transport_options;

// Firstly, try to use the auth from the secret
if (auth.secret) {
// If connection string, we're done heres
auto connection_string_value = auth.secret->TryGetValue("connection_string");
if (!connection_string_value.IsNull()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(
connection_string_value.ToString(), url.container);
connection_string_value.ToString(), url.container, options);
}

// Account_name can be used both for unauthenticated
Expand Down Expand Up @@ -181,7 +246,7 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen

if (!auth.connection_string.empty()) {
return Azure::Storage::Blobs::BlobContainerClient::CreateFromConnectionString(auth.connection_string,
url.container);
url.container, options);
}
}

Expand All @@ -192,17 +257,17 @@ static Azure::Storage::Blobs::BlobContainerClient GetContainerClient(AzureAuthen
// Build credential chain, from last to first
Azure::Identity::ChainedTokenCredential::Sources credential_chain;
if (!chain.empty()) {
credential_chain = CreateCredentialChainFromSetting(chain);
credential_chain = CreateCredentialChainFromSetting(chain, transport_options);
}

auto accountURL = "https://" + account_name + "." + endpoint;
if (!credential_chain.empty()) {
// A set of credentials providers was passed
auto chainedTokenCredential = std::make_shared<Azure::Identity::ChainedTokenCredential>(credential_chain);
Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential);
Azure::Storage::Blobs::BlobServiceClient blob_service_client(accountURL, chainedTokenCredential, options);
return blob_service_client.GetBlobContainerClient(url.container);
} else if (!account_name.empty()) {
return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container);
return Azure::Storage::Blobs::BlobContainerClient(accountURL + "/" + url.container, options);
} else {
throw InvalidInputException("No valid Azure credentials found!");
}
Expand Down Expand Up @@ -361,6 +426,17 @@ static void LoadInternal(DatabaseInstance &instance) {
"Size of the read buffer. It is recommended that this is evenly divisible by "
"azure_read_transfer_chunk_size.",
LogicalType::UBIGINT, Value::UBIGINT(default_read_options.buffer_size));

auto *http_proxy = std::getenv("HTTP_PROXY");
Value default_http_value = http_proxy ? Value(http_proxy) : Value(nullptr);
config.AddExtensionOption("azure_http_proxy",
"Proxy to use when login & performing request to azure. "
"By default it will use the HTTP_PROXY environment variable if set.",
LogicalType::VARCHAR, default_http_value);
config.AddExtensionOption("azure_proxy_user_name", "Http proxy user name if needed.", LogicalType::VARCHAR,
Value(nullptr));
config.AddExtensionOption("azure_proxy_password", "Http proxy password if needed.", LogicalType::VARCHAR,
Value(nullptr));
}

int64_t AzureStorageFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
Expand Down
Loading

0 comments on commit fb2799b

Please sign in to comment.