Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix GCS credential config and rename to hive.gcs.json-key-file-path #11086

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ std::string HiveConfig::gcsScheme() const {
return config_->get<std::string>(kGCSScheme, std::string("https"));
}

std::string HiveConfig::gcsCredentials() const {
return config_->get<std::string>(kGCSCredentials, std::string(""));
std::string HiveConfig::gcsCredentialsPath() const {
return config_->get<std::string>(kGCSCredentialsPath, std::string(""));
}

std::optional<int> HiveConfig::gcsMaxRetryCount() const {
Expand Down
7 changes: 4 additions & 3 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,9 @@ class HiveConfig {
/// The GCS storage scheme, https for default credentials.
static constexpr const char* kGCSScheme = "hive.gcs.scheme";

/// The GCS service account configuration as json string
static constexpr const char* kGCSCredentials = "hive.gcs.credentials";
/// The GCS service account configuration JSON key file.
static constexpr const char* kGCSCredentialsPath =
"hive.gcs.json-key-file-path";

/// The GCS maximum retry counter of transient errors.
static constexpr const char* kGCSMaxRetryCount = "hive.gcs.max-retry-count";
Expand Down Expand Up @@ -284,7 +285,7 @@ class HiveConfig {

std::string gcsScheme() const;

std::string gcsCredentials() const;
std::string gcsCredentialsPath() const;

std::optional<int> gcsMaxRetryCount() const;

Expand Down
19 changes: 14 additions & 5 deletions velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,21 @@ class GCSFileSystem::Impl {
options.set<gcs::RestEndpointOption>(scheme + "://" + endpointOverride);
}

auto cred = hiveConfig_->gcsCredentials();
if (!cred.empty()) {
auto credentials = gc::MakeServiceAccountCredentials(cred);
options.set<gc::UnifiedCredentialsOption>(credentials);
auto credFile = hiveConfig_->gcsCredentialsPath();
if (!credFile.empty() && std::filesystem::exists(credFile)) {
std::ifstream jsonFile(credFile, std::ios::in);
if (!jsonFile.is_open()) {
LOG(WARNING) << "Error opening file " << credFile;
} else {
std::stringstream credsBuffer;
credsBuffer << jsonFile.rdbuf();
auto creds = credsBuffer.str();
auto credentials = gc::MakeServiceAccountCredentials(std::move(creds));
options.set<gc::UnifiedCredentialsOption>(credentials);
}
} else {
LOG(WARNING) << "Config::gcsCredentials is empty";
LOG(WARNING)
<< "Config hive.gcs.json-key-file-path is empty or key file path not found";
}

client_ = std::make_shared<gcs::Client>(options);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ add_executable(velox_gcsfile_test GCSUtilTest.cpp GCSFileSystemTest.cpp)
add_test(velox_gcsfile_test velox_gcsfile_test)
target_link_libraries(
velox_gcsfile_test
velox_file
velox_gcs
velox_core
velox_hive_connector
velox_dwio_common_exception
velox_exec
velox_file
velox_gcs
velox_hive_connector
velox_temp_path
GTest::gmock
GTest::gtest
GTest::gtest_main)
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ TEST_F(GCSFileSystemTest, credentialsConfig) {
// for authentication because the key has been deactivated on the server-side,
// *and* the account(s) involved are deleted *and* they are not the accounts
// or projects do not match its contents.
configOverride["hive.gcs.credentials"] = R"""({
auto creds = R"""({
"type": "service_account",
"project_id": "foo-project",
"private_key_id": "a1a111aa1111a11a11a11aa111a111a1a1111111",
Expand Down Expand Up @@ -344,13 +344,17 @@ TEST_F(GCSFileSystemTest, credentialsConfig) {
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/foo-email%40foo-project.iam.gserviceaccount.com"
})""";
auto jsonFile = exec::test::TempFilePath::create();
std::ofstream credsOut(jsonFile->getPath());
credsOut << creds;
credsOut.close();
configOverride["hive.gcs.json-key-file-path"] = jsonFile->getPath();
configOverride["hive.gcs.scheme"] = "http";
configOverride["hive.gcs.endpoint"] = "localhost:" + testbench_->port();
std::shared_ptr<const config::ConfigBase> conf =
std::make_shared<const config::ConfigBase>(std::move(configOverride));

filesystems::GCSFileSystem gcfs(conf);

gcfs.initializeClient();
try {
const std::string gcsFile =
Expand Down
8 changes: 4 additions & 4 deletions velox/connectors/hive/tests/HiveConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ TEST(HiveConfigTest, defaultConfig) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentials(), "");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), false);
ASSERT_EQ(
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), false);
Expand Down Expand Up @@ -95,7 +95,7 @@ TEST(HiveConfigTest, overrideConfig) {
{HiveConfig::kS3IamRoleSessionName, "velox"},
{HiveConfig::kGCSEndpoint, "hey"},
{HiveConfig::kGCSScheme, "http"},
{HiveConfig::kGCSCredentials, "hey"},
{HiveConfig::kGCSCredentialsPath, "hey"},
{HiveConfig::kOrcUseColumnNames, "true"},
{HiveConfig::kFileColumnNamesReadAsLowerCase, "true"},
{HiveConfig::kAllowNullPartitionKeys, "false"},
Expand Down Expand Up @@ -134,7 +134,7 @@ TEST(HiveConfigTest, overrideConfig) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "hey");
ASSERT_EQ(hiveConfig.gcsScheme(), "http");
ASSERT_EQ(hiveConfig.gcsCredentials(), "hey");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "hey");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), true);
ASSERT_EQ(
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), true);
Expand Down Expand Up @@ -206,7 +206,7 @@ TEST(HiveConfigTest, overrideSession) {
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentials(), "");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(session.get()), true);
ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true);

Expand Down
4 changes: 2 additions & 2 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -641,10 +641,10 @@ Each query can override the config by setting corresponding query session proper
- string
-
- The GCS storage scheme, https for default credentials.
* - hive.gcs.credentials
* - hive.gcs.json-key-file-path
- string
-
- The GCS service account configuration as json string.
- The GCS service account configuration JSON key file.
* - hive.gcs.max-retry-count
- integer
-
Expand Down
Loading