Skip to content

Commit

Permalink
[Backport-release-2.24] Support setting S3 storage class. (#5053) (#5068
Browse files Browse the repository at this point in the history
)

Backport
875f8f4
from #5053.

---
TYPE: CONFIG
DESC: Add `vfs.s3.storage_class` config option to set the storage class
of newly uploaded S3 objects.

Co-authored-by: Theodore Tsirpanis <theodore.tsirpanis@tiledb.com>
  • Loading branch information
KiterLuc and teo-tsirpanis authored Jun 11, 2024
1 parent f761f94 commit 7ad870c
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 1 deletion.
4 changes: 4 additions & 0 deletions test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ void check_save_to_file() {
ss << "vfs.s3.requester_pays false\n";
ss << "vfs.s3.scheme https\n";
ss << "vfs.s3.skip_init false\n";
ss << "vfs.s3.storage_class NOT_SET\n";
ss << "vfs.s3.use_multipart_upload true\n";
ss << "vfs.s3.use_virtual_addressing true\n";
ss << "vfs.s3.verify_ssl true\n";
Expand Down Expand Up @@ -739,6 +740,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
all_param_values["vfs.s3.connect_scale_factor"] = "25";
all_param_values["vfs.s3.sse"] = "";
all_param_values["vfs.s3.sse_kms_key_id"] = "";
all_param_values["vfs.s3.storage_class"] = "NOT_SET";
all_param_values["vfs.s3.logging_level"] = "Off";
all_param_values["vfs.s3.request_timeout_ms"] = "3000";
all_param_values["vfs.s3.requester_pays"] = "false";
Expand Down Expand Up @@ -812,6 +814,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
vfs_param_values["s3.connect_scale_factor"] = "25";
vfs_param_values["s3.sse"] = "";
vfs_param_values["s3.sse_kms_key_id"] = "";
vfs_param_values["s3.storage_class"] = "NOT_SET";
vfs_param_values["s3.logging_level"] = "Off";
vfs_param_values["s3.request_timeout_ms"] = "3000";
vfs_param_values["s3.requester_pays"] = "false";
Expand Down Expand Up @@ -880,6 +883,7 @@ TEST_CASE("C API: Test config iter", "[capi][config]") {
s3_param_values["connect_scale_factor"] = "25";
s3_param_values["sse"] = "";
s3_param_values["sse_kms_key_id"] = "";
s3_param_values["storage_class"] = "NOT_SET";
s3_param_values["logging_level"] = "Off";
s3_param_values["request_timeout_ms"] = "3000";
s3_param_values["requester_pays"] = "false";
Expand Down
2 changes: 1 addition & 1 deletion test/src/unit-cppapi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ TEST_CASE("C++ API: Config iterator", "[cppapi][config]") {
names.push_back(it->first);
}
// Check number of VFS params in default config object.
CHECK(names.size() == 68);
CHECK(names.size() == 69);
}

TEST_CASE("C++ API: Config Environment Variables", "[cppapi][config]") {
Expand Down
16 changes: 16 additions & 0 deletions tiledb/api/c_api/config/config_api_external.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,22 @@ TILEDB_EXPORT void tiledb_config_free(tiledb_config_t** config) TILEDB_NOEXCEPT;
* The server-side encryption key to use if
* vfs.s3.sse == "kms" (AWS key management service). <br>
* **Default**: ""
* - `vfs.s3.storage_class` <br>
* The storage class to use for the newly uploaded S3 objects. The set of
* accepted values is found in the Aws::S3::Model::StorageClass enumeration.
* "NOT_SET"
* "STANDARD"
* "REDUCED_REDUNDANCY"
* "STANDARD_IA"
* "ONEZONE_IA"
* "INTELLIGENT_TIERING"
* "GLACIER"
* "DEEP_ARCHIVE"
* "OUTPOSTS"
* "GLACIER_IR"
* "SNOW"
* "EXPRESS_ONEZONE" <br>
* **Default**: "NOT_SET"
* - `vfs.s3.bucket_canned_acl` <br>
* Names of values found in Aws::S3::Model::BucketCannedACL enumeration.
* "NOT_SET"
Expand Down
2 changes: 2 additions & 0 deletions tiledb/sm/config/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ const std::string Config::VFS_S3_CONNECT_MAX_TRIES = "5";
const std::string Config::VFS_S3_CONNECT_SCALE_FACTOR = "25";
const std::string Config::VFS_S3_SSE = "";
const std::string Config::VFS_S3_SSE_KMS_KEY_ID = "";
const std::string Config::VFS_S3_STORAGE_CLASS = "NOT_SET";
const std::string Config::VFS_S3_REQUEST_TIMEOUT_MS = "3000";
const std::string Config::VFS_S3_REQUESTER_PAYS = "false";
const std::string Config::VFS_S3_PROXY_SCHEME = "http";
Expand Down Expand Up @@ -482,6 +483,7 @@ const std::map<std::string, std::string> default_config_values = {
"vfs.s3.connect_scale_factor", Config::VFS_S3_CONNECT_SCALE_FACTOR),
std::make_pair("vfs.s3.sse", Config::VFS_S3_SSE),
std::make_pair("vfs.s3.sse_kms_key_id", Config::VFS_S3_SSE_KMS_KEY_ID),
std::make_pair("vfs.s3.storage_class", Config::VFS_S3_STORAGE_CLASS),
std::make_pair(
"vfs.s3.request_timeout_ms", Config::VFS_S3_REQUEST_TIMEOUT_MS),
std::make_pair("vfs.s3.requester_pays", Config::VFS_S3_REQUESTER_PAYS),
Expand Down
3 changes: 3 additions & 0 deletions tiledb/sm/config/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ class Config {
/** The S3 KMS key id for KMS server-side-encryption. */
static const std::string VFS_S3_SSE_KMS_KEY_ID;

/** The S3 storage class to upload objects to. */
static const std::string VFS_S3_STORAGE_CLASS;

/** Request timeout in milliseconds. */
static const std::string VFS_S3_REQUEST_TIMEOUT_MS;

Expand Down
17 changes: 17 additions & 0 deletions tiledb/sm/cpp_api/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,23 @@ class Config {
* The server-side encryption key to use if
* vfs.s3.sse == "kms" (AWS key management service). <br>
* **Default**: ""
* - `vfs.s3.storage_class` <br>
* The storage class to use for the newly uploaded S3 objects. The set of
* accepted values is found in the Aws::S3::Model::StorageClass
* enumeration.
* "NOT_SET"
* "STANDARD"
* "REDUCED_REDUNDANCY"
* "STANDARD_IA"
* "ONEZONE_IA"
* "INTELLIGENT_TIERING"
* "GLACIER"
* "DEEP_ARCHIVE"
* "OUTPOSTS"
* "GLACIER_IR"
* "SNOW"
* "EXPRESS_ONEZONE" <br>
* **Default**: "NOT_SET"
* - `vfs.s3.bucket_canned_acl` <br>
* Names of values found in Aws::S3::Model::BucketCannedACL enumeration.
* "NOT_SET"
Expand Down
57 changes: 57 additions & 0 deletions tiledb/sm/filesystem/s3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ using tiledb::common::filesystem::directory_entry;

namespace {

/*
* Functions to convert strings to AWS enums.
*
* The AWS SDK provides some enum conversion functions, but they must not be
* used, because they have non-deterministic behavior in certain scenarios.
*/

Aws::Utils::Logging::LogLevel aws_log_name_to_level(std::string loglevel) {
std::transform(loglevel.begin(), loglevel.end(), loglevel.begin(), ::tolower);
if (loglevel == "fatal")
Expand Down Expand Up @@ -157,6 +164,47 @@ Aws::S3::Model::BucketCannedACL S3_BucketCannedACL_from_str(
return Aws::S3::Model::BucketCannedACL::NOT_SET;
}

/**
 * Translates the textual name of an S3 storage class into the corresponding
 * Aws::S3::Model::StorageClass enumerator.
 *
 * The result is NOT_SET if
 * A) the string is empty, or
 * B) the string does not exactly (case-sensitively) match the name of any
 *    of the enum members listed in the table below.
 *
 * @param storage_class_str A textual string naming one of the
 *     Aws::S3::Model::StorageClass enum members.
 * @return The matching enumerator, or StorageClass::NOT_SET when there is no
 *     match.
 */
Aws::S3::Model::StorageClass S3_StorageClass_from_str(
    const std::string& storage_class_str) {
  using Aws::S3::Model::StorageClass;

  // Pairs every accepted spelling with its enumerator.
  struct NamedStorageClass {
    const char* name;
    StorageClass value;
  };
  static const NamedStorageClass known_classes[] = {
      {"NOT_SET", StorageClass::NOT_SET},
      {"STANDARD", StorageClass::STANDARD},
      {"REDUCED_REDUNDANCY", StorageClass::REDUCED_REDUNDANCY},
      {"STANDARD_IA", StorageClass::STANDARD_IA},
      {"ONEZONE_IA", StorageClass::ONEZONE_IA},
      {"INTELLIGENT_TIERING", StorageClass::INTELLIGENT_TIERING},
      {"GLACIER", StorageClass::GLACIER},
      {"DEEP_ARCHIVE", StorageClass::DEEP_ARCHIVE},
      {"OUTPOSTS", StorageClass::OUTPOSTS},
      {"GLACIER_IR", StorageClass::GLACIER_IR},
      {"SNOW", StorageClass::SNOW},
      {"EXPRESS_ONEZONE", StorageClass::EXPRESS_ONEZONE},
  };

  // An empty string matches no table entry, so it falls through to the
  // NOT_SET default together with every other unrecognized value.
  for (const NamedStorageClass& candidate : known_classes) {
    if (storage_class_str == candidate.name) {
      return candidate.value;
    }
  }
  return StorageClass::NOT_SET;
}

} // namespace

using namespace tiledb::common;
Expand Down Expand Up @@ -200,6 +248,7 @@ S3::S3(
s3_params_.requester_pays_ ? Aws::S3::Model::RequestPayer::requester :
Aws::S3::Model::RequestPayer::NOT_SET)
, sse_(Aws::S3::Model::ServerSideEncryption::NOT_SET)
, storage_class_(S3_StorageClass_from_str(s3_params_.storage_class_))
, object_canned_acl_(
S3_ObjectCannedACL_from_str(s3_params_.object_acl_str_))
, bucket_canned_acl_(
Expand Down Expand Up @@ -511,6 +560,10 @@ void S3::touch(const URI& uri) const {
if (!s3_params_.sse_kms_key_id_.empty())
put_object_request.SetSSEKMSKeyId(
Aws::String(s3_params_.sse_kms_key_id_.c_str()));
// TODO: These checks are not needed since AWS SDK 1.11.275
// https://github.com/aws/aws-sdk-cpp/pull/2875
if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET)
put_object_request.SetStorageClass(storage_class_);
if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) {
put_object_request.SetACL(object_canned_acl_);
}
Expand Down Expand Up @@ -1562,6 +1615,8 @@ Status S3::initiate_multipart_request(
if (!s3_params_.sse_kms_key_id_.empty())
multipart_upload_request.SetSSEKMSKeyId(
Aws::String(s3_params_.sse_kms_key_id_.c_str()));
if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET)
multipart_upload_request.SetStorageClass(storage_class_);
if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) {
multipart_upload_request.SetACL(object_canned_acl_);
}
Expand Down Expand Up @@ -1756,6 +1811,8 @@ void S3::write_direct(const URI& uri, const void* buffer, uint64_t length) {
if (!s3_params_.sse_kms_key_id_.empty())
put_object_request.SetSSEKMSKeyId(
Aws::String(s3_params_.sse_kms_key_id_.c_str()));
if (storage_class_ != Aws::S3::Model::StorageClass::NOT_SET)
put_object_request.SetStorageClass(storage_class_);
if (object_canned_acl_ != Aws::S3::Model::ObjectCannedACL::NOT_SET) {
put_object_request.SetACL(object_canned_acl_);
}
Expand Down
8 changes: 8 additions & 0 deletions tiledb/sm/filesystem/s3.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ struct S3Parameters {
sse_algorithm_ == "kms" ?
config.get<std::string>("vfs.s3.sse_kms_key_id").value() :
"")
, storage_class_(
config.get<std::string>("vfs.s3.storage_class", Config::must_find))
, bucket_acl_str_(config.get<std::string>(
"vfs.s3.bucket_canned_acl", Config::must_find))
, object_acl_str_(config.get<std::string>(
Expand Down Expand Up @@ -326,6 +328,9 @@ struct S3Parameters {
/** The server-side encryption key to use with the kms algorithm. */
std::string sse_kms_key_id_;

/** The S3 storage class. */
std::string storage_class_;

/** Names of values found in Aws::S3::Model::BucketCannedACL enumeration. */
std::string bucket_acl_str_;

Expand Down Expand Up @@ -1378,6 +1383,9 @@ class S3 : FilesystemBase {
/** The server-side encryption algorithm. */
Aws::S3::Model::ServerSideEncryption sse_;

/** The storage class for an S3 upload request. */
Aws::S3::Model::StorageClass storage_class_;

/** Protects file_buffers map */
std::mutex file_buffers_mtx_;

Expand Down

0 comments on commit 7ad870c

Please sign in to comment.