
CreateMultipartUpload failing to upload a file on aws s3 #45496

Open
kishorik-2097 opened this issue Feb 11, 2025 · 0 comments
Describe the bug, including details regarding any error messages, version, and platform.

Attaching my POC code, which uses the Arrow version 17 API to connect to AWS S3; at runtime it fails with the following error:
IOError: When initiating multiple part upload for key 'data1.parquet' in bucket 'parquetpoc01': AWS Error NETWORK_CONNECTION during CreateMultipartUpload operation: curlCode: 28, Timeout was reached

We have verified access to AWS S3 with the AWS CLI: from the command line we are able to perform operations on the same bucket.
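curlCode 28 means libcurl timed out before the SDK could reach the S3 endpoint, so client-side network configuration is worth ruling out before credentials. Below is a minimal sketch (not part of the failing POC) of building the filesystem from explicit S3Options with longer timeouts, then listing the bucket as a cheap connectivity probe; the timeout values and the probe are illustrative assumptions:

// Hedged sketch, assuming Arrow >= 17 and that arrow::fs::InitializeS3()
// has already been called (as in main() below). The timeout values and the
// bucket probe are illustrative assumptions, not part of the failing setup.
arrow::Status ProbeS3Connectivity() {
  arrow::fs::S3Options opt = arrow::fs::S3Options::Defaults();
  opt.region = "us-east-1";
  opt.connect_timeout = 30;  // seconds; -1 (the default) keeps the AWS SDK default
  opt.request_timeout = 60;  // seconds
  ARROW_ASSIGN_OR_RAISE(auto s3fs, arrow::fs::S3FileSystem::Make(opt));
  // Listing the bucket is a cheap reachability check before attempting a write.
  arrow::fs::FileSelector sel;
  sel.base_dir = "parquetpoc01";  // same bucket as in the error message
  ARROW_ASSIGN_OR_RAISE(auto infos, s3fs->GetFileInfo(sel));
  std::cout << "objects visible: " << infos.size() << std::endl;
  return arrow::Status::OK();
}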

#include <arrow/api.h>
#include <arrow/compute/cast.h>
#include <arrow/compute/expression.h>
#include <arrow/dataset/dataset.h>
#include <arrow/dataset/discovery.h>
#include <arrow/dataset/file_base.h>
#include <arrow/dataset/file_ipc.h>
#include <arrow/dataset/file_parquet.h>
#include <arrow/dataset/scanner.h>
#include <arrow/filesystem/azurefs.h>
#include <arrow/filesystem/filesystem.h>
#include <arrow/filesystem/s3fs.h>
#include <arrow/ipc/writer.h>
#include <arrow/status.h>
#include <arrow/util/iterator.h>
#include <arrow/util/type_fwd.h>
#include <parquet/arrow/reader.h>
#include <parquet/arrow/writer.h>

#include <curl/curl.h>

#include <iostream>
#include <string>

using parquet::ArrowWriterProperties;
using parquet::WriterProperties;
namespace ds = arrow::dataset;
namespace fs = arrow::fs;
namespace cp = arrow::compute;
using parquet::ParquetDataPageVersion;
using arrow::Compression;

//namespace s3fs = arrow::fs::S3FileSystem;

/**
 * \brief Run Example
 * ./debug/dataset-documentation-example file:///<some_path>/<some_directory> parquet
 */

// (Doc section: Reading Datasets)
// Generate some data for the rest of this example.
arrow::Result<std::shared_ptr<arrow::Table>> CreateTable() {
  auto schema =
      arrow::schema({arrow::field("a", arrow::int64()), arrow::field("b", arrow::int64()),
                     arrow::field("c", arrow::int64())});
  std::shared_ptr<arrow::Array> array_a;
  std::shared_ptr<arrow::Array> array_b;
  std::shared_ptr<arrow::Array> array_c;
  arrow::NumericBuilder<arrow::Int64Type> builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
  ARROW_RETURN_NOT_OK(builder.Finish(&array_a));
  builder.Reset();
  ARROW_RETURN_NOT_OK(builder.AppendValues({9, 8, 7, 6, 5, 4, 3, 2, 1, 0}));
  ARROW_RETURN_NOT_OK(builder.Finish(&array_b));
  builder.Reset();
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 1, 2, 1, 2, 1, 2, 1, 2}));
  ARROW_RETURN_NOT_OK(builder.Finish(&array_c));
  return arrow::Table::Make(schema, {array_a, array_b, array_c});
}

// Set up a dataset by writing two Parquet files.
arrow::Result<std::string> CreateExampleParquetDataset(
    const std::shared_ptr<fs::FileSystem>& filesystem, const std::string& root_path) {
  // auto base_path = root_path + "/parquet_dataset";

  auto base_path = root_path;
  std::cout << "base_path:" << base_path << std::endl;
  // ARROW_RETURN_NOT_OK(filesystem->CreateDir("parquet_dataset"));
  // Create an Arrow Table
  std::shared_ptr<arrow::Table> table;
  ARROW_ASSIGN_OR_RAISE(table, CreateTable());

  // Choose compression
  std::shared_ptr<WriterProperties> props =
      WriterProperties::Builder().compression(arrow::Compression::ZSTD)->build();

  // Opening the stream is what triggers CreateMultipartUpload on S3;
  // the write below actually puts the table into the object.
  ARROW_ASSIGN_OR_RAISE(auto output,
                        filesystem->OpenOutputStream(base_path + "/data1.parquet"));
  ARROW_RETURN_NOT_OK(parquet::arrow::WriteTable(*table, arrow::default_memory_pool(),
                                                 output, /*chunk_size=*/2048, props));
  return base_path;
}

// (Doc section: Reading and writing partitioned data #3)

arrow::Status RunDatasetDocumentation(const std::string& format_name,
                                      const std::string& uri) {
  std::string base_path;
  std::shared_ptr<ds::FileFormat> format;
  std::string root_path;

  ARROW_ASSIGN_OR_RAISE(auto fs, fs::FileSystemFromUri(uri, &root_path));

  if (format_name == "parquet") {
    format = std::make_shared<ds::ParquetFileFormat>();
    ARROW_ASSIGN_OR_RAISE(base_path, CreateExampleParquetDataset(fs, root_path));
  } else {
    std::cerr << "Unknown format: " << format_name << std::endl;
    std::cerr << "Supported formats: feather, parquet, parquet_hive" << std::endl;
    return arrow::Status::ExecutionError("Dataset creation failed.");
  }

  ARROW_RETURN_NOT_OK(arrow::fs::FinalizeS3());
  return arrow::Status::OK();
}

int main() {
  arrow::fs::S3GlobalOptions options;
  options.log_level = arrow::fs::S3LogLevel::Fatal;  // only log fatal S3 errors

  // Initialize Arrow with S3 options
  arrow::Status status = fs::InitializeS3(options);
  if (!status.ok()) {
    std::cerr << status.ToString() << std::endl;
    return 1;
  }
  arrow::fs::S3Options opt;
  opt.region = "us-east-1";
  arrow::Result<std::shared_ptr<arrow::fs::S3FileSystem>> s3file;
  // s3file = arrow::fs::S3FileSystem::Make(opt);

  std::string uri = "s3://AccessKey:secretkey@parquetpoc01?region=us-east-1";

  std::string format_name = "parquet";

  status = RunDatasetDocumentation(format_name, uri);

  if (!status.ok()) {
    std::cerr << status.ToString() << std::endl;
    return EXIT_FAILURE;
  }
  // status = arrow::fs::FinalizeS3();
  // if (!status.ok()) {
  //   std::cerr << status.ToString() << std::endl;
  //   return EXIT_FAILURE;
  // }
  // ARROW_RETURN_NOT_OK(arrow::fs::FinalizeS3());
  return EXIT_SUCCESS;
}
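As a possible variation on the embedded-credentials URI used in main() above, the key pair can also be passed through S3Options instead of the s3:// URI; a hedged sketch, where the key and secret strings are placeholders rather than working credentials:

// Hedged alternative to the embedded-credentials URI in main(): pass the
// key pair through S3Options::FromAccessKey. The strings below are
// placeholders, not real credentials.
arrow::Result<std::shared_ptr<arrow::fs::S3FileSystem>> MakeS3Explicit() {
  arrow::fs::S3Options opt =
      arrow::fs::S3Options::FromAccessKey("ACCESS_KEY", "SECRET_KEY");
  opt.region = "us-east-1";
  return arrow::fs::S3FileSystem::Make(opt);
}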

Component(s)

C++
