Skip to content

Commit

Permalink
Revert
Browse files Browse the repository at this point in the history
  • Loading branch information
zanmato1984 committed Jun 17, 2024
1 parent 16d9e86 commit c8044b9
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 105 deletions.
13 changes: 0 additions & 13 deletions cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -250,19 +250,6 @@
"displayName": "Debug build with tests and more optional components",
"cacheVariables": {}
},
{
"name": "fix-41813",
"inherits": [
"base-debug",
"features-main"
],
"displayName": "Fix 41813",
"cacheVariables": {
"ARROW_JEMALLOC": "OFF",
"ARROW_MIMALLOC": "OFF",
"ARROW_USE_ASAN": "ON"
}
},
{
"name": "ninja-debug-cuda",
"inherits": [
Expand Down
14 changes: 0 additions & 14 deletions cpp/src/arrow/compute/row/compare_internal_avx2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
// under the License.

#include <immintrin.h>
#include <iostream>

#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/util.h"
Expand Down Expand Up @@ -689,18 +688,5 @@ uint32_t KeyCompare::CompareVarBinaryColumnToRow_avx2(
return num_rows_to_compare;
}

// Ad-hoc experiment (not a unit test). It prints two things to stdout:
//   1. lane 0 of an AVX2 32-bit-index gather whose lane-0 byte offset is -4 relative
//      to `data + 1`, and
//   2. the difference between two int32_t values whose bit patterns are 0x800000AB and
//      0x80000000.
// The function allocates a little over 4 GiB, so it is only suitable for manual runs.
void RossiTest() {
  // 4 GiB plus two extra elements, so there is a writable element just past the 4 GiB
  // mark in addition to the one at index 0.
  const size_t size = 0x100000000ull + 2 * sizeof(uint32_t);
  uint32_t* data = new uint32_t[size / sizeof(uint32_t)];
  data[0] = 0xDEADBEEF;
  data[0x100000000ull / sizeof(uint32_t) + 1] = 0xFEEBDAED;

  // Scale is 1, so the indices are byte offsets. With a signed interpretation, lane 0
  // addresses `data[0]`.
  const __m256i offset = _mm256_setr_epi32(-4, 0, 0, 0, 0, 0, 0, 0);
  // The intrinsic is declared with an `int const*` base; cast explicitly so the call
  // does not depend on the header implementing it as a casting macro.
  const __m256i content =
      _mm256_i32gather_epi32(reinterpret_cast<const int*>(data + 1), offset, 1);
  std::cout << "Content: " << std::hex << _mm256_extract_epi32(content, 0) << std::endl;

  // Spell out the unsigned -> signed conversion instead of relying on an implicit
  // narrowing initialisation.
  const int32_t i_2g = static_cast<int32_t>(0x80000000u);
  const int32_t i_over_2g = static_cast<int32_t>(0x800000ABu);
  std::cout << std::hex << i_over_2g - i_2g << std::endl;

  // `std::hex` is sticky; put the stream back to decimal for whoever prints next.
  std::cout << std::dec;

  // The original never released this buffer.
  delete[] data;
}

} // namespace compute
} // namespace arrow
78 changes: 0 additions & 78 deletions cpp/src/arrow/dataset/dataset_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "arrow/dataset/dataset_internal.h"
#include "arrow/dataset/discovery.h"
#include "arrow/dataset/file_parquet.h"
#include "arrow/dataset/partition.h"
#include "arrow/dataset/test_util_internal.h"
#include "arrow/filesystem/mockfs.h"
Expand Down Expand Up @@ -802,82 +801,5 @@ TEST(TestDictPartitionColumn, SelectPartitionColumnFilterPhysicalColumn) {
*ArrayFromJSON(partition_field->type(), R"(["one"])"));
}

// Short aliases for the Arrow namespaces referenced by the helpers and test that follow.
namespace ac = arrow::acero;
namespace cp = arrow::compute;
namespace ds = arrow::dataset;
namespace fs = arrow::fs;

// Thin wrapper around fs::FileSystemFromUri: forwards `uri` and the `path` pointer
// unchanged and returns that call's result as-is.
// NOTE(review): `path` is presumably filled in by FileSystemFromUri with the path
// component of `uri` -- confirm against the filesystem API docs.
arrow::Result<std::shared_ptr<fs::FileSystem>> GetFileSystemFromUri(
    const std::string& uri, std::string* path) {
  auto filesystem = fs::FileSystemFromUri(uri, path);
  return filesystem;
}

// Builds a dataset from the files selected recursively under `dir` on filesystem `fs`,
// using `format` and a directory partitioning factory over the fields "year" and
// "month". The dataset produced by the factory is wrapped, as the single child, in a
// UnionDataset created with the schema returned by the factory's Inspect() call.
// Any failing step is propagated to the caller through the returned Result.
arrow::Result<std::shared_ptr<ds::Dataset>> GetDatasetFromDirectory(
    std::shared_ptr<fs::FileSystem> fs, std::shared_ptr<ds::ParquetFileFormat> format,
    std::string dir) {
  // Select everything below `dir`, descending into subdirectories.
  fs::FileSelector selector;
  selector.recursive = true;
  selector.base_dir = dir;

  ds::FileSystemFactoryOptions factory_options;
  factory_options.partitioning = DirectoryPartitioning::MakeFactory({"year", "month"});

  // The factory is what eventually produces the child dataset.
  ARROW_ASSIGN_OR_RAISE(
      auto dataset_factory,
      ds::FileSystemDatasetFactory::Make(fs, selector, format, factory_options));

  // Ask the factory for a schema first, then finish it with its default arguments.
  ARROW_ASSIGN_OR_RAISE(auto unified_schema, dataset_factory->Inspect({}));
  ARROW_ASSIGN_OR_RAISE(auto child_dataset, dataset_factory->Finish());

  // A vector holding exactly one element: the child dataset.
  ds::DatasetVector members(1, child_dataset);
  return ds::UnionDataset::Make(std::move(unified_schema), std::move(members));
}

// Creates a scanner for `dataset`: obtains a scanner builder via NewScan(), calls
// UseThreads(true) on it, and returns the result of the builder's Finish().
// A failure from NewScan() or UseThreads() is returned to the caller.
arrow::Result<std::shared_ptr<ds::Scanner>> GetScannerFromDataset(
    std::shared_ptr<ds::Dataset> dataset) {
  ARROW_ASSIGN_OR_RAISE(auto builder, dataset->NewScan());

  const bool use_threads = true;
  ARROW_RETURN_NOT_OK(builder->UseThreads(use_threads));

  auto scanner = builder->Finish();
  return scanner;
}

// Runs `plan` through ac::DeclarationToTable and writes the resulting table's
// ToString() output to stdout, prefixed with "Results : ". Returns the error from
// DeclarationToTable if it fails, otherwise Status::OK().
arrow::Status ExecutePlanAndCollectAsTable(ac::Declaration plan) {
  ARROW_ASSIGN_OR_RAISE(auto table, ac::DeclarationToTable(std::move(plan)));

  const auto rendered = table->ToString();
  std::cout << "Results : " << rendered << std::endl;

  return arrow::Status::OK();
}

// Builds a dataset from a local directory, then runs a scan -> aggregate plan that
// computes hash_count over "date" grouped by "year", "month" and "cid", and asserts
// that the plan executes successfully.
// NOTE(review): the URI below is an absolute path on one developer's machine, so this
// test cannot pass anywhere else; it needs a portable data location (or removal)
// before it can live in the shared suite.
TEST(GH41813, GH41813) {
  const std::string uri =
      "file:///Users/zanmato/Downloads/arrow_segfault_reproducer_2/data/reduced_attempt3";
  std::string path;
  auto format = std::make_shared<ds::ParquetFileFormat>();

  ASSERT_OK_AND_ASSIGN(auto fs, GetFileSystemFromUri(uri, &path));
  ASSERT_OK_AND_ASSIGN(auto dataset, GetDatasetFromDirectory(fs, format, path));
  // The scanner is built but not used afterwards; the plan below scans `dataset`
  // through its own scan node.
  ASSERT_OK_AND_ASSIGN(auto scanner, GetScannerFromDataset(dataset));

  // Scan node with an empty projection.
  auto scan_opts = std::make_shared<ds::ScanOptions>();
  scan_opts->projection = cp::project({}, {});
  auto scan_node_opts = ds::ScanNodeOptions{dataset, scan_opts};
  ac::Declaration scan_decl{"scan", std::move(scan_node_opts)};

  // Aggregate node fed by the scan node.
  auto count_opts = std::make_shared<cp::CountOptions>(cp::CountOptions::ONLY_VALID);
  auto aggregate_opts = ac::AggregateNodeOptions{
      /*aggregates=*/{{"hash_count", count_opts, "date", "count(date)"}},
      /*keys=*/{"year", "month", "cid"}};
  ac::Declaration aggregate_decl{
      "aggregate", {std::move(scan_decl)}, std::move(aggregate_opts)};

  ASSERT_OK(ExecutePlanAndCollectAsTable(std::move(aggregate_decl)));
}

} // namespace dataset
} // namespace arrow

0 comments on commit c8044b9

Please sign in to comment.