From c8044b9f5f51c3e9ec9172b25cae905491587042 Mon Sep 17 00:00:00 2001 From: Ruoxi Sun Date: Tue, 18 Jun 2024 01:13:21 +0800 Subject: [PATCH] Revert --- cpp/CMakePresets.json | 13 ---- .../compute/row/compare_internal_avx2.cc | 14 ---- cpp/src/arrow/dataset/dataset_test.cc | 78 ------------------- 3 files changed, 105 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 1fdb0c0456512..13d1241990c31 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -250,19 +250,6 @@ "displayName": "Debug build with tests and more optional components", "cacheVariables": {} }, - { - "name": "fix-41813", - "inherits": [ - "base-debug", - "features-main" - ], - "displayName": "Fix 41813", - "cacheVariables": { - "ARROW_JEMALLOC": "OFF", - "ARROW_MIMALLOC": "OFF", - "ARROW_USE_ASAN": "ON" - } - }, { "name": "ninja-debug-cuda", "inherits": [ diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc index cbe7ce98cae7f..2abefc45f2098 100644 --- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc @@ -16,7 +16,6 @@ // under the License. #include -#include #include "arrow/compute/row/compare_internal.h" #include "arrow/compute/util.h" @@ -689,18 +688,5 @@ uint32_t KeyCompare::CompareVarBinaryColumnToRow_avx2( return num_rows_to_compare; } -void RossiTest() { - size_t size = 0x100000000ull + 2 * sizeof(uint32_t); - uint32_t* data = new uint32_t[size / sizeof(uint32_t)]; - data[0] = 0xDEADBEEF; - data[0x100000000ull / sizeof(uint32_t) + 1] = 0xFEEBDAED; - __m256i offset = _mm256_setr_epi32(-4, 0, 0, 0, 0, 0, 0, 0); - __m256i content = _mm256_i32gather_epi32(data + 1, offset, 1); - std::cout << "Content: " << std::hex << _mm256_extract_epi32(content, 0) << std::endl; - int32_t i_2g = 0x80000000; - int32_t i_over_2g = 0x800000AB; - std::cout << std::hex << i_over_2g - i_2g << std::endl; -} - } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc index 6f4a86c0dc57c..eb3fd0e304750 100644 --- a/cpp/src/arrow/dataset/dataset_test.cc +++ b/cpp/src/arrow/dataset/dataset_test.cc @@ -21,7 +21,6 @@ #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/discovery.h" -#include "arrow/dataset/file_parquet.h" #include "arrow/dataset/partition.h" #include "arrow/dataset/test_util_internal.h" #include "arrow/filesystem/mockfs.h" @@ -802,82 +801,5 @@ TEST(TestDictPartitionColumn, SelectPartitionColumnFilterPhysicalColumn) { *ArrayFromJSON(partition_field->type(), R"(["one"])")); } -namespace ac = arrow::acero; -namespace cp = arrow::compute; -namespace ds = arrow::dataset; -namespace fs = arrow::fs; - -arrow::Result> GetFileSystemFromUri( - const std::string& uri, std::string* path) { - return fs::FileSystemFromUri(uri, path); -} - -arrow::Result> GetDatasetFromDirectory( - std::shared_ptr fs, std::shared_ptr format, - std::string dir) { - // Find all files under `path` - fs::FileSelector s; - s.base_dir = dir; - s.recursive = true; - - ds::FileSystemFactoryOptions options; - options.partitioning = DirectoryPartitioning::MakeFactory({"year", "month"}); - // The factory will try to build a child dataset. - ARROW_ASSIGN_OR_RAISE(auto factory, - ds::FileSystemDatasetFactory::Make(fs, s, format, options)); - - // Try to infer a common schema for all files. - ARROW_ASSIGN_OR_RAISE(auto schema, factory->Inspect({})); - // Caller can optionally decide another schema as long as it is compatible - // with the previous one, e.g. `factory->Finish(compatible_schema)`. - ARROW_ASSIGN_OR_RAISE(auto child, factory->Finish()); - - ds::DatasetVector children{1, child}; - auto dataset = ds::UnionDataset::Make(std::move(schema), std::move(children)); - - return dataset; -} - -arrow::Result> GetScannerFromDataset( - std::shared_ptr dataset) { - ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan()); - - ARROW_RETURN_NOT_OK(scanner_builder->UseThreads(true)); - - return scanner_builder->Finish(); -} - -arrow::Status ExecutePlanAndCollectAsTable(ac::Declaration plan) { - // collect sink_reader into a Table - std::shared_ptr response_table; - ARROW_ASSIGN_OR_RAISE(response_table, ac::DeclarationToTable(std::move(plan))); - - std::cout << "Results : " << response_table->ToString() << std::endl; - - return arrow::Status::OK(); -} - -TEST(GH41813, GH41813) { - std::string uri = - "file:///Users/zanmato/Downloads/arrow_segfault_reproducer_2/data/reduced_attempt3"; - std::string path; - auto format = std::make_shared(); - ASSERT_OK_AND_ASSIGN(auto fs, GetFileSystemFromUri(uri, &path)); - ASSERT_OK_AND_ASSIGN(auto dataset, GetDatasetFromDirectory(fs, format, path)); - ASSERT_OK_AND_ASSIGN(auto scanner, GetScannerFromDataset(dataset)); - auto scan_options = std::make_shared(); - scan_options->projection = cp::project({}, {}); // create empty projection - auto scan_node_options = arrow::dataset::ScanNodeOptions{dataset, scan_options}; - ac::Declaration scan{"scan", std::move(scan_node_options)}; - - auto count_options = std::make_shared(cp::CountOptions::ONLY_VALID); - auto aggregate_options = ac::AggregateNodeOptions{ - /*aggregates=*/{{"hash_count", count_options, "date", "count(date)"}}, - /*keys=*/{"year", "month", "cid"}}; - ac::Declaration aggregate{"aggregate", {std::move(scan)}, std::move(aggregate_options)}; - - ASSERT_OK(ExecutePlanAndCollectAsTable(std::move(aggregate))); -} - } // namespace dataset } // namespace arrow