diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc
index 18f656a2e458d..82991a8f1d162 100644
--- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc
+++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <immintrin.h>
+#include <iostream>
 
 #include "arrow/compute/row/compare_internal.h"
 #include "arrow/compute/util.h"
@@ -281,7 +282,11 @@ inline uint64_t CompareSelected8_avx2(const uint8_t* left_base, const uint8_t* r
       ARROW_DCHECK(false);
   }
 
-  __m256i right = _mm256_i32gather_epi32((const int*)right_base, offset_right, 1);
+  // vpgatherdd sign-extends its 32-bit indices, so rebase the pointer and the offsets
+  // by 2GB to keep row offsets in [2GB, 4GB) addressable (GH-41813).
+  const int* right_base_2g = (const int*)(right_base + 0x80000000ull);
+  __m256i offset_right_2g = _mm256_sub_epi32(offset_right, _mm256_set1_epi32(0x80000000));
+  __m256i right = _mm256_i32gather_epi32(right_base_2g, offset_right_2g, 1);
   if (column_width != sizeof(uint32_t)) {
     constexpr uint32_t mask = column_width == 0 || column_width == 1 ? 0xff : 0xffff;
     right = _mm256_and_si256(right, _mm256_set1_epi32(mask));
@@ -330,7 +335,11 @@ inline uint64_t Compare8_avx2(const uint8_t* left_base, const uint8_t* right_bas
       ARROW_DCHECK(false);
   }
 
-  __m256i right = _mm256_i32gather_epi32((const int*)right_base, offset_right, 1);
+  // vpgatherdd sign-extends its 32-bit indices, so rebase the pointer and the offsets
+  // by 2GB to keep row offsets in [2GB, 4GB) addressable (GH-41813).
+  const int* right_base_2g = (const int*)(right_base + 0x80000000ull);
+  __m256i offset_right_2g = _mm256_sub_epi32(offset_right, _mm256_set1_epi32(0x80000000));
+  __m256i right = _mm256_i32gather_epi32(right_base_2g, offset_right_2g, 1);
   if (column_width != sizeof(uint32_t)) {
     constexpr uint32_t mask = column_width == 0 || column_width == 1 ? 0xff : 0xffff;
     right = _mm256_and_si256(right, _mm256_set1_epi32(mask));
@@ -670,5 +679,28 @@ uint32_t KeyCompare::CompareVarBinaryColumnToRow_avx2(
   return num_rows_to_compare;
 }
 
+// Scratch probe for GH-41813: prints what an AVX2 gather reads for a byte offset of -4,
+// then the difference between an offset just above 2GB and the 2GB bias itself.
+// NOTE: allocates slightly more than 4GB.
+void RossiTest() {
+  constexpr size_t kFourGbInWords = 0x100000000ull / sizeof(uint32_t);
+  uint32_t* data = new uint32_t[kFourGbInWords + 2];
+  // Read by lane 0 if the -4 offset is sign-extended (base - 4 bytes).
+  data[0] = 0xDEADBEEF;
+  // Lanes 1..7 gather this element; define it so no indeterminate value is read.
+  data[1] = 0;
+  // Read by lane 0 if the offset were zero-extended (base + 4GB - 4 bytes).
+  data[kFourGbInWords] = 0xFEEBDAED;
+  __m256i offset = _mm256_setr_epi32(-4, 0, 0, 0, 0, 0, 0, 0);
+  __m256i content =
+      _mm256_i32gather_epi32(reinterpret_cast<const int*>(data + 1), offset, 1);
+  std::cout << "Content: " << std::hex << _mm256_extract_epi32(content, 0) << std::endl;
+  int32_t i_2g = static_cast<int32_t>(0x80000000u);
+  int32_t i_over_2g = static_cast<int32_t>(0x800000ABu);
+  // std::hex is sticky: switch std::cout back to decimal for whoever prints next.
+  std::cout << std::hex << i_over_2g - i_2g << std::dec << std::endl;
+  delete[] data;
+}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc
index 4044049b10863..a389f529ba82f 100644
--- a/cpp/src/arrow/compute/row/compare_test.cc
+++ b/cpp/src/arrow/compute/row/compare_test.cc
@@ -17,6 +17,7 @@
 
 #include <numeric>
 
+#include "arrow/array/builder_binary.h"
 #include "arrow/compute/row/compare_internal.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
@@ -164,5 +165,82 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) {
   }
 }
 
+// Specialized case for GH-41813.
+TEST(KeyCompare, CompareColumnsToRowsLarge) {
+  if constexpr (sizeof(void*) == 4) {
+    GTEST_SKIP() << "Test only works on 64-bit platforms";
+  }
+
+  constexpr auto fsb_length = 128 * 1024 * 1024;
+
+  constexpr auto num_rows_base = 18;
+  MemoryPool* pool = default_memory_pool();
+  TempVectorStack stack;
+  ASSERT_OK(
+      stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows_base)));
+
+  // A fixed-size-binary column holding 17 'X...' rows followed by one 'Y...' row, and
+  // an all-null binary column.
+  std::shared_ptr<Array> column_fsb;
+  {
+    FixedSizeBinaryBuilder builder(fixed_size_binary(fsb_length), pool);
+    ASSERT_OK(builder.Reserve(num_rows_base));
+    std::string x(fsb_length, 'X'), y(fsb_length, 'Y');
+    for (int i = 0; i < num_rows_base - 1; ++i) {
+      ASSERT_OK(builder.Append(x.data()));
+    }
+    ASSERT_OK(builder.Append(y.data()));
+    ASSERT_OK(builder.Finish(&column_fsb));
+  }
+  std::shared_ptr<Array> column_binary;
+  {
+    BinaryBuilder builder(binary(), pool);
+    ASSERT_OK(builder.AppendNulls(num_rows_base));
+    ASSERT_OK(builder.Finish(&column_binary));
+  }
+  ExecBatch batch_base({column_fsb, column_binary}, num_rows_base);
+
+  std::vector<KeyColumnMetadata> column_metadatas_base;
+  ASSERT_OK(ColumnMetadatasFromExecBatch(batch_base, &column_metadatas_base));
+  std::vector<KeyColumnArray> column_arrays_base;
+  ASSERT_OK(ColumnArraysFromExecBatch(batch_base, &column_arrays_base));
+
+  RowTableMetadata table_metadata_right;
+  table_metadata_right.FromColumnMetadataVector(column_metadatas_base, sizeof(uint64_t),
+                                                sizeof(uint64_t));
+
+  RowTableImpl row_table;
+  ASSERT_OK(row_table.Init(pool, table_metadata_right));
+
+  // Encode row table with 18 rows, so that the last row is placed at over 2GB offset.
+  constexpr auto num_rows_right = num_rows_base;
+  RowTableEncoder row_encoder;
+  row_encoder.Init(column_metadatas_base, sizeof(uint64_t), sizeof(uint64_t));
+  row_encoder.PrepareEncodeSelected(0, num_rows_right, column_arrays_base);
+  std::array<uint16_t, num_rows_right> row_ids_right;
+  std::iota(row_ids_right.begin(), row_ids_right.end(), 0);
+  ASSERT_OK(row_encoder.EncodeSelected(&row_table, num_rows_right, row_ids_right.data()));
+
+  ASSERT_GT(row_table.offsets()[num_rows_right - 1], 0x80000000u);
+
+  constexpr auto num_rows_left = 16;
+  std::vector<uint32_t> row_ids_left(num_rows_left, num_rows_base - 1);
+
+  LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack};
+
+  {
+    uint32_t num_rows_no_match;
+    std::vector<uint16_t> row_ids_out(num_rows_left);
+    KeyCompare::CompareColumnsToRows(num_rows_left, NULLPTR, row_ids_left.data(), &ctx,
+                                     &num_rows_no_match, row_ids_out.data(),
+                                     column_arrays_base, row_table, true, NULLPTR);
+    // NOTE(review): row_ids_left is all 17 (the 'Y...' row) while rows 0..15 of
+    // column_fsb hold 'X...'; confirm which rows this call pairs up and that zero
+    // mismatches is really the expected outcome.
+    ASSERT_EQ(num_rows_no_match, 0);
+    ASSERT_EQ(row_ids_out[0], 0);
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow