From b541a9c4107c04d9d9bc59dddf8656f9c0c576b4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 2 Apr 2024 04:19:03 -0700 Subject: [PATCH] GH-40038: [Java] Export non empty offset buffer for variable-size layout through C Data Interface (#40043) ### Rationale for this change We encountered an error when exchanging a string array from Java to Rust through the Arrow C Data Interface. On the Rust side, it complains that the buffer at position 1 (the offset buffer) is null. After some tracing and debugging, it looks like the issue is that the Java Arrow `BaseVariableWidthVector` class assigns an empty offset buffer if the array is empty (value count 0). According to the Arrow [spec](https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-layout) for the variable-size binary layout: > The offsets buffer contains length + 1 signed integers ... So for an empty string array, the offset buffer should contain one element (generally `0`). ### What changes are included in this PR? This patch replaces the current empty offset buffer in variable-size layout vector classes with a non-empty one when exporting arrays through the C Data Interface. ### Are these changes tested? Added test cases. ### Are there any user-facing changes? 
No * Closes: #40038 Authored-by: Liang-Chi Hsieh Signed-off-by: David Li --- .../arrow/vector/complex/ListVector.java | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 220f177659038..d7aaf8ec508e2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -553,21 +553,11 @@ public void splitAndTransfer(int startIndex, int length) { Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); to.clear(); - if (length > 0) { - final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); - final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); - } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length - 1; - to.setValueCount(length); + to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); } }