apache · huaxingao · Aug 14, 2024 · Aug 14, 2024 · Aug 16, 2024 · Aug 16, 2024
diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
@@ -1131,6 +1131,27 @@ acceptedBreaks:
       new: "method org.apache.iceberg.BaseMetastoreOperations.CommitStatus org.apache.iceberg.BaseMetastoreTableOperations::checkCommitStatus(java.lang.String,\
         \ org.apache.iceberg.TableMetadata)"
       justification: "Removing deprecated code"
+    org.apache.iceberg:iceberg-parquet:
+    - code: "java.method.numberOfParametersChanged"
+      old: "method void org.apache.iceberg.parquet.ParquetReader<T>::<init>(org.apache.iceberg.io.InputFile,\
+        \ org.apache.iceberg.Schema, org.apache.parquet.ParquetReadOptions, java.util.function.Function<org.apache.parquet.schema.MessageType,\
+        \ org.apache.iceberg.parquet.ParquetValueReader<?>>, org.apache.iceberg.mapping.NameMapping,\
+        \ org.apache.iceberg.expressions.Expression, boolean, boolean)"
+      new: "method void org.apache.iceberg.parquet.ParquetReader<T>::<init>(org.apache.iceberg.io.InputFile,\
+        \ org.apache.iceberg.Schema, org.apache.parquet.ParquetReadOptions, java.util.function.Function<org.apache.parquet.schema.MessageType,\
+        \ org.apache.iceberg.parquet.ParquetValueReader<?>>, org.apache.iceberg.mapping.NameMapping,\
+        \ org.apache.iceberg.expressions.Expression, boolean, boolean, java.lang.Integer)"
+      justification: "Added a pushed limit parameter to support limit push down"
+    - code: "java.method.numberOfParametersChanged"
+      old: "method void org.apache.iceberg.parquet.VectorizedParquetReader<T>::<init>(org.apache.iceberg.io.InputFile,\
+        \ org.apache.iceberg.Schema, org.apache.parquet.ParquetReadOptions, java.util.function.Function<org.apache.parquet.schema.MessageType,\
+        \ org.apache.iceberg.parquet.VectorizedReader<?>>, org.apache.iceberg.mapping.NameMapping,\
+        \ org.apache.iceberg.expressions.Expression, boolean, boolean, int)"
+      new: "method void org.apache.iceberg.parquet.VectorizedParquetReader<T>::<init>(org.apache.iceberg.io.InputFile,\
+        \ org.apache.iceberg.Schema, org.apache.parquet.ParquetReadOptions, java.util.function.Function<org.apache.parquet.schema.MessageType,\
+        \ org.apache.iceberg.parquet.VectorizedReader<?>>, org.apache.iceberg.mapping.NameMapping,\
+        \ org.apache.iceberg.expressions.Expression, boolean, boolean, int, java.lang.Integer)"
+      justification: "Added a pushed limit parameter to support limit push down"
   apache-iceberg-0.14.0:
     org.apache.iceberg:iceberg-api:
     - code: "java.class.defaultSerializationChanged"

diff --git a/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java b/arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java
@@ -147,54 +147,60 @@ public VectorHolder read(VectorHolder reuse, int numValsToRead) {
     }
     if (vectorizedColumnIterator.hasNext()) {
       if (dictEncoded) {
-        vectorizedColumnIterator.dictionaryBatchReader().nextBatch(vec, -1, nullabilityHolder);
+        vectorizedColumnIterator
+            .dictionaryBatchReader()
+            .nextBatch(numValsToRead, vec, -1, nullabilityHolder);
       } else {
         switch (readType) {
           case VARBINARY:
           case VARCHAR:
             vectorizedColumnIterator
                 .varWidthTypeBatchReader()
-                .nextBatch(vec, -1, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, -1, nullabilityHolder);
             break;
           case BOOLEAN:
-            vectorizedColumnIterator.booleanBatchReader().nextBatch(vec, -1, nullabilityHolder);
+            vectorizedColumnIterator
+                .booleanBatchReader()
+                .nextBatch(numValsToRead, vec, -1, nullabilityHolder);
             break;
           case INT:
           case INT_BACKED_DECIMAL:
             vectorizedColumnIterator
                 .integerBatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case LONG:
           case LONG_BACKED_DECIMAL:
-            vectorizedColumnIterator.longBatchReader().nextBatch(vec, typeWidth, nullabilityHolder);
+            vectorizedColumnIterator
+                .longBatchReader()
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case FLOAT:
             vectorizedColumnIterator
                 .floatBatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case DOUBLE:
             vectorizedColumnIterator
                 .doubleBatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case TIMESTAMP_MILLIS:
             vectorizedColumnIterator
                 .timestampMillisBatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case TIMESTAMP_INT96:
             vectorizedColumnIterator
                 .timestampInt96BatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
           case UUID:
           case FIXED_WIDTH_BINARY:
           case FIXED_LENGTH_DECIMAL:
             vectorizedColumnIterator
                 .fixedSizeBinaryBatchReader()
-                .nextBatch(vec, typeWidth, nullabilityHolder);
+                .nextBatch(numValsToRead, vec, typeWidth, nullabilityHolder);
             break;
         }
       }

diff --git a/...w/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedColumnIterator.java b/...w/src/main/java/org/apache/iceberg/arrow/vectorized/parquet/VectorizedColumnIterator.java
@@ -69,12 +69,22 @@ public boolean producesDictionaryEncodedVector() {
   }
 
   public abstract class BatchReader {
-    public void nextBatch(FieldVector fieldVector, int typeWidth, NullabilityHolder holder) {
+    public void nextBatch(
+        int numValsToRead, FieldVector fieldVector, int typeWidth, NullabilityHolder holder) {
       int rowsReadSoFar = 0;
-      while (rowsReadSoFar < batchSize && hasNext()) {
+      while (rowsReadSoFar < batchSize && hasNext() && rowsReadSoFar < numValsToRead) {
         advance();
+        int expectedBatchSize;
+        if (numValsToRead < 0) {
+          throw new IllegalStateException(
+              String.format(
+                  "Cannot read a negative number of values. numValsToRead = %d", numValsToRead));
+        } else {
+          expectedBatchSize = Math.min(batchSize - rowsReadSoFar, numValsToRead - rowsReadSoFar);
+        }
+
         int rowsInThisBatch =
-            nextBatchOf(fieldVector, batchSize - rowsReadSoFar, rowsReadSoFar, typeWidth, holder);
+            nextBatchOf(fieldVector, expectedBatchSize, rowsReadSoFar, typeWidth, holder);
         rowsReadSoFar += rowsInThisBatch;
         triplesRead += rowsInThisBatch;
         fieldVector.setValueCount(rowsReadSoFar);

diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java
@@ -1048,6 +1048,7 @@ public static class ReadBuilder {
     private NameMapping nameMapping = null;
     private ByteBuffer fileEncryptionKey = null;
     private ByteBuffer fileAADPrefix = null;
+    private Integer pushedLimit;
 
     private ReadBuilder(InputFile file) {
       this.file = file;
@@ -1151,6 +1152,12 @@ public ReadBuilder withAADPrefix(ByteBuffer aadPrefix) {
       return this;
     }
 
+    /**
+     * Sets the maximum number of rows the built reader should return.
+     *
+     * @param limit the pushed-down row limit; must be non-null and non-negative
+     * @return this builder for method chaining
+     * @throws IllegalArgumentException if {@code limit} is null or negative
+     */
+    public ReadBuilder pushedlimit(Integer limit) {
+      // check for null explicitly: unboxing a null Integer in the comparison would throw a bare NPE
+      Preconditions.checkArgument(limit != null && limit >= 0, "Invalid pushed limit: %s", limit);
+      this.pushedLimit = limit;
+      return this;
+    }
+
     @SuppressWarnings({"unchecked", "checkstyle:CyclomaticComplexity"})
     public <D> CloseableIterable<D> build() {
       FileDecryptionProperties fileDecryptionProperties = null;
@@ -1212,10 +1219,19 @@ public <D> CloseableIterable<D> build() {
               filter,
               reuseContainers,
               caseSensitive,
-              maxRecordsPerBatch);
+              maxRecordsPerBatch,
+              pushedLimit);
         } else {
           return new org.apache.iceberg.parquet.ParquetReader<>(
-              file, schema, options, readerFunc, mapping, filter, reuseContainers, caseSensitive);
+              file,
+              schema,
+              options,
+              readerFunc,
+              mapping,
+              filter,
+              reuseContainers,
+              caseSensitive,
+              pushedLimit);
         }
       }
 

diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetReader.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetReader.java
@@ -43,6 +43,7 @@ public class ParquetReader<T> extends CloseableGroup implements CloseableIterabl
   private final boolean reuseContainers;
   private final boolean caseSensitive;
   private final NameMapping nameMapping;
+  private Integer pushedLimit;
 
   public ParquetReader(
       InputFile input,
@@ -52,7 +53,8 @@ public ParquetReader(
       NameMapping nameMapping,
       Expression filter,
       boolean reuseContainers,
-      boolean caseSensitive) {
+      boolean caseSensitive,
+      Integer pushedLimit) {
     this.input = input;
     this.expectedSchema = expectedSchema;
     this.options = options;
@@ -62,6 +64,7 @@ public ParquetReader(
     this.reuseContainers = reuseContainers;
     this.caseSensitive = caseSensitive;
     this.nameMapping = nameMapping;
+    this.pushedLimit = pushedLimit;
   }
 
   private ReadConf<T> conf = null;
@@ -89,6 +92,7 @@ private ReadConf<T> init() {
   @Override
   public CloseableIterator<T> iterator() {
     FileIterator<T> iter = new FileIterator<>(init());
+    iter.pushedLimit = pushedLimit; // pass the reader's limit (possibly null) to the new iterator
     addCloseable(iter);
     return iter;
   }
@@ -105,6 +109,7 @@ private static class FileIterator<T> implements CloseableIterator<T> {
     private long nextRowGroupStart = 0;
     private long valuesRead = 0;
     private T last = null;
+    private Integer pushedLimit;
 
     FileIterator(ReadConf<T> conf) {
       this.reader = conf.reader();
@@ -117,7 +122,11 @@ private static class FileIterator<T> implements CloseableIterator<T> {
 
     @Override
     public boolean hasNext() {
-      return valuesRead < totalValues;
+      // null means no limit was pushed down; a limit of 0 is valid and must yield no rows
+      if (pushedLimit != null) {
+        return valuesRead < Math.min(totalValues, pushedLimit);
+      } else {
+        return valuesRead < totalValues;
+      }
     }
 
     @Override

diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java b/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java
@@ -49,6 +49,7 @@ public class VectorizedParquetReader<T> extends CloseableGroup implements Closea
   private final boolean caseSensitive;
   private final int batchSize;
   private final NameMapping nameMapping;
+  private Integer pushedLimit;
 
   public VectorizedParquetReader(
       InputFile input,
@@ -59,7 +60,8 @@ public VectorizedParquetReader(
       Expression filter,
       boolean reuseContainers,
       boolean caseSensitive,
-      int maxRecordsPerBatch) {
+      int maxRecordsPerBatch,
+      Integer pushedLimit) {
     this.input = input;
     this.expectedSchema = expectedSchema;
     this.options = options;
@@ -70,6 +72,7 @@ public VectorizedParquetReader(
     this.caseSensitive = caseSensitive;
     this.batchSize = maxRecordsPerBatch;
     this.nameMapping = nameMapping;
+    this.pushedLimit = pushedLimit;
   }
 
   private ReadConf conf = null;
@@ -97,6 +100,7 @@ private ReadConf init() {
   @Override
   public CloseableIterator<T> iterator() {
     FileIterator<T> iter = new FileIterator<>(init());
+    iter.pushedLimit = pushedLimit; // pass the reader's limit (possibly null) to the new iterator
     addCloseable(iter);
     return iter;
   }
@@ -114,6 +118,7 @@ private static class FileIterator<T> implements CloseableIterator<T> {
     private long valuesRead = 0;
     private T last = null;
     private final long[] rowGroupsStartRowPos;
+    private Integer pushedLimit;
 
     FileIterator(ReadConf conf) {
       this.reader = conf.reader();
@@ -129,7 +134,11 @@ private static class FileIterator<T> implements CloseableIterator<T> {
 
     @Override
     public boolean hasNext() {
-      return valuesRead < totalValues;
+      // null means no limit was pushed down; a limit of 0 is valid and must yield no rows
+      if (pushedLimit != null) {
+        return valuesRead < Math.min(totalValues, pushedLimit);
+      } else {
+        return valuesRead < totalValues;
+      }
     }
 
     @Override
@@ -141,8 +150,18 @@ public T next() {
         advance();
       }
 
-      // batchSize is an integer, so casting to integer is safe
-      int numValuesToRead = (int) Math.min(nextRowGroupStart - valuesRead, batchSize);
+      long remainingValues = nextRowGroupStart - valuesRead;
+
+      int numValuesToRead;
+      if (pushedLimit != null && pushedLimit - valuesRead > 0) {
+        // batchSize is an integer, so casting to integer is safe
+        numValuesToRead =
+            (int) Math.min(remainingValues, Math.min(batchSize, pushedLimit - valuesRead));
+      } else {
+        // batchSize is an integer, so casting to integer is safe
+        numValuesToRead = (int) Math.min(remainingValues, batchSize);
+      }
+
       if (reuseContainers) {
         this.last = model.read(last, numValuesToRead);
       } else {

diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkReadConf.java
@@ -277,6 +277,14 @@ public boolean aggregatePushDownEnabled() {
         .parse();
   }
 
+  public boolean limitPushDownEnabled() { // boolean flag for limit push down
+    return confParser
+        .booleanConf()
+        .sessionConf(SparkSQLProperties.LIMIT_PUSH_DOWN_ENABLED) // only a session conf key is set
+        .defaultValue(SparkSQLProperties.LIMIT_PUSH_DOWN_ENABLED_DEFAULT) // supplied default
+        .parse();
+  }
+
   public boolean adaptiveSplitSizeEnabled() {
     return confParser
         .booleanConf()

diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkSQLProperties.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/SparkSQLProperties.java
@@ -45,6 +45,10 @@ private SparkSQLProperties() {}
       "spark.sql.iceberg.aggregate-push-down.enabled";
   public static final boolean AGGREGATE_PUSH_DOWN_ENABLED_DEFAULT = true;
 
+  // Controls whether to push down limit to Iceberg
+  public static final String LIMIT_PUSH_DOWN_ENABLED = "spark.sql.iceberg.limit-push-down.enabled";
+  public static final boolean LIMIT_PUSH_DOWN_ENABLED_DEFAULT = true;
+
   // Controls write distribution mode
   public static final String DISTRIBUTION_MODE = "spark.sql.iceberg.distribution-mode";