diff --git a/.github/actions/setup-builder/action.yaml b/.github/actions/setup-builder/action.yaml index 13a3008b74bc..0157caf8c296 100644 --- a/.github/actions/setup-builder/action.yaml +++ b/.github/actions/setup-builder/action.yaml @@ -25,6 +25,31 @@ inputs: runs: using: "composite" steps: + - name: Cache Cargo + uses: actions/cache@v3 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + # + # source https://github.com/actions/cache/blob/main/examples.md#rust---cargo + path: | + /usr/local/cargo/bin/ + /usr/local/cargo/registry/index/ + /usr/local/cargo/registry/cache/ + /usr/local/cargo/git/db/ + key: cargo-cache3-${{ hashFiles('**/Cargo.toml') }} + restore-keys: cargo-cache3- + - name: Generate lockfile + shell: bash + run: cargo fetch + - name: Cache Rust dependencies + uses: actions/cache@v3 + with: + # these represent compiled steps of both dependencies and arrow + # and thus are specific for a particular OS, arch and rust version. 
+ path: /github/home/target + key: ${{ runner.os }}-${{ runner.arch }}-target-cache3-${{ inputs.rust-version }}-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-${{ runner.arch }}-target-cache3-${{ inputs.rust-version }}- - name: Install Build Dependencies shell: bash run: | @@ -36,4 +61,4 @@ runs: echo "Installing ${{ inputs.rust-version }}" rustup toolchain install ${{ inputs.rust-version }} rustup default ${{ inputs.rust-version }} - rustup component add rustfmt + echo "CARGO_TARGET_DIR=/github/home/target" >> $GITHUB_ENV diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 41b1dcbe8eb9..7eed6b8e94c9 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -39,7 +39,7 @@ jobs: path: rust fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: 3.8 - name: Setup Archery @@ -64,17 +64,17 @@ jobs: rustup default ${{ matrix.rust }} rustup component add rustfmt clippy - name: Cache Cargo - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/.cargo key: cargo-maturin-cache- - name: Cache Rust dependencies - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/target # this key is not equal because maturin uses different compilation flags. 
key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}- - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v3 with: python-version: '3.7' - name: Upgrade pip and setuptools diff --git a/.github/workflows/miri.sh b/.github/workflows/miri.sh index 27c6f5eecc87..56da5c5c5d3e 100755 --- a/.github/workflows/miri.sh +++ b/.github/workflows/miri.sh @@ -6,21 +6,12 @@ # rustup default nightly -export MIRIFLAGS="-Zmiri-disable-isolation" +# stacked borrows checking uses too much memory to run successfully in github actions +# re-enable if the CI is migrated to something more powerful (https://github.com/apache/arrow-rs/issues/1833) +# see also https://github.com/rust-lang/miri/issues/1367 +export MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-disable-stacked-borrows" cargo miri setup cargo clean -run_miri() { - # Currently only the arrow crate is tested with miri - # IO related tests and some unsupported tests are skipped - cargo miri test -p arrow -- --skip csv --skip ipc --skip json -} - -# If MIRI fails, automatically retry -# Seems like miri is occasionally killed by the github runner -# https://github.com/apache/arrow-rs/issues/879 -for i in `seq 1 5`; do - echo "Starting Arrow MIRI run..." - run_miri && break - echo "foo" > /tmp/data.txt -done +echo "Starting Arrow MIRI run..." +cargo miri test -p arrow -- --skip csv --skip ipc --skip json diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml index 73dfc0092836..7feacc07dd73 100644 --- a/.github/workflows/miri.yaml +++ b/.github/workflows/miri.yaml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-name: Rust +name: MIRI on: # always trigger @@ -26,19 +26,15 @@ jobs: miri-checks: name: MIRI runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [nightly-2022-01-17] steps: - uses: actions/checkout@v2 with: submodules: true - name: Setup Rust toolchain run: | - rustup toolchain install ${{ matrix.rust }} - rustup default ${{ matrix.rust }} - rustup component add rustfmt clippy miri + rustup toolchain install nightly --component miri + rustup override set nightly + cargo miri setup - name: Run Miri Checks env: RUST_BACKTRACE: full diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 67272053e6b2..9331db745659 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -30,8 +30,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [stable] + arch: [ amd64 ] + rust: [ stable ] container: image: ${{ matrix.arch }}/rust env: @@ -40,39 +40,23 @@ jobs: RUSTFLAGS: "-C debuginfo=1" steps: - uses: actions/checkout@v2 - - name: Cache Cargo - uses: actions/cache@v2 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: /github/home/.cargo - key: cargo-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - # these represent compiled steps of both dependencies and arrow - # and thus are specific for a particular OS, arch and rust version. 
- path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache3-${{ matrix.rust }}- - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Build Workspace run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" cargo build # test the crate linux-test: name: Test Workspace on AMD64 Rust ${{ matrix.rust }} - needs: [linux-build-lib] + needs: [ linux-build-lib ] runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [stable] + arch: [ amd64 ] + rust: [ stable ] container: image: ${{ matrix.arch }}/rust env: @@ -85,54 +69,52 @@ jobs: - uses: actions/checkout@v2 with: submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache3-${{ matrix.rust }} - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Run tests run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - # run tests on all workspace members with default feature list cargo test - - # Switch to arrow crate - cd arrow - # re-run tests on arrow crate to ensure - # all arrays are created correctly - cargo test --features=force_validate - cargo test --features=prettyprint - # run test on arrow crate with minimal set of features - cargo test --no-default-features + - name: Re-run tests with all supported features + run: | + cargo test -p arrow --features=force_validate,prettyprint + - name: Run examples + run: | + # Test arrow examples cargo run --example builders cargo run --example dynamic_types cargo run --example read_csv cargo run --example 
read_csv_infer_schema - cargo check --no-default-features - - # Switch to parquet crate - cd ../parquet - # re-run tests on parquet crate with async feature enabled - cargo test --features=async - cargo check --no-default-features - - # Switch to arrow-flight - cd ../arrow-flight - cargo test --features=flight-sql-experimental - cargo check --no-default-features + - name: Test compilation of arrow library crate with different feature combinations + run: | + cargo check -p arrow + cargo check -p arrow --no-default-features + - name: Test compilation of arrow targets with different feature combinations + run: | + cargo check -p arrow --all-targets + cargo check -p arrow --no-default-features --all-targets + cargo check -p arrow --no-default-features --all-targets --features test_utils + - name: Re-run tests on arrow-flight with all features + run: | + cargo test -p arrow-flight --all-features + - name: Re-run tests on parquet crate with all features + run: | + cargo test -p parquet --all-features + - name: Test compilation of parquet library crate with different feature combinations + run: | + cargo check -p parquet + cargo check -p parquet --no-default-features + cargo check -p parquet --no-default-features --features arrow + - name: Test compilation of parquet targets with different feature combinations + run: | + cargo check -p parquet --all-targets + cargo check -p parquet --no-default-features --all-targets + cargo check -p parquet --no-default-features --features arrow --all-targets + - name: Test compilation of parquet_derive macro with different feature combinations + run: | + cargo check -p parquet_derive # test the --features "simd" of the arrow crate. This requires nightly. 
linux-test-simd: @@ -140,8 +122,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [nightly-2022-05-23] + arch: [ amd64 ] + rust: [ nightly ] container: image: ${{ matrix.arch }}/rust env: @@ -153,40 +135,25 @@ jobs: - uses: actions/checkout@v2 with: submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - key: cargo-nightly-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-nightly-cache3-${{ matrix.rust }} - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Run tests run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd arrow - cargo test --features "simd" - - name: Check new project build with simd features + cargo test -p arrow --features "simd" + - name: Check compilation with simd features run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd arrow/test/dependency/simd - cargo check + cargo check -p arrow --features simd + cargo check -p arrow --features simd --all-targets windows-and-macos: name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }} runs-on: ${{ matrix.os }} strategy: matrix: - os: [windows-latest, macos-latest] - rust: [stable] + os: [ windows-latest, macos-latest ] + rust: [ stable ] steps: - uses: actions/checkout@v2 with: @@ -197,7 +164,6 @@ jobs: run: | rustup toolchain install ${{ matrix.rust }} rustup default ${{ matrix.rust }} - rustup component add rustfmt - name: Run tests shell: bash run: | @@ -209,12 +175,12 @@ jobs: clippy: name: Clippy - needs: [linux-build-lib] + needs: [ linux-build-lib ] runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [stable] + arch: [ amd64 ] + rust: [ stable ] container: image: ${{ matrix.arch }}/rust env: @@ -225,29 +191,15 @@ jobs: - uses: actions/checkout@v2 with: 
submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache3-${{ matrix.rust }} - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Setup Clippy run: | - rustup component add rustfmt clippy + rustup component add clippy - name: Run clippy run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" cargo clippy --features test_common --features prettyprint --features=async --all-targets --workspace -- -D warnings check_benches: @@ -255,8 +207,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [stable] + arch: [ amd64 ] + rust: [ stable ] container: image: ${{ matrix.arch }}/rust env: @@ -267,27 +219,13 @@ jobs: - uses: actions/checkout@v2 with: submodules: true - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache3-${{ matrix.rust }} - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Check benchmarks run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cargo check --benches --workspace --features test_common,prettyprint,async,experimental + cargo check --benches --workspace --features test_common,prettyprint,async,experimental lint: name: Lint (cargo fmt) @@ -309,8 +247,8 @@ jobs: runs-on: 
ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [stable] + arch: [ amd64 ] + rust: [ stable ] steps: - uses: actions/checkout@v2 with: @@ -319,15 +257,14 @@ jobs: run: | rustup toolchain install ${{ matrix.rust }} rustup default ${{ matrix.rust }} - rustup component add rustfmt clippy - name: Cache Cargo - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/.cargo # this key is not equal because the user is different than on a container (runner vs github) key: cargo-coverage-cache3- - name: Cache Rust dependencies - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/target # this key is not equal because coverage uses different compilation flags. @@ -354,8 +291,8 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [nightly-2022-05-23] + arch: [ amd64 ] + rust: [ nightly ] container: image: ${{ matrix.arch }}/rust env: @@ -369,12 +306,12 @@ jobs: with: submodules: true - name: Cache Cargo - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /github/home/.cargo key: cargo-wasm32-cache3- - name: Cache Rust dependencies - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /github/home/target key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache3-${{ matrix.rust }} @@ -382,13 +319,10 @@ jobs: run: | rustup toolchain install ${{ matrix.rust }} rustup override set ${{ matrix.rust }} - rustup component add rustfmt rustup target add wasm32-unknown-unknown rustup target add wasm32-wasi - name: Build arrow crate run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" cd arrow cargo build --no-default-features --features=csv,ipc,simd --target wasm32-unknown-unknown cargo build --no-default-features --features=csv,ipc,simd --target wasm32-wasi @@ -399,14 +333,15 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [amd64] - rust: [nightly-2022-05-23] + arch: [ amd64 ] + rust: [ nightly ] container: image: ${{ matrix.arch }}/rust env: # 
Disable full debug symbol generation to speed up CI build and keep memory down # "1" means line tables only, which is useful for panic tracebacks. RUSTFLAGS: "-C debuginfo=1" + RUSTDOCFLAGS: "-Dwarnings" steps: - uses: actions/checkout@v2 with: @@ -415,74 +350,10 @@ jobs: run: | apt update apt install -y libpython3.9-dev - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - key: cargo-nightly-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - key: ${{ runner.os }}-${{ matrix.arch }}-target-nightly-cache3-${{ matrix.rust }} - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: ${{ matrix.rust }} - name: Run cargo doc run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - export RUSTDOCFLAGS="-Dwarnings" cargo doc --document-private-items --no-deps --workspace --all-features - - - # test builds with various feature flag combinations outside the main workspace - default-build: - name: Feature Flag Builds ${{ matrix.rust }} - runs-on: ubuntu-latest - strategy: - matrix: - arch: [amd64] - rust: [stable] - container: - image: ${{ matrix.arch }}/rust - env: - # Disable debug symbol generation to speed up CI build and keep memory down - RUSTFLAGS: "-C debuginfo=0" - steps: - - uses: actions/checkout@v2 - - name: Cache Cargo - uses: actions/cache@v2 - with: - path: /github/home/.cargo - # this key equals the ones on `linux-build-lib` for re-use - key: cargo-cache3- - - name: Cache Rust dependencies - uses: actions/cache@v2 - with: - path: /github/home/target - # this key equals the ones on `linux-build-lib` for re-use - key: ${{ runner.os }}-${{ matrix.arch }}-target-cache3-${{ matrix.rust }} - - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: ${{ matrix.rust }} - - name: Arrow Build with default features - run: | - export CARGO_HOME="/github/home/.cargo" - export 
CARGO_TARGET_DIR="/github/home/target" - cd arrow/test/dependency/default-features - cargo check - - name: Arrow Build with default-features=false - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd arrow/test/dependency/no-default-features - cargo check - - name: Parquet Derive build with default-features - run: | - export CARGO_HOME="/github/home/.cargo" - export CARGO_TARGET_DIR="/github/home/target" - cd parquet_derive/test/dependency/default-features - cargo check diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md new file mode 100644 index 000000000000..518697ce09a0 --- /dev/null +++ b/CHANGELOG-old.md @@ -0,0 +1,1311 @@ + + + +## [15.0.0](https://github.com/apache/arrow-rs/tree/15.0.0) (2022-05-27) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/14.0.0...15.0.0) + +**Breaking changes:** + +- Change `ArrayDataBuilder::null_bit_buffer` to accept `Option` rather than `Buffer` [\#1739](https://github.com/apache/arrow-rs/pull/1739) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove `null_count` from `ArrayData::try_new()` [\#1721](https://github.com/apache/arrow-rs/pull/1721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Change parquet writers to use standard `std:io::Write` rather custom `ParquetWriter` trait \(\#1717\) \(\#1163\) [\#1719](https://github.com/apache/arrow-rs/pull/1719) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add explicit column mask for selection in parquet: `ProjectionMask` \(\#1701\) [\#1716](https://github.com/apache/arrow-rs/pull/1716) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add type\_ids in Union datatype [\#1703](https://github.com/apache/arrow-rs/pull/1703) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix Parquet Reader's Arrow Schema Inference [\#1682](https://github.com/apache/arrow-rs/pull/1682) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Rename the `string` kernel to `concatenate_elements` [\#1747](https://github.com/apache/arrow-rs/issues/1747) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `ArrayDataBuilder::null_bit_buffer` should accept `Option` as input type [\#1737](https://github.com/apache/arrow-rs/issues/1737) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix schema comparison for non\_canonical\_map when running flight test [\#1730](https://github.com/apache/arrow-rs/issues/1730) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support in aggregate kernel for `BinaryArray` [\#1724](https://github.com/apache/arrow-rs/issues/1724) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix incorrect null\_count in `generate_unions_case` integration test [\#1712](https://github.com/apache/arrow-rs/issues/1712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Keep type ids in Union datatype to follow Arrow spec and integrate with other implementations [\#1690](https://github.com/apache/arrow-rs/issues/1690) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Reading Alternative List Representations to Arrow From Parquet [\#1680](https://github.com/apache/arrow-rs/issues/1680) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Speed up the offsets checking [\#1675](https://github.com/apache/arrow-rs/issues/1675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Separate Parquet -\> Arrow Schema Conversion From ArrayBuilder [\#1655](https://github.com/apache/arrow-rs/issues/1655) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add `leaf_columns` argument to `ArrowReader::get_record_reader_by_columns` [\#1653](https://github.com/apache/arrow-rs/issues/1653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Implement `string_concat` kernel [\#1540](https://github.com/apache/arrow-rs/issues/1540) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve Unit Test Coverage of ArrayReaderBuilder [\#1484](https://github.com/apache/arrow-rs/issues/1484) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Fixed bugs:** + +- Parquet write failure \(from record batches\) when data is nested two levels deep [\#1744](https://github.com/apache/arrow-rs/issues/1744) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- IPC reader may break on projection [\#1735](https://github.com/apache/arrow-rs/issues/1735) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Latest nightly fails to build with feature simd [\#1734](https://github.com/apache/arrow-rs/issues/1734) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Trying to write parquet file in parallel results in corrupt file [\#1717](https://github.com/apache/arrow-rs/issues/1717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Roundtrip failure when using DELTA\_BINARY\_PACKED [\#1708](https://github.com/apache/arrow-rs/issues/1708) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `ArrayData::try_new` cannot always return expected error. 
[\#1707](https://github.com/apache/arrow-rs/issues/1707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- "out of order projection is not supported" after Fix Parquet Arrow Schema Inference [\#1701](https://github.com/apache/arrow-rs/issues/1701) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Rust is not interoperability with C++ for IPC schemas with dictionaries [\#1694](https://github.com/apache/arrow-rs/issues/1694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Incorrect Repeated Field Schema Inference [\#1681](https://github.com/apache/arrow-rs/issues/1681) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet Treats Embedded Arrow Schema as Authoritative [\#1663](https://github.com/apache/arrow-rs/issues/1663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- parquet\_to\_arrow\_schema\_by\_columns Incorrectly Handles Nested Types [\#1654](https://github.com/apache/arrow-rs/issues/1654) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Inconsistent Arrow Schema When Projecting Nested Parquet File [\#1652](https://github.com/apache/arrow-rs/issues/1652) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- StructArrayReader Cannot Handle Nested Lists [\#1651](https://github.com/apache/arrow-rs/issues/1651) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Bug \(`substring` kernel\): The null buffer is not aligned when `offset != 0` [\#1639](https://github.com/apache/arrow-rs/issues/1639) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Parquet command line tool does not install "globally" [\#1710](https://github.com/apache/arrow-rs/issues/1710) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Improve integration test document to follow Arrow C++ repo CI [\#1742](https://github.com/apache/arrow-rs/pull/1742) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([viirya](https://github.com/viirya)) + +**Merged pull requests:** + +- Test for list array equality with different offsets [\#1756](https://github.com/apache/arrow-rs/pull/1756) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Rename `string_concat` to `concat_elements_utf8` [\#1754](https://github.com/apache/arrow-rs/pull/1754) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Rename the `string` kernel to `concat_elements`. [\#1752](https://github.com/apache/arrow-rs/pull/1752) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Support writing nested lists to parquet [\#1746](https://github.com/apache/arrow-rs/pull/1746) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Pin nightly version to bypass packed\_simd build error [\#1743](https://github.com/apache/arrow-rs/pull/1743) ([viirya](https://github.com/viirya)) +- Fix projection in IPC reader [\#1736](https://github.com/apache/arrow-rs/pull/1736) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([iyupeng](https://github.com/iyupeng)) +- `cargo install` installs not globally [\#1732](https://github.com/apache/arrow-rs/pull/1732) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kazuk](https://github.com/kazuk)) +- Fix schema comparison for non\_canonical\_map when running flight test [\#1731](https://github.com/apache/arrow-rs/pull/1731) ([viirya](https://github.com/viirya)) +- Add `min_binary` and `max_binary` aggregate kernels [\#1725](https://github.com/apache/arrow-rs/pull/1725) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix parquet benchmarks [\#1723](https://github.com/apache/arrow-rs/pull/1723) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix 
BitReader::get\_batch zero extension \(\#1708\) [\#1722](https://github.com/apache/arrow-rs/pull/1722) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Implementation string concat [\#1720](https://github.com/apache/arrow-rs/pull/1720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ismail-Maj](https://github.com/Ismail-Maj)) +- Check the length of `null_bit_buffer` in `ArrayData::try_new()` [\#1714](https://github.com/apache/arrow-rs/pull/1714) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix incorrect null\_count in `generate_unions_case` integration test [\#1713](https://github.com/apache/arrow-rs/pull/1713) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix: Null buffer accounts for `offset` in `substring` kernel. [\#1704](https://github.com/apache/arrow-rs/pull/1704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Minor: Refine `OffsetSizeTrait` to extend `num::Integer` [\#1702](https://github.com/apache/arrow-rs/pull/1702) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix StructArrayReader handling nested lists \(\#1651\) [\#1700](https://github.com/apache/arrow-rs/pull/1700) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Speed up the offsets checking [\#1684](https://github.com/apache/arrow-rs/pull/1684) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) + +## [14.0.0](https://github.com/apache/arrow-rs/tree/14.0.0) (2022-05-13) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/13.0.0...14.0.0) + +**Breaking changes:** + +- Use `bytes` in parquet rather than custom Buffer implementation \(\#1474\) 
[\#1683](https://github.com/apache/arrow-rs/pull/1683) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Rename `OffsetSize::fn is_large` to `const OffsetSize::IS_LARGE` [\#1664](https://github.com/apache/arrow-rs/pull/1664) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove `StringOffsetTrait` and `BinaryOffsetTrait` [\#1645](https://github.com/apache/arrow-rs/pull/1645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix `generate_nested_dictionary_case` integration test failure [\#1636](https://github.com/apache/arrow-rs/pull/1636) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([viirya](https://github.com/viirya)) + +**Implemented enhancements:** + +- Add support for `DataType::Duration` in ffi interface [\#1688](https://github.com/apache/arrow-rs/issues/1688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix `generate_unions_case` integration test [\#1676](https://github.com/apache/arrow-rs/issues/1676) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `DictionaryArray` support for `bit_length` kernel [\#1673](https://github.com/apache/arrow-rs/issues/1673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `DictionaryArray` support for `length` kernel [\#1672](https://github.com/apache/arrow-rs/issues/1672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- flight\_client\_scenarios integration test should receive schema from flight data [\#1669](https://github.com/apache/arrow-rs/issues/1669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Unpin Flatbuffer version dependency [\#1667](https://github.com/apache/arrow-rs/issues/1667) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add dictionary array support for substring function [\#1656](https://github.com/apache/arrow-rs/issues/1656) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Exclude dict\_id and dict\_is\_ordered from equality comparison of `Field` [\#1646](https://github.com/apache/arrow-rs/issues/1646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove `StringOffsetTrait` and `BinaryOffsetTrait` [\#1644](https://github.com/apache/arrow-rs/issues/1644) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add tests and examples for `UnionArray::from(data: ArrayData)` [\#1643](https://github.com/apache/arrow-rs/issues/1643) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add methods `pub fn offsets_buffer`, `pub fn types_ids_buffer`and `pub fn data_buffer` for `ArrayDataBuilder` [\#1640](https://github.com/apache/arrow-rs/issues/1640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix `generate_nested_dictionary_case` integration test failure for Rust cases [\#1635](https://github.com/apache/arrow-rs/issues/1635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Expose `ArrowWriter` row group flush in public API [\#1626](https://github.com/apache/arrow-rs/issues/1626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add `substring` support for `FixedSizeBinaryArray` [\#1618](https://github.com/apache/arrow-rs/issues/1618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add PrettyPrint for `UnionArray`s [\#1594](https://github.com/apache/arrow-rs/issues/1594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add SIMD support for the `length` kernel [\#1489](https://github.com/apache/arrow-rs/issues/1489) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support dictionary arrays in length and bit\_length [\#1674](https://github.com/apache/arrow-rs/pull/1674) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add dictionary array support for substring function [\#1665](https://github.com/apache/arrow-rs/pull/1665) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) +- Add `DecimalType` support in `new_null_array ` [\#1659](https://github.com/apache/arrow-rs/pull/1659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) + +**Fixed bugs:** + +- Docs.rs build is broken [\#1695](https://github.com/apache/arrow-rs/issues/1695) +- Interoperability with C++ for IPC schemas with dictionaries [\#1694](https://github.com/apache/arrow-rs/issues/1694) +- `UnionArray::is_null` incorrect [\#1625](https://github.com/apache/arrow-rs/issues/1625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Published Parquet documentation missing `arrow::async_reader` [\#1617](https://github.com/apache/arrow-rs/issues/1617) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Files written with Julia's Arrow.jl in IPC format cannot be read by arrow-rs [\#1335](https://github.com/apache/arrow-rs/issues/1335) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Correct arrow-flight readme version [\#1641](https://github.com/apache/arrow-rs/pull/1641) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- Make `OffsetSizeTrait::IS_LARGE` as a const value [\#1658](https://github.com/apache/arrow-rs/issues/1658) +- Question: Why are there 3 types of `OffsetSizeTrait`s? [\#1638](https://github.com/apache/arrow-rs/issues/1638) +- Written Parquet file way bigger than input files [\#1627](https://github.com/apache/arrow-rs/issues/1627) +- Ensure there is a single zero in the offsets buffer for an empty ListArray. 
[\#1620](https://github.com/apache/arrow-rs/issues/1620) +- Filtering `UnionArray` Changes DataType [\#1595](https://github.com/apache/arrow-rs/issues/1595) + +**Merged pull requests:** + +- Fix docs.rs build [\#1696](https://github.com/apache/arrow-rs/pull/1696) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- support duration in ffi [\#1689](https://github.com/apache/arrow-rs/pull/1689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ryan-jacobs1](https://github.com/ryan-jacobs1)) +- fix bench command line options [\#1685](https://github.com/apache/arrow-rs/pull/1685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kazuk](https://github.com/kazuk)) +- Enable branch protection [\#1679](https://github.com/apache/arrow-rs/pull/1679) ([tustvold](https://github.com/tustvold)) +- Fix logical merge conflict in \#1588 [\#1678](https://github.com/apache/arrow-rs/pull/1678) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix generate\_unions\_case for Rust case [\#1677](https://github.com/apache/arrow-rs/pull/1677) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Receive schema from flight data [\#1670](https://github.com/apache/arrow-rs/pull/1670) ([viirya](https://github.com/viirya)) +- unpin flatbuffers dependency version [\#1668](https://github.com/apache/arrow-rs/pull/1668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Cheappie](https://github.com/Cheappie)) +- Remove parquet dictionary converters \(\#1661\) [\#1662](https://github.com/apache/arrow-rs/pull/1662) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Minor: simplify the function `GenericListArray::get_type` [\#1650](https://github.com/apache/arrow-rs/pull/1650) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Pretty Print `UnionArray`s [\#1648](https://github.com/apache/arrow-rs/pull/1648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tfeda](https://github.com/tfeda)) +- Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` [\#1647](https://github.com/apache/arrow-rs/pull/1647) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- expose row-group flush in public api [\#1634](https://github.com/apache/arrow-rs/pull/1634) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Cheappie](https://github.com/Cheappie)) +- Add `substring` support for `FixedSizeBinaryArray` [\#1633](https://github.com/apache/arrow-rs/pull/1633) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix UnionArray is\_null [\#1632](https://github.com/apache/arrow-rs/pull/1632) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Do not assume dictionaries exists in footer [\#1631](https://github.com/apache/arrow-rs/pull/1631) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pcjentsch](https://github.com/pcjentsch)) +- Add support for nested list arrays from parquet to arrow arrays \(\#993\) [\#1588](https://github.com/apache/arrow-rs/pull/1588) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add `async` into doc features [\#1349](https://github.com/apache/arrow-rs/pull/1349) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([HaoYang670](https://github.com/HaoYang670)) + + +## [13.0.0](https://github.com/apache/arrow-rs/tree/13.0.0) (2022-04-29) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/12.0.0...13.0.0) + +**Breaking changes:** + +- Update `parquet::basic::LogicalType` to be more idiomatic 
[\#1612](https://github.com/apache/arrow-rs/pull/1612) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tfeda](https://github.com/tfeda)) +- Fix Null Mask Handling in `ArrayData`, `UnionArray`, and `MapArray` [\#1589](https://github.com/apache/arrow-rs/pull/1589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Replace `&Option` with `Option<&T>` in several `arrow` and `parquet` APIs [\#1571](https://github.com/apache/arrow-rs/pull/1571) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tfeda](https://github.com/tfeda)) + +**Implemented enhancements:** + +- Read/write nested dictionary under fixed size list in ipc stream reader/write [\#1609](https://github.com/apache/arrow-rs/issues/1609) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support for `BinaryArray` in `substring` kernel [\#1593](https://github.com/apache/arrow-rs/issues/1593) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Read/write nested dictionary under large list in ipc stream reader/write [\#1584](https://github.com/apache/arrow-rs/issues/1584) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Read/write nested dictionary under map in ipc stream reader/write [\#1582](https://github.com/apache/arrow-rs/issues/1582) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement `Clone` for JSON `DecoderOptions` [\#1580](https://github.com/apache/arrow-rs/issues/1580) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add utf-8 validation checking to `substring` kernel [\#1575](https://github.com/apache/arrow-rs/issues/1575) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support casting to/from `DataType::Null` in `cast` kernel [\#1572](https://github.com/apache/arrow-rs/pull/1572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([WinkerDu](https://github.com/WinkerDu)) + +**Fixed bugs:** + +- Parquet schema should allow scale == precision for decimal type [\#1606](https://github.com/apache/arrow-rs/issues/1606) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- ListArray::from\(ArrayData\) dereferences invalid pointer when offsets are empty [\#1601](https://github.com/apache/arrow-rs/issues/1601) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- ArrayData Equality Incorrect Null Mask Offset Handling [\#1599](https://github.com/apache/arrow-rs/issues/1599) +- Filtering UnionArray Incorrect Handles Runs [\#1598](https://github.com/apache/arrow-rs/issues/1598) +- \[Safety\] Filtering Dense UnionArray Produces Invalid Offsets [\#1596](https://github.com/apache/arrow-rs/issues/1596) +- \[Safety\] UnionBuilder Doesn't Check Types [\#1591](https://github.com/apache/arrow-rs/issues/1591) +- Union Layout Should Not Support Separate Validity Mask [\#1590](https://github.com/apache/arrow-rs/issues/1590) +- Incorrect nullable flag when reading maps \( test\_read\_maps fails when `force_validate` is active\) [\#1587](https://github.com/apache/arrow-rs/issues/1587) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Output of `ipc::reader::tests::projection_should_work` fails validation [\#1548](https://github.com/apache/arrow-rs/issues/1548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Incorrect min/max statistics for decimals with byte-array notation [\#1532](https://github.com/apache/arrow-rs/issues/1532) + +**Documentation updates:** + +- Minor: Clarify docs on `UnionBuilder::append_null` [\#1628](https://github.com/apache/arrow-rs/pull/1628) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- Dense UnionArray Offsets Are i32 not i8 [\#1597](https://github.com/apache/arrow-rs/issues/1597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Replace `&Option` with 
`Option<&T>` in some APIs [\#1556](https://github.com/apache/arrow-rs/issues/1556) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve ergonomics of `parquet::basic::LogicalType` [\#1554](https://github.com/apache/arrow-rs/issues/1554) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Mark the current `substring` function as `unsafe` and rename it. [\#1541](https://github.com/apache/arrow-rs/issues/1541) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Requirements for Async Parquet API [\#1473](https://github.com/apache/arrow-rs/issues/1473) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Merged pull requests:** + +- Nit: use the standard function `div_ceil` [\#1629](https://github.com/apache/arrow-rs/pull/1629) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Update flatbuffers requirement from =2.1.1 to =2.1.2 [\#1622](https://github.com/apache/arrow-rs/pull/1622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix decimals min max statistics [\#1621](https://github.com/apache/arrow-rs/pull/1621) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([atefsawaed](https://github.com/atefsawaed)) +- Add example readme [\#1615](https://github.com/apache/arrow-rs/pull/1615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve docs and examples links on main readme [\#1614](https://github.com/apache/arrow-rs/pull/1614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Read/Write nested dictionaries under FixedSizeList in IPC [\#1610](https://github.com/apache/arrow-rs/pull/1610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add `substring` support for 
binary [\#1608](https://github.com/apache/arrow-rs/pull/1608) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Parquet: schema validation should allow scale == precision for decimal type [\#1607](https://github.com/apache/arrow-rs/pull/1607) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sunchao](https://github.com/sunchao)) +- Don't access and validate offset buffer in ListArray::from\(ArrayData\) [\#1602](https://github.com/apache/arrow-rs/pull/1602) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Fix map nullable flag in `ParquetTypeConverter` [\#1592](https://github.com/apache/arrow-rs/pull/1592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) +- Read/write nested dictionary under large list in ipc stream reader/writer [\#1585](https://github.com/apache/arrow-rs/pull/1585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Read/write nested dictionary under map in ipc stream reader/writer [\#1583](https://github.com/apache/arrow-rs/pull/1583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Derive `Clone` and `PartialEq` for json `DecoderOptions` [\#1581](https://github.com/apache/arrow-rs/pull/1581) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add utf-8 validation checking for `substring` [\#1577](https://github.com/apache/arrow-rs/pull/1577) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Use `Option` rather than `Option<&T>` for copy types in substring kernel [\#1576](https://github.com/apache/arrow-rs/pull/1576) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use littleendian arrow files for 
`projection_should_work` [\#1573](https://github.com/apache/arrow-rs/pull/1573) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) + + +## [12.0.0](https://github.com/apache/arrow-rs/tree/12.0.0) (2022-04-15) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/11.1.0...12.0.0) + +**Breaking changes:** + +- Add `ArrowReaderOptions` to `ParquetFileArrowReader`, add option to skip decoding arrow metadata from parquet \(\#1459\) [\#1558](https://github.com/apache/arrow-rs/pull/1558) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Support `RecordBatch` with zero columns but non zero row count, add field to `RecordBatchOptions` \(\#1536\) [\#1552](https://github.com/apache/arrow-rs/pull/1552) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Consolidate JSON Reader options and `DecoderOptions` [\#1539](https://github.com/apache/arrow-rs/pull/1539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Update `prost`, `prost-derive` and `prost-types` to 0.10, `tonic`, and `tonic-build` to `0.7` [\#1510](https://github.com/apache/arrow-rs/pull/1510) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) +- Add Json `DecoderOptions` and support custom `format_string` for each field [\#1451](https://github.com/apache/arrow-rs/pull/1451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sum12](https://github.com/sum12)) + +**Implemented enhancements:** + +- Read/write nested dictionary in ipc stream reader/writer [\#1565](https://github.com/apache/arrow-rs/issues/1565) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support `FixedSizeBinary` in the Arrow C data interface [\#1553](https://github.com/apache/arrow-rs/issues/1553) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- 
Support Empty Column Projection in `ParquetRecordBatchReader` [\#1537](https://github.com/apache/arrow-rs/issues/1537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support `RecordBatch` with zero columns but non zero row count [\#1536](https://github.com/apache/arrow-rs/issues/1536) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support for `Date32`/`Date64`\<--\> `String`/`LargeString` in `cast` kernel [\#1535](https://github.com/apache/arrow-rs/issues/1535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support creating arrays from externally owned memory like `Vec` or `String` [\#1516](https://github.com/apache/arrow-rs/issues/1516) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Speed up the `substring` kernel [\#1511](https://github.com/apache/arrow-rs/issues/1511) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Handle Parquet Files With Inconsistent Timestamp Units [\#1459](https://github.com/apache/arrow-rs/issues/1459) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Fixed bugs:** + +- Error Inferring Schema for LogicalType::UNKNOWN [\#1557](https://github.com/apache/arrow-rs/issues/1557) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Read dictionary from nested struct in ipc stream reader panics [\#1549](https://github.com/apache/arrow-rs/issues/1549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `filter` produces invalid sparse `UnionArray`s [\#1547](https://github.com/apache/arrow-rs/issues/1547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Documentation for `GenericListBuilder` is not exposed. 
[\#1518](https://github.com/apache/arrow-rs/issues/1518) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- cannot read parquet file [\#1515](https://github.com/apache/arrow-rs/issues/1515) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- The `substring` kernel panics when chars \> U+0x007F [\#1478](https://github.com/apache/arrow-rs/issues/1478) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Hang due to infinite loop when reading some parquet files with RLE encoding and bit packing [\#1458](https://github.com/apache/arrow-rs/issues/1458) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Documentation updates:** + +- Improve JSON reader documentation [\#1559](https://github.com/apache/arrow-rs/pull/1559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve doc string for `substring` kernel [\#1529](https://github.com/apache/arrow-rs/pull/1529) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Expose documentation of `GenericListBuilder` [\#1525](https://github.com/apache/arrow-rs/pull/1525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comath](https://github.com/comath)) +- Add a diagram to `take` kernel documentation [\#1524](https://github.com/apache/arrow-rs/pull/1524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Closed issues:** + +- Interesting benchmark results of `min_max_helper` [\#1400](https://github.com/apache/arrow-rs/issues/1400) + +**Merged pull requests:** + +- Fix incorrect `into_buffers` for UnionArray [\#1567](https://github.com/apache/arrow-rs/pull/1567) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Read/write nested dictionary in ipc stream reader/writer [\#1566](https://github.com/apache/arrow-rs/pull/1566) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Support FixedSizeBinary and FixedSizeList for the C data interface [\#1564](https://github.com/apache/arrow-rs/pull/1564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) +- Split out ListArrayReader into separate module \(\#1483\) [\#1563](https://github.com/apache/arrow-rs/pull/1563) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Split out `MapArray` into separate module \(\#1483\) [\#1562](https://github.com/apache/arrow-rs/pull/1562) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Support empty projection in `ParquetRecordBatchReader` [\#1560](https://github.com/apache/arrow-rs/pull/1560) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- fix infinite loop in not fully packed bit-packed runs [\#1555](https://github.com/apache/arrow-rs/pull/1555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add test for creating FixedSizeBinaryArray::try\_from\_sparse\_iter failed when given all Nones [\#1551](https://github.com/apache/arrow-rs/pull/1551) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Fix reading dictionaries from nested structs in ipc `StreamReader` [\#1550](https://github.com/apache/arrow-rs/pull/1550) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dispanser](https://github.com/dispanser)) +- Add support for Date32/64 \<--\> String/LargeString in `cast` kernel [\#1534](https://github.com/apache/arrow-rs/pull/1534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) +- fix clippy errors in 1.60 [\#1527](https://github.com/apache/arrow-rs/pull/1527) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Mark `remove-old-releases.sh` executable [\#1522](https://github.com/apache/arrow-rs/pull/1522) ([alamb](https://github.com/alamb)) +- Delete duplicate code in the `sort` kernel [\#1519](https://github.com/apache/arrow-rs/pull/1519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix reading nested lists from parquet files [\#1517](https://github.com/apache/arrow-rs/pull/1517) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) +- Speed up the `substring` kernel by about 2x [\#1512](https://github.com/apache/arrow-rs/pull/1512) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add `new_from_strings` to create `MapArrays` [\#1507](https://github.com/apache/arrow-rs/pull/1507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Decouple buffer deallocation from ffi and allow creating buffers from rust vec [\#1494](https://github.com/apache/arrow-rs/pull/1494) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) + +## [11.1.0](https://github.com/apache/arrow-rs/tree/11.1.0) (2022-03-31) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/11.0.0...11.1.0) + +**Implemented enhancements:** + +- Implement `size_hint` and `ExactSizedIterator` for DecimalArray [\#1505](https://github.com/apache/arrow-rs/issues/1505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support calculate length by chars for `StringArray` [\#1493](https://github.com/apache/arrow-rs/issues/1493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `length` kernel support for `ListArray` [\#1470](https://github.com/apache/arrow-rs/issues/1470) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- The length kernel should work with `BinaryArray`s [\#1464](https://github.com/apache/arrow-rs/issues/1464) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- FFI for Arrow C Stream Interface [\#1348](https://github.com/apache/arrow-rs/issues/1348) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve performance of `DictionaryArray::try_new()` [\#1313](https://github.com/apache/arrow-rs/issues/1313) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Fixed bugs:** + +- MIRI error in math\_checked\_divide\_op/try\_from\_trusted\_len\_iter [\#1496](https://github.com/apache/arrow-rs/issues/1496) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet Writer Incorrect Definition Levels for Nested NullArray [\#1480](https://github.com/apache/arrow-rs/issues/1480) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- FFI: ArrowArray::try\_from\_raw shouldn't clone [\#1425](https://github.com/apache/arrow-rs/issues/1425) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet reader fails to read null list. [\#1399](https://github.com/apache/arrow-rs/issues/1399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Documentation updates:** + +- A small mistake in the doc of `BinaryArray` and `LargeBinaryArray` [\#1455](https://github.com/apache/arrow-rs/issues/1455) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- A small mistake in the doc of `GenericBinaryArray::take_iter_unchecked` [\#1454](https://github.com/apache/arrow-rs/issues/1454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add links in the doc of `BinaryOffsetSizeTrait` [\#1453](https://github.com/apache/arrow-rs/issues/1453) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- The doc of `FixedSizeBinaryArray` is confusing. 
[\#1452](https://github.com/apache/arrow-rs/issues/1452) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Clarify docs that SlicesIterator ignores null values [\#1504](https://github.com/apache/arrow-rs/pull/1504) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Update the doc of `BinaryArray` and `LargeBinaryArray` [\#1471](https://github.com/apache/arrow-rs/pull/1471) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) + +**Closed issues:** + +- `packed_simd` v.s. `portable_simd`, which should be used? [\#1492](https://github.com/apache/arrow-rs/issues/1492) +- Cleanup: Use Arrow take kernel Within parquet ListArrayReader [\#1482](https://github.com/apache/arrow-rs/issues/1482) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Merged pull requests:** + +- Implement `size_hint` and `ExactSizedIterator` for `DecimalArray` [\#1506](https://github.com/apache/arrow-rs/pull/1506) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add `StringArray::num_chars` for calculating number of characters [\#1503](https://github.com/apache/arrow-rs/pull/1503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Workaround nightly miri error in `try_from_trusted_len_iter` [\#1497](https://github.com/apache/arrow-rs/pull/1497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- update doc of array\_binary and array\_string [\#1491](https://github.com/apache/arrow-rs/pull/1491) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Use Arrow take kernel within ListArrayReader [\#1490](https://github.com/apache/arrow-rs/pull/1490) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) +- Add `length` 
kernel support for List Array [\#1488](https://github.com/apache/arrow-rs/pull/1488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Support sort for `Decimal` data type [\#1487](https://github.com/apache/arrow-rs/pull/1487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) +- Fix reading/writing nested null arrays \(\#1480\) \(\#1036\) \(\#1399\) [\#1481](https://github.com/apache/arrow-rs/pull/1481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Implement ArrayEqual for UnionArray [\#1469](https://github.com/apache/arrow-rs/pull/1469) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Support the `length` kernel on Binary Array [\#1465](https://github.com/apache/arrow-rs/pull/1465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove Clone and copy source structs internally [\#1449](https://github.com/apache/arrow-rs/pull/1449) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix Parquet reader for null lists [\#1448](https://github.com/apache/arrow-rs/pull/1448) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) +- Improve performance of DictionaryArray::try\_new\(\)  [\#1435](https://github.com/apache/arrow-rs/pull/1435) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Add FFI for Arrow C Stream Interface [\#1384](https://github.com/apache/arrow-rs/pull/1384) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) + +## [11.0.0](https://github.com/apache/arrow-rs/tree/11.0.0) (2022-03-17) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/10.0.0...11.0.0) + +**Breaking changes:** 
+ +- Replace `filter_row_groups` with `ReadOptions` in parquet SerializedFileReader [\#1389](https://github.com/apache/arrow-rs/pull/1389) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yjshen](https://github.com/yjshen)) +- Implement projection for arrow `IPC Reader` file / streams [\#1339](https://github.com/apache/arrow-rs/pull/1339) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([Dandandan](https://github.com/Dandandan)) + +**Implemented enhancements:** + +- Fix generate\_interval\_case integration test failure [\#1445](https://github.com/apache/arrow-rs/issues/1445) +- Make the doc examples of `ListArray` and `LargeListArray` more readable [\#1433](https://github.com/apache/arrow-rs/issues/1433) +- Redundant `if` and `abs` in `shift()` [\#1427](https://github.com/apache/arrow-rs/issues/1427) +- Improve substring kernel performance [\#1422](https://github.com/apache/arrow-rs/issues/1422) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add missing value\_unchecked\(\) of `FixedSizeBinaryArray` [\#1419](https://github.com/apache/arrow-rs/issues/1419) +- Remove duplicate bound check in function `shift` [\#1408](https://github.com/apache/arrow-rs/issues/1408) +- Support dictionary array in C data interface [\#1397](https://github.com/apache/arrow-rs/issues/1397) +- filter kernel should work with `UnionArray`s [\#1394](https://github.com/apache/arrow-rs/issues/1394) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- filter kernel should work with `FixedSizeListArrays`s [\#1393](https://github.com/apache/arrow-rs/issues/1393) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add doc examples for creating FixedSizeListArray [\#1392](https://github.com/apache/arrow-rs/issues/1392) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Update `rust-version` to 1.59 [\#1377](https://github.com/apache/arrow-rs/issues/1377) +- 
Arrow IPC projection support [\#1338](https://github.com/apache/arrow-rs/issues/1338) +- Implement basic FlightSQL Server [\#1386](https://github.com/apache/arrow-rs/pull/1386) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([wangfenjin](https://github.com/wangfenjin)) + +**Fixed bugs:** + +- DictionaryArray::try\_new ignores validity bitmap of the keys [\#1429](https://github.com/apache/arrow-rs/issues/1429) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- The doc of `GenericListArray` is confusing [\#1424](https://github.com/apache/arrow-rs/issues/1424) +- DeltaBitPackDecoder Incorrectly Handles Non-Zero MiniBlock Bit Width Padding [\#1417](https://github.com/apache/arrow-rs/issues/1417) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- DeltaBitPackEncoder Pads Miniblock BitWidths With Arbitrary Values [\#1416](https://github.com/apache/arrow-rs/issues/1416) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Possible unaligned write with MutableBuffer::push [\#1410](https://github.com/apache/arrow-rs/issues/1410) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Integration Test is failing on master branch [\#1398](https://github.com/apache/arrow-rs/issues/1398) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Rewrite doc of `GenericListArray` [\#1450](https://github.com/apache/arrow-rs/pull/1450) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix integration doc about build.ninja location [\#1438](https://github.com/apache/arrow-rs/pull/1438) ([viirya](https://github.com/viirya)) + +**Merged pull requests:** + +- Rewrite doc example of `ListArray` and `LargeListArray` [\#1447](https://github.com/apache/arrow-rs/pull/1447) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix generate\_interval\_case in integration test 
[\#1446](https://github.com/apache/arrow-rs/pull/1446) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix generate\_decimal128\_case in integration test [\#1440](https://github.com/apache/arrow-rs/pull/1440) ([viirya](https://github.com/viirya)) +- `filter` kernel should work with FixedSizeListArrays [\#1434](https://github.com/apache/arrow-rs/pull/1434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Support nullable keys in DictionaryArray::try\_new [\#1430](https://github.com/apache/arrow-rs/pull/1430) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- remove redundant if/clamp\_min/abs [\#1428](https://github.com/apache/arrow-rs/pull/1428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Add doc example for creating `FixedSizeListArray` [\#1426](https://github.com/apache/arrow-rs/pull/1426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Directly write to MutableBuffer in substring [\#1423](https://github.com/apache/arrow-rs/pull/1423) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix possibly unaligned writes in MutableBuffer [\#1421](https://github.com/apache/arrow-rs/pull/1421) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Add value\_unchecked\(\) and unit test [\#1420](https://github.com/apache/arrow-rs/pull/1420) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Fix DeltaBitPack MiniBlock Bit Width Padding [\#1418](https://github.com/apache/arrow-rs/pull/1418) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Update zstd requirement from 0.10 to 0.11 
[\#1415](https://github.com/apache/arrow-rs/pull/1415) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Set `default-features = false` for `zstd` in the parquet crate to support `wasm32-unknown-unknown` [\#1414](https://github.com/apache/arrow-rs/pull/1414) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kylebarron](https://github.com/kylebarron)) +- Add support for `UnionArray` in `filter` kernel [\#1412](https://github.com/apache/arrow-rs/pull/1412) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Remove duplicate bound check in the function `shift` [\#1409](https://github.com/apache/arrow-rs/pull/1409) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add dictionary support for C data interface [\#1407](https://github.com/apache/arrow-rs/pull/1407) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) +- Fix a small spelling mistake in docs. 
[\#1406](https://github.com/apache/arrow-rs/pull/1406) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add unit test to check `FixedSizeBinaryArray` input all none [\#1405](https://github.com/apache/arrow-rs/pull/1405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Move csv Parser trait and its implementations to utils module [\#1385](https://github.com/apache/arrow-rs/pull/1385) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sum12](https://github.com/sum12)) + +## [10.0.0](https://github.com/apache/arrow-rs/tree/10.0.0) (2022-03-04) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/9.1.0...10.0.0) + +**Breaking changes:** + +- Remove existing has\_ methods for optional fields in `ColumnChunkMetaData` [\#1346](https://github.com/apache/arrow-rs/pull/1346) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) +- Remove redundant `has_` methods in `ColumnChunkMetaData` [\#1345](https://github.com/apache/arrow-rs/pull/1345) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) + +**Implemented enhancements:** + +- Add extract month and day in temporal.rs [\#1387](https://github.com/apache/arrow-rs/issues/1387) +- Add clone to `IpcWriteOptions` [\#1381](https://github.com/apache/arrow-rs/issues/1381) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support `MapArray` in `filter` kernel [\#1378](https://github.com/apache/arrow-rs/issues/1378) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `week` temporal kernel [\#1375](https://github.com/apache/arrow-rs/issues/1375) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve performance of `compare_dict_op` [\#1371](https://github.com/apache/arrow-rs/issues/1371) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support for LargeUtf8 in json writer [\#1357](https://github.com/apache/arrow-rs/issues/1357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Make `arrow::array::builder::MapBuilder` public [\#1354](https://github.com/apache/arrow-rs/issues/1354) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Refactor `StructArray::from` [\#1351](https://github.com/apache/arrow-rs/issues/1351) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Refactor `RecordBatch::validate_new_batch` [\#1350](https://github.com/apache/arrow-rs/issues/1350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove redundant has\_ methods for optional column metadata fields [\#1344](https://github.com/apache/arrow-rs/issues/1344) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add `write` method to JsonWriter [\#1340](https://github.com/apache/arrow-rs/issues/1340) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Refactor the code of `Bitmap::new` [\#1337](https://github.com/apache/arrow-rs/issues/1337) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Use DictionaryArray's iterator in `compare_dict_op` [\#1329](https://github.com/apache/arrow-rs/issues/1329) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `as_decimal_array(arr: &dyn Array) -> &DecimalArray` [\#1312](https://github.com/apache/arrow-rs/issues/1312) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- More ergonomic / idiomatic primitive array creation from iterators [\#1298](https://github.com/apache/arrow-rs/issues/1298) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement DictionaryArray support in `eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#1201](https://github.com/apache/arrow-rs/issues/1201) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Fixed bugs:** + +- `cargo clippy` fails on 
the `master` branch [\#1362](https://github.com/apache/arrow-rs/issues/1362) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `ArrowArray::try_from_raw` should not assume the pointers are from Arc [\#1333](https://github.com/apache/arrow-rs/issues/1333) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix CSV Writer::new to accept delimiter and make WriterBuilder::build use it [\#1328](https://github.com/apache/arrow-rs/issues/1328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Make bounds configurable via builder when reading CSV [\#1327](https://github.com/apache/arrow-rs/issues/1327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `with_datetime_format()` to CSV WriterBuilder [\#1272](https://github.com/apache/arrow-rs/issues/1272) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Performance improvements:** + +- Improve performance of `min` and `max` aggregation kernels without nulls [\#1373](https://github.com/apache/arrow-rs/issues/1373) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Closed issues:** + +- Consider removing redundant has\_XXX metadata functions in `ColumnChunkMetadata` [\#1332](https://github.com/apache/arrow-rs/issues/1332) + +**Merged pull requests:** + +- Support extract `day` and `month` in temporal.rs [\#1388](https://github.com/apache/arrow-rs/pull/1388) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Add write method to Json Writer [\#1383](https://github.com/apache/arrow-rs/pull/1383) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) +- Derive `Clone` for `IpcWriteOptions` [\#1382](https://github.com/apache/arrow-rs/pull/1382) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) +- feat: support maps in MutableArrayData 
[\#1379](https://github.com/apache/arrow-rs/pull/1379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) +- Support extract `week` in temporal.rs [\#1376](https://github.com/apache/arrow-rs/pull/1376) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Speed up the function `min_max_string` [\#1374](https://github.com/apache/arrow-rs/pull/1374) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Improve performance of dictionary kernels, add benchmark and add `take_iter_unchecked` [\#1372](https://github.com/apache/arrow-rs/pull/1372) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Update pyo3 requirement from 0.15 to 0.16 [\#1369](https://github.com/apache/arrow-rs/pull/1369) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update contributing guide [\#1368](https://github.com/apache/arrow-rs/pull/1368) ([HaoYang670](https://github.com/HaoYang670)) +- Allow primitive array creation from iterators of PrimitiveTypes \(as well as `Option`\) [\#1367](https://github.com/apache/arrow-rs/pull/1367) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Update flatbuffers requirement from =2.1.0 to =2.1.1 [\#1364](https://github.com/apache/arrow-rs/pull/1364) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix clippy lints [\#1363](https://github.com/apache/arrow-rs/pull/1363) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Refactor `RecordBatch::validate_new_batch` [\#1361](https://github.com/apache/arrow-rs/pull/1361) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Refactor `StructArray::from` [\#1360](https://github.com/apache/arrow-rs/pull/1360) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Update flatbuffers requirement from =2.0.0 to =2.1.0 [\#1359](https://github.com/apache/arrow-rs/pull/1359) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix: add LargeUtf8 support in json writer [\#1358](https://github.com/apache/arrow-rs/pull/1358) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tiphaineruy](https://github.com/tiphaineruy)) +- Add `as_decimal_array` function [\#1356](https://github.com/apache/arrow-rs/pull/1356) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Publicly export arrow::array::MapBuilder [\#1355](https://github.com/apache/arrow-rs/pull/1355) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tjwilson90](https://github.com/tjwilson90)) +- Add with\_datetime\_format to csv WriterBuilder [\#1347](https://github.com/apache/arrow-rs/pull/1347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Refactor `Bitmap::new` [\#1343](https://github.com/apache/arrow-rs/pull/1343) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove delimiter from csv Writer [\#1342](https://github.com/apache/arrow-rs/pull/1342) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Make bounds configurable in csv ReaderBuilder [\#1341](https://github.com/apache/arrow-rs/pull/1341) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- `ArrowArray::try_from_raw` should not assume the pointers are from Arc 
[\#1334](https://github.com/apache/arrow-rs/pull/1334) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Use DictionaryArray's iterator in `compare_dict_op` [\#1330](https://github.com/apache/arrow-rs/pull/1330) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Implement DictionaryArray support in neq\_dyn, lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn [\#1326](https://github.com/apache/arrow-rs/pull/1326) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Arrow Rust + Conbench Integration [\#1289](https://github.com/apache/arrow-rs/pull/1289) ([dianaclarke](https://github.com/dianaclarke)) + +## [9.1.0](https://github.com/apache/arrow-rs/tree/9.1.0) (2022-02-19) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/9.0.2...9.1.0) + +**Implemented enhancements:** + +- Exposing page encoding stats [\#1321](https://github.com/apache/arrow-rs/issues/1321) +- Improve filter performance by special casing high and low selectivity predicates [\#1288](https://github.com/apache/arrow-rs/issues/1288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Speed up `DeltaBitPackDecoder` [\#1281](https://github.com/apache/arrow-rs/issues/1281) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Fix all clippy lints in arrow crate [\#1255](https://github.com/apache/arrow-rs/issues/1255) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Expose page encoding `ColumnChunkMetadata` [\#1322](https://github.com/apache/arrow-rs/pull/1322) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) +- Expose column index and offset index in `ColumnChunkMetadata` [\#1318](https://github.com/apache/arrow-rs/pull/1318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) +- Expose bloom filter 
offset in `ColumnChunkMetadata` [\#1309](https://github.com/apache/arrow-rs/pull/1309) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) +- Add `DictionaryArray::try_new()` to create dictionaries from pre existing arrays [\#1300](https://github.com/apache/arrow-rs/pull/1300) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add `DictionaryArray::keys_iter`, and `take_iter` for other array types [\#1296](https://github.com/apache/arrow-rs/pull/1296) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Make `rle` decoder public under `experimental` feature [\#1271](https://github.com/apache/arrow-rs/pull/1271) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) +- Add `DictionaryArray` support in `eq_dyn` kernel [\#1263](https://github.com/apache/arrow-rs/pull/1263) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) + +**Fixed bugs:** + +- `len` is not a parameter of `MutableArrayData::extend` [\#1316](https://github.com/apache/arrow-rs/issues/1316) +- module `data_type` is private in Rust Parquet 8.0.0 [\#1302](https://github.com/apache/arrow-rs/issues/1302) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Test failure: bit\_chunk\_iterator [\#1294](https://github.com/apache/arrow-rs/issues/1294) +- csv\_writer benchmark fails with "no such file or directory" [\#1292](https://github.com/apache/arrow-rs/issues/1292) + +**Documentation updates:** + +- Fix warnings in `cargo doc` [\#1268](https://github.com/apache/arrow-rs/pull/1268) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Performance improvements:** + +- Vectorize DeltaBitPackDecoder, up to 5x faster decoding 
[\#1284](https://github.com/apache/arrow-rs/pull/1284) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Skip zero-ing primitive nulls [\#1280](https://github.com/apache/arrow-rs/pull/1280) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add specialized filter kernels in `compute` module \(up to 10x faster\) [\#1248](https://github.com/apache/arrow-rs/pull/1248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Closed issues:** + +- Expose column and offset index metadata offset [\#1317](https://github.com/apache/arrow-rs/issues/1317) +- Expose bloom filter metadata offset [\#1308](https://github.com/apache/arrow-rs/issues/1308) +- Improve ergonomics to construct `DictionaryArrays` from `Key` and `Value` arrays [\#1299](https://github.com/apache/arrow-rs/issues/1299) +- Make it easier to iterate over `DictionaryArray` [\#1295](https://github.com/apache/arrow-rs/issues/1295) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- (WON'T FIX) Don't Intertwine Bit and Byte Aligned Operations in `BitReader` [\#1282](https://github.com/apache/arrow-rs/issues/1282) +- how to create arrow::array from streamReader [\#1278](https://github.com/apache/arrow-rs/issues/1278) +- Remove scientific notation when converting floats to strings. 
[\#983](https://github.com/apache/arrow-rs/issues/983) + +**Merged pull requests:** + +- Update the document of function `MutableArrayData::extend` [\#1336](https://github.com/apache/arrow-rs/pull/1336) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix clippy lint `dead_code` [\#1324](https://github.com/apache/arrow-rs/pull/1324) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- fix test bug and ensure that bloom filter metadata is serialized in `to_thrift` [\#1320](https://github.com/apache/arrow-rs/pull/1320) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) +- Enable more clippy lints in arrow [\#1315](https://github.com/apache/arrow-rs/pull/1315) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Fix clippy lint `clippy::type_complexity` [\#1310](https://github.com/apache/arrow-rs/pull/1310) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Fix clippy lint `clippy::float_equality_without_abs` [\#1305](https://github.com/apache/arrow-rs/pull/1305) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Fix clippy `clippy::vec_init_then_push` lint [\#1303](https://github.com/apache/arrow-rs/pull/1303) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) +- Fix failing csv\_writer bench [\#1293](https://github.com/apache/arrow-rs/pull/1293) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) +- Changes for 9.0.2 [\#1291](https://github.com/apache/arrow-rs/pull/1291) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
[[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) +- Fix bitmask creation also for simd comparisons with scalar [\#1290](https://github.com/apache/arrow-rs/pull/1290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Fix simd comparison kernels [\#1286](https://github.com/apache/arrow-rs/pull/1286) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Restrict Decoder to compatible types \(\#1276\) [\#1277](https://github.com/apache/arrow-rs/pull/1277) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix some clippy lints in parquet crate, rename `LevelEncoder` variants to conform to Rust standards [\#1273](https://github.com/apache/arrow-rs/pull/1273) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([HaoYang670](https://github.com/HaoYang670)) +- Use new DecimalArray creation API in arrow crate [\#1249](https://github.com/apache/arrow-rs/pull/1249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve `DecimalArray` API ergonomics: add `iter()`, `FromIterator`, `with_precision_and_scale` [\#1223](https://github.com/apache/arrow-rs/pull/1223) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + + +## [9.0.2](https://github.com/apache/arrow-rs/tree/9.0.2) (2022-02-09) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/8.0.0...9.0.2) + +**Breaking changes:** + +- Add `Send` + `Sync` to `DataType`, `RowGroupReader`, `FileReader`, `ChunkReader`. 
[\#1264](https://github.com/apache/arrow-rs/issues/1264) +- Rename the function `Bitmap::len` to `Bitmap::bit_len` to clarify its meaning [\#1242](https://github.com/apache/arrow-rs/pull/1242) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove unused / broken `memory-check` feature [\#1222](https://github.com/apache/arrow-rs/pull/1222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Potentially buffer multiple `RecordBatches` before writing a parquet row group in `ArrowWriter` [\#1214](https://github.com/apache/arrow-rs/pull/1214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Add `async` arrow parquet reader [\#1154](https://github.com/apache/arrow-rs/pull/1154) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Rename `Bitmap::len` to `Bitmap::bit_len` [\#1233](https://github.com/apache/arrow-rs/issues/1233) +- Extend CSV schema inference to allow scientific notation for floating point types [\#1215](https://github.com/apache/arrow-rs/issues/1215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Write Multiple RecordBatch to Parquet Row Group [\#1211](https://github.com/apache/arrow-rs/issues/1211) +- Add doc examples for `eq_dyn` etc. 
[\#1202](https://github.com/apache/arrow-rs/issues/1202) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add comparison kernels for `BinaryArray` [\#1108](https://github.com/apache/arrow-rs/issues/1108) +- `impl ArrowNativeType for i128` [\#1098](https://github.com/apache/arrow-rs/issues/1098) +- Remove `Copy` trait bound from dyn scalar kernels [\#1243](https://github.com/apache/arrow-rs/pull/1243) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) +- Add `into_inner` for IPC `FileWriter` [\#1236](https://github.com/apache/arrow-rs/pull/1236) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) +- \[Minor\]Re-export `array::builder::make_builder` to make it available for downstream [\#1235](https://github.com/apache/arrow-rs/pull/1235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) + +**Fixed bugs:** + +- Parquet v8.0.0 panics when reading all null column to NullArray [\#1245](https://github.com/apache/arrow-rs/issues/1245) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Get `Unknown configuration option rust-version` when running the rust format command [\#1240](https://github.com/apache/arrow-rs/issues/1240) +- `Bitmap` Length Validation is Incorrect [\#1231](https://github.com/apache/arrow-rs/issues/1231) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Writing sliced `ListArray` or `MapArray` ignore offsets [\#1226](https://github.com/apache/arrow-rs/issues/1226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Remove broken `memory-tracking` crate feature [\#1171](https://github.com/apache/arrow-rs/issues/1171) +- Revert making `parquet::data_type` and `parquet::arrow::schema` experimental [\#1244](https://github.com/apache/arrow-rs/pull/1244) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
([tustvold](https://github.com/tustvold)) + +**Documentation updates:** + +- Update parquet crate documentation and examples [\#1253](https://github.com/apache/arrow-rs/pull/1253) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Refresh parquet readme / contributing guide [\#1252](https://github.com/apache/arrow-rs/pull/1252) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Add docs examples for dynamically compare functions [\#1250](https://github.com/apache/arrow-rs/pull/1250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add Rust Docs examples for UnionArray [\#1241](https://github.com/apache/arrow-rs/pull/1241) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Improve documentation for Bitmap [\#1237](https://github.com/apache/arrow-rs/pull/1237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Performance improvements:** + +- Improve performance for arithmetic kernels with `simd` feature enabled \(except for division/modulo\) [\#1221](https://github.com/apache/arrow-rs/pull/1221) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Do not concatenate identical dictionaries [\#1219](https://github.com/apache/arrow-rs/pull/1219) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Preserve dictionary encoding when decoding parquet into Arrow arrays, 60x perf improvement \(\#171\) [\#1180](https://github.com/apache/arrow-rs/pull/1180) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + +**Closed issues:** + +- `UnalignedBitChunkIterator` that iterates through already 
aligned `u64` blocks [\#1227](https://github.com/apache/arrow-rs/issues/1227) +- Remove unused `ArrowArrayReader` in parquet [\#1197](https://github.com/apache/arrow-rs/issues/1197) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Merged pull requests:** + +- Upgrade clap to 3.0.0 [\#1261](https://github.com/apache/arrow-rs/pull/1261) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jimexist](https://github.com/Jimexist)) +- Update chrono-tz requirement from 0.4 to 0.6 [\#1259](https://github.com/apache/arrow-rs/pull/1259) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Update zstd requirement from 0.9 to 0.10 [\#1257](https://github.com/apache/arrow-rs/pull/1257) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix NullArrayReader \(\#1245\) [\#1246](https://github.com/apache/arrow-rs/pull/1246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- dyn compare for binary array [\#1238](https://github.com/apache/arrow-rs/pull/1238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Remove arrow array reader \(\#1197\) [\#1234](https://github.com/apache/arrow-rs/pull/1234) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Fix null bitmap length validation \(\#1231\) [\#1232](https://github.com/apache/arrow-rs/pull/1232) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Faster bitmask iteration [\#1228](https://github.com/apache/arrow-rs/pull/1228) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add non utf8 values into the test cases of BinaryArray 
comparison [\#1220](https://github.com/apache/arrow-rs/pull/1220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Update DECIMAL\_RE to allow scientific notation in auto inferred schemas [\#1216](https://github.com/apache/arrow-rs/pull/1216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pjmore](https://github.com/pjmore)) +- Fix simd comparison kernels [\#1286](https://github.com/apache/arrow-rs/pull/1286) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Fix bitmask creation also for simd comparisons with scalar [\#1290](https://github.com/apache/arrow-rs/pull/1290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) + +## [8.0.0](https://github.com/apache/arrow-rs/tree/8.0.0) (2022-01-20) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/7.0.0...8.0.0) + +**Breaking changes:** + +- Return error from JSON writer rather than panic [\#1205](https://github.com/apache/arrow-rs/pull/1205) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Remove `ArrowSignedNumericType ` to Simplify and reduce code duplication in arithmetic kernels [\#1161](https://github.com/apache/arrow-rs/pull/1161) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Restrict RecordReader and friends to scalar types \(\#1132\) [\#1155](https://github.com/apache/arrow-rs/pull/1155) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Move more parquet functionality behind experimental feature flag \(\#1032\) [\#1134](https://github.com/apache/arrow-rs/pull/1134) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Parquet reader should be able to read 
structs within list [\#1186](https://github.com/apache/arrow-rs/issues/1186) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Disable serde\_json `arbitrary_precision` feature flag [\#1174](https://github.com/apache/arrow-rs/issues/1174) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Simplify and reduce code duplication in arithmetic.rs [\#1160](https://github.com/apache/arrow-rs/issues/1160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Return `Err` from JSON writer rather than `panic!` for unsupported types [\#1157](https://github.com/apache/arrow-rs/issues/1157) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support `scalar` mathematics kernels for `Array` and scalar value [\#1153](https://github.com/apache/arrow-rs/issues/1153) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support `DecimalArray` in sort kernel [\#1137](https://github.com/apache/arrow-rs/issues/1137) +- Parquet Fuzz Tests [\#1053](https://github.com/apache/arrow-rs/issues/1053) +- BooleanBufferBuilder Append Packed [\#1038](https://github.com/apache/arrow-rs/issues/1038) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet Performance Optimization: StructArrayReader Redundant Level & Bitmap Computation [\#1034](https://github.com/apache/arrow-rs/issues/1034) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Reduce Public Parquet API [\#1032](https://github.com/apache/arrow-rs/issues/1032) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add `from_iter_values` for binary array [\#1188](https://github.com/apache/arrow-rs/pull/1188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) +- Add support for `MapArray` in json writer [\#1149](https://github.com/apache/arrow-rs/pull/1149) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) + +**Fixed bugs:** + +- Empty string arrays 
with no nulls are not equal [\#1208](https://github.com/apache/arrow-rs/issues/1208) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Pretty print a `RecordBatch` containing `Float16` triggers a panic [\#1193](https://github.com/apache/arrow-rs/issues/1193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Writing structs nested in lists produces an incorrect output [\#1184](https://github.com/apache/arrow-rs/issues/1184) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Undefined behavior for `GenericStringArray::from_iter_values` if reported iterator upper bound is incorrect [\#1144](https://github.com/apache/arrow-rs/issues/1144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Interval comparisons with `simd` feature asserts [\#1136](https://github.com/apache/arrow-rs/issues/1136) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- RecordReader Permits Illegal Types [\#1132](https://github.com/apache/arrow-rs/issues/1132) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Security fixes:** + +- Fix undefined behavior in GenericStringArray::from\_iter\_values [\#1145](https://github.com/apache/arrow-rs/pull/1145) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- parquet: Optimized ByteArrayReader, Add UTF-8 Validation \(\#1040\) [\#1082](https://github.com/apache/arrow-rs/pull/1082) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Documentation updates:** + +- Update parquet crate readme [\#1192](https://github.com/apache/arrow-rs/pull/1192) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Document safety justification of some uses of `from_trusted_len_iter` [\#1148](https://github.com/apache/arrow-rs/pull/1148) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Performance improvements:** + +- Improve parquet reading performance for columns with nulls by preserving bitmask when possible \(\#1037\) [\#1054](https://github.com/apache/arrow-rs/pull/1054) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Improve parquet performance: Skip levels computation for required struct arrays in parquet [\#1035](https://github.com/apache/arrow-rs/pull/1035) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) + +**Closed issues:** + +- Generify ColumnReaderImpl and RecordReader [\#1040](https://github.com/apache/arrow-rs/issues/1040) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet Preserve BitMask [\#1037](https://github.com/apache/arrow-rs/issues/1037) + +**Merged pull requests:** + +- fix a bug in variable sized equality [\#1209](https://github.com/apache/arrow-rs/pull/1209) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) +- Pin WASM / packed SIMD tests to nightly-2022-01-17 [\#1204](https://github.com/apache/arrow-rs/pull/1204) ([alamb](https://github.com/alamb)) +- feat: add support for casting Duration/Interval to Int64Array [\#1196](https://github.com/apache/arrow-rs/pull/1196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([e-dard](https://github.com/e-dard)) +- Add comparison support for fully qualified BinaryArray [\#1195](https://github.com/apache/arrow-rs/pull/1195) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Fix in display of `Float16Array` [\#1194](https://github.com/apache/arrow-rs/pull/1194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) +- update 
nightly version for miri [\#1189](https://github.com/apache/arrow-rs/pull/1189) ([Jimexist](https://github.com/Jimexist)) +- feat\(parquet\): support for reading structs nested within lists [\#1187](https://github.com/apache/arrow-rs/pull/1187) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) +- fix: Fix a bug in how definition levels are calculated for nested structs in a list [\#1185](https://github.com/apache/arrow-rs/pull/1185) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) +- Truncate bitmask on BooleanBufferBuilder::resize: [\#1183](https://github.com/apache/arrow-rs/pull/1183) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add ticket reference for false positive in clippy [\#1181](https://github.com/apache/arrow-rs/pull/1181) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Fix record formatting in 1.58 [\#1178](https://github.com/apache/arrow-rs/pull/1178) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Serialize i128 as JSON string [\#1175](https://github.com/apache/arrow-rs/pull/1175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support DecimalType in `sort` and `take` kernels [\#1172](https://github.com/apache/arrow-rs/pull/1172) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) +- Fix new clippy lints introduced in Rust 1.58 [\#1170](https://github.com/apache/arrow-rs/pull/1170) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Fix compilation error with simd feature 
[\#1169](https://github.com/apache/arrow-rs/pull/1169) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Fix bug while writing parquet with empty lists of structs [\#1166](https://github.com/apache/arrow-rs/pull/1166) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) +- Use tempfile for parquet tests [\#1165](https://github.com/apache/arrow-rs/pull/1165) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Remove left over dev/README.md file from arrow/arrow-rs split [\#1162](https://github.com/apache/arrow-rs/pull/1162) ([alamb](https://github.com/alamb)) +- Add multiply\_scalar kernel [\#1159](https://github.com/apache/arrow-rs/pull/1159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fuzz test different parquet encodings [\#1156](https://github.com/apache/arrow-rs/pull/1156) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add subtract\_scalar kernel [\#1152](https://github.com/apache/arrow-rs/pull/1152) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Add add\_scalar kernel [\#1151](https://github.com/apache/arrow-rs/pull/1151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Move simd right out of for\_each loop [\#1150](https://github.com/apache/arrow-rs/pull/1150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Internal Remove `GenericStringArray::from_vec` and `GenericStringArray::from_opt_vec` [\#1147](https://github.com/apache/arrow-rs/pull/1147) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Implement SIMD comparison operations for types with less than 4 lanes \(i128\) 
[\#1146](https://github.com/apache/arrow-rs/pull/1146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Extends parquet fuzz tests to also test nulls, dictionaries and row groups with multiple pages \(\#1053\) [\#1110](https://github.com/apache/arrow-rs/pull/1110) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Generify ColumnReaderImpl and RecordReader \(\#1040\) [\#1041](https://github.com/apache/arrow-rs/pull/1041) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- BooleanBufferBuilder::append\_packed \(\#1038\) [\#1039](https://github.com/apache/arrow-rs/pull/1039) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +## [7.0.0](https://github.com/apache/arrow-rs/tree/7.0.0) (2022-01-07) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.5.0...7.0.0) + +### Arrow + +**Breaking changes:** +- `pretty_format_batches` now returns `Result` rather than `String`: [#975](https://github.com/apache/arrow-rs/pull/975) +- `MutableBuffer::typed_data_mut` is marked `unsafe`: [#1029](https://github.com/apache/arrow-rs/pull/1029) +- UnionArray updated to match latest Arrow spec, added `UnionMode`, `UnionArray::new()` marked `unsafe`: [#885](https://github.com/apache/arrow-rs/pull/885) + +**New Features:** +- Support for `Float16Array` types [#888](https://github.com/apache/arrow-rs/pull/888) +- IPC support for `UnionArray` [#654](https://github.com/apache/arrow-rs/issues/654) +- Dynamic comparison kernels for scalars (e.g. 
`eq_dyn_scalar`), including `DictionaryArray`: [#1113](https://github.com/apache/arrow-rs/issues/1113) + +**Enhancements:** +- Added `Schema::with_metadata` and `Field::with_metadata` [#1092](https://github.com/apache/arrow-rs/pull/1092) +- Support for custom datetime format for inference and parsing csv files [#1112](https://github.com/apache/arrow-rs/pull/1112) +- Implement `Array` for `ArrayRef` for easier use [#1129](https://github.com/apache/arrow-rs/pull/1129) +- Pretty printing display support for `FixedSizeBinaryArray` [#1097](https://github.com/apache/arrow-rs/pull/1097) +- Dependency Upgrades: `pyo3`, `parquet-format`, `prost`, `tonic` +- Avoid allocating vector of indices in `lexicographical_partition_ranges`[#998](https://github.com/apache/arrow-rs/pull/998) + +### Parquet + +**Fixed bugs:** +- (parquet) Fix reading of dictionary encoded pages with null values: [#1130](https://github.com/apache/arrow-rs/pull/1130) + + +# Changelog + +## [6.5.0](https://github.com/apache/arrow-rs/tree/6.5.0) (2021-12-23) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.4.0...6.5.0) + +* [092fc64bbb019244887ebd0d9c9a2d3e3a9aebc0](https://github.com/apache/arrow-rs/commit/092fc64bbb019244887ebd0d9c9a2d3e3a9aebc0) support cast decimal to decimal ([#1084](https://github.com/apache/arrow-rs/pull/1084)) ([#1093](https://github.com/apache/arrow-rs/pull/1093)) +* [01459762ed18b504e00e7b2818fce91f19188b1e](https://github.com/apache/arrow-rs/commit/01459762ed18b504e00e7b2818fce91f19188b1e) Fix like regex escaping ([#1085](https://github.com/apache/arrow-rs/pull/1085)) ([#1090](https://github.com/apache/arrow-rs/pull/1090)) +* [7c748bfccbc2eac0c1138378736b70dcb7e26a5b](https://github.com/apache/arrow-rs/commit/7c748bfccbc2eac0c1138378736b70dcb7e26a5b) support cast decimal to signed numeric ([#1073](https://github.com/apache/arrow-rs/pull/1073)) ([#1089](https://github.com/apache/arrow-rs/pull/1089)) +* 
[bd3600b6483c253ae57a38928a636d39a6b7cb02](https://github.com/apache/arrow-rs/commit/bd3600b6483c253ae57a38928a636d39a6b7cb02) parquet: Use constant for RLE decoder buffer size ([#1070](https://github.com/apache/arrow-rs/pull/1070)) ([#1088](https://github.com/apache/arrow-rs/pull/1088)) +* [2b5c53ecd92468fd95328637a15de7f35b6fcf28](https://github.com/apache/arrow-rs/commit/2b5c53ecd92468fd95328637a15de7f35b6fcf28) Box RleDecoder index buffer ([#1061](https://github.com/apache/arrow-rs/pull/1061)) ([#1062](https://github.com/apache/arrow-rs/pull/1062)) ([#1081](https://github.com/apache/arrow-rs/pull/1081)) +* [78721bc1a467177679ad6196b994759cf4d73377](https://github.com/apache/arrow-rs/commit/78721bc1a467177679ad6196b994759cf4d73377) BooleanBufferBuilder correct buffer length ([#1051](https://github.com/apache/arrow-rs/pull/1051)) ([#1052](https://github.com/apache/arrow-rs/pull/1052)) ([#1080](https://github.com/apache/arrow-rs/pull/1080)) +* [3a5e3541d3a4db61a828011ed95c8539adf1d57c](https://github.com/apache/arrow-rs/commit/3a5e3541d3a4db61a828011ed95c8539adf1d57c) support cast signed numeric to decimal ([#1044](https://github.com/apache/arrow-rs/pull/1044)) ([#1079](https://github.com/apache/arrow-rs/pull/1079)) +* [000bdb3053098255d43288aa3e8665e8b1892a6c](https://github.com/apache/arrow-rs/commit/000bdb3053098255d43288aa3e8665e8b1892a6c) fix(compute): LIKE escape parenthesis ([#1042](https://github.com/apache/arrow-rs/pull/1042)) ([#1078](https://github.com/apache/arrow-rs/pull/1078)) +* [e0abdb9e62772a2f853974e68e744246e7f47569](https://github.com/apache/arrow-rs/commit/e0abdb9e62772a2f853974e68e744246e7f47569) Add Schema::project and RecordBatch::project functions ([#1033](https://github.com/apache/arrow-rs/pull/1033)) ([#1077](https://github.com/apache/arrow-rs/pull/1077)) +* [31911a4d6328d889d98796b896412b3997f73e13](https://github.com/apache/arrow-rs/commit/31911a4d6328d889d98796b896412b3997f73e13) Remove outdated safety example from doc 
([#1050](https://github.com/apache/arrow-rs/pull/1050)) ([#1058](https://github.com/apache/arrow-rs/pull/1058)) +* [71ac8620993a65a7f1f57278c3495556625356b3](https://github.com/apache/arrow-rs/commit/71ac8620993a65a7f1f57278c3495556625356b3) Use existing array type in `take` kernel ([#1046](https://github.com/apache/arrow-rs/pull/1046)) ([#1057](https://github.com/apache/arrow-rs/pull/1057)) +* [1c5902376b7f7d56cb5249db4f98a6a370ead919](https://github.com/apache/arrow-rs/commit/1c5902376b7f7d56cb5249db4f98a6a370ead919) Extract method to drive PageIterator -> RecordReader ([#1031](https://github.com/apache/arrow-rs/pull/1031)) ([#1056](https://github.com/apache/arrow-rs/pull/1056)) +* [7ca39361f8733b86bc0cef5ed5d74093e2c6b14d](https://github.com/apache/arrow-rs/commit/7ca39361f8733b86bc0cef5ed5d74093e2c6b14d) Clarify governance of arrow crate ([#1030](https://github.com/apache/arrow-rs/pull/1030)) ([#1055](https://github.com/apache/arrow-rs/pull/1055)) + + +## [6.4.0](https://github.com/apache/arrow-rs/tree/6.4.0) (2021-12-10) + + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.3.0...6.4.0) + + +* [049f48559f578243935b6e512d06c4c2df360bf1](https://github.com/apache/arrow-rs/commit/049f48559f578243935b6e512d06c4c2df360bf1) Force new cargo and target caching to fix CI ([#1023](https://github.com/apache/arrow-rs/pull/1023)) ([#1024](https://github.com/apache/arrow-rs/pull/1024)) +* [ef37da3b60f71a52d5ad67e9ca810dca38b29f00](https://github.com/apache/arrow-rs/commit/ef37da3b60f71a52d5ad67e9ca810dca38b29f00) Fix a broken link and some missing styling in the main arrow crate docs ([#1013](https://github.com/apache/arrow-rs/pull/1013)) ([#1019](https://github.com/apache/arrow-rs/pull/1019)) +* [f2c746a9b968714cfe05d35fcee8658371acd899](https://github.com/apache/arrow-rs/commit/f2c746a9b968714cfe05d35fcee8658371acd899) Remove out of date comment ([#1008](https://github.com/apache/arrow-rs/pull/1008)) ([#1018](https://github.com/apache/arrow-rs/pull/1018)) +* 
[557fc11e3b2a09a680c0cfbf38d27b13101b63fe](https://github.com/apache/arrow-rs/commit/557fc11e3b2a09a680c0cfbf38d27b13101b63fe) Remove unneeded `rc` feature of serde ([#990](https://github.com/apache/arrow-rs/pull/990)) ([#1016](https://github.com/apache/arrow-rs/pull/1016)) +* [b28385e096b1cf8f5fb2773d49b160f93d94fbac](https://github.com/apache/arrow-rs/commit/b28385e096b1cf8f5fb2773d49b160f93d94fbac) Docstrings for Timestamp*Array. ([#988](https://github.com/apache/arrow-rs/pull/988)) ([#1015](https://github.com/apache/arrow-rs/pull/1015)) +* [a92672e40217670d2566a85d70b0b59fffac594c](https://github.com/apache/arrow-rs/commit/a92672e40217670d2566a85d70b0b59fffac594c) Add full data validation for ArrayData::try_new() ([#1007](https://github.com/apache/arrow-rs/pull/1007)) +* [6c8b2936d7b07e1e2f5d1d48eea425a385382dfb](https://github.com/apache/arrow-rs/commit/6c8b2936d7b07e1e2f5d1d48eea425a385382dfb) Add boolean comparison to scalar kernels for less then, greater than ([#977](https://github.com/apache/arrow-rs/pull/977)) ([#1005](https://github.com/apache/arrow-rs/pull/1005)) +* [14d140aeca608a23a8a6b2c251c8f53ffd377e61](https://github.com/apache/arrow-rs/commit/14d140aeca608a23a8a6b2c251c8f53ffd377e61) Fix some typos in code and comments ([#985](https://github.com/apache/arrow-rs/pull/985)) ([#1006](https://github.com/apache/arrow-rs/pull/1006)) +* [b4507f562fb0eddfb79840871cd2733dc0e337cd](https://github.com/apache/arrow-rs/commit/b4507f562fb0eddfb79840871cd2733dc0e337cd) Fix warnings introduced by Rust/Clippy 1.57.0 ([#1004](https://github.com/apache/arrow-rs/pull/1004)) + + +## [6.3.0](https://github.com/apache/arrow-rs/tree/6.3.0) (2021-11-26) + + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.2.0...6.3.0) + + +**Changes:** +* [7e51df015ce851a5de444ca08b57b38e7ee959a3](https://github.com/apache/arrow-rs/commit/7e51df015ce851a5de444ca08b57b38e7ee959a3) add more error test case and change the code style 
([#952](https://github.com/apache/arrow-rs/pull/952)) ([#976](https://github.com/apache/arrow-rs/pull/976)) +* [6c570cfe98d6a7a4ec74b139b733c5c72ed10015](https://github.com/apache/arrow-rs/commit/6c570cfe98d6a7a4ec74b139b733c5c72ed10015) Support read decimal data from csv reader if user provide the schema with decimal data type ([#941](https://github.com/apache/arrow-rs/pull/941)) ([#974](https://github.com/apache/arrow-rs/pull/974)) +* [4fa0d4d7f7d9ca0a3da2a6dfe3eae6dc2d51a79a](https://github.com/apache/arrow-rs/commit/4fa0d4d7f7d9ca0a3da2a6dfe3eae6dc2d51a79a) Adding Pretty Print Support For Fixed Size List ([#958](https://github.com/apache/arrow-rs/pull/958)) ([#968](https://github.com/apache/arrow-rs/pull/968)) +* [9d453a3128013c03e8ed854ded76b15cc6f28be4](https://github.com/apache/arrow-rs/commit/9d453a3128013c03e8ed854ded76b15cc6f28be4) Fix bug in temporal utilities due to DST being ignored. ([#955](https://github.com/apache/arrow-rs/pull/955)) ([#967](https://github.com/apache/arrow-rs/pull/967)) +* [1b9fd9e3fb2653236513bb7dda5aa2fa14d1d831](https://github.com/apache/arrow-rs/commit/1b9fd9e3fb2653236513bb7dda5aa2fa14d1d831) Inferring 2. 
as Float64 for issue [#929](https://github.com/apache/arrow-rs/pull/929) ([#950](https://github.com/apache/arrow-rs/pull/950)) ([#966](https://github.com/apache/arrow-rs/pull/966)) +* [e6c5e1c877bd94b3d6e545567f901d9962257cf8](https://github.com/apache/arrow-rs/commit/e6c5e1c877bd94b3d6e545567f901d9962257cf8) Fix CI for latest nightly ([#970](https://github.com/apache/arrow-rs/pull/970)) ([#973](https://github.com/apache/arrow-rs/pull/973)) +* [c96e8de457442806e18944f0b26dd06ba4cb1aee](https://github.com/apache/arrow-rs/commit/c96e8de457442806e18944f0b26dd06ba4cb1aee) Fix primitive sort when input contains more nulls than the given sort limit ([#954](https://github.com/apache/arrow-rs/pull/954)) ([#965](https://github.com/apache/arrow-rs/pull/965)) +* [094037d418381584178db1d886cad3b5024b414a](https://github.com/apache/arrow-rs/commit/094037d418381584178db1d886cad3b5024b414a) Update comfy-table to 5.0 ([#957](https://github.com/apache/arrow-rs/pull/957)) ([#964](https://github.com/apache/arrow-rs/pull/964)) +* [9f635021eee6786c5377c891218c5f88ebce07c3](https://github.com/apache/arrow-rs/commit/9f635021eee6786c5377c891218c5f88ebce07c3) Fix csv writing of timestamps to show timezone. 
([#849](https://github.com/apache/arrow-rs/pull/849)) ([#963](https://github.com/apache/arrow-rs/pull/963)) +* [f7deba4c3a050a52608462ee8a827bb8f6364140](https://github.com/apache/arrow-rs/commit/f7deba4c3a050a52608462ee8a827bb8f6364140) Adding ability to parse float from number with leading decimal ([#831](https://github.com/apache/arrow-rs/pull/831)) ([#962](https://github.com/apache/arrow-rs/pull/962)) +* [59f96e842d05b63882f7ba285c66a9739761cf84](https://github.com/apache/arrow-rs/commit/59f96e842d05b63882f7ba285c66a9739761cf84) add ilike comparitor ([#874](https://github.com/apache/arrow-rs/pull/874)) ([#961](https://github.com/apache/arrow-rs/pull/961)) +* [54023c8a5543c9f9fa4955afa01189029f3e96f5](https://github.com/apache/arrow-rs/commit/54023c8a5543c9f9fa4955afa01189029f3e96f5) Remove unpassable cargo publish check from verify-release-candidate.sh ([#882](https://github.com/apache/arrow-rs/pull/882)) ([#949](https://github.com/apache/arrow-rs/pull/949)) + + + +## [6.2.0](https://github.com/apache/arrow-rs/tree/6.2.0) (2021-11-12) + + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.1.0...6.2.0) + +**Features / Fixes:** + + +* [4037933e43cad9e4de027039ce14caa65f78300a](https://github.com/apache/arrow-rs/commit/4037933e43cad9e4de027039ce14caa65f78300a) Fix validation for offsets of StructArrays ([#942](https://github.com/apache/arrow-rs/pull/942)) ([#946](https://github.com/apache/arrow-rs/pull/946)) +* [1af9ca5d363d870550026a7b1abcb749befbb371](https://github.com/apache/arrow-rs/commit/1af9ca5d363d870550026a7b1abcb749befbb371) implement take kernel for null arrays ([#939](https://github.com/apache/arrow-rs/pull/939)) ([#944](https://github.com/apache/arrow-rs/pull/944)) +* [320de1c20aefbf204f6888e2ad3663863afeba9f](https://github.com/apache/arrow-rs/commit/320de1c20aefbf204f6888e2ad3663863afeba9f) add checker for appending i128 to decimal builder ([#928](https://github.com/apache/arrow-rs/pull/928)) 
([#943](https://github.com/apache/arrow-rs/pull/943)) +* [dff14113884ad4246a8cafb9be579ebdb4e1481f](https://github.com/apache/arrow-rs/commit/dff14113884ad4246a8cafb9be579ebdb4e1481f) Validate arguments to ArrayData::new and null bit buffer and buffers ([#810](https://github.com/apache/arrow-rs/pull/810)) ([#936](https://github.com/apache/arrow-rs/pull/936)) +* [c3eae1ec56303b97c9e15263063a6a13122ef194](https://github.com/apache/arrow-rs/commit/c3eae1ec56303b97c9e15263063a6a13122ef194) fix some warning about unused variables in panic tests ([#894](https://github.com/apache/arrow-rs/pull/894)) ([#933](https://github.com/apache/arrow-rs/pull/933)) +* [e80bb018450f13a30811ffd244c42917d8bf8a62](https://github.com/apache/arrow-rs/commit/e80bb018450f13a30811ffd244c42917d8bf8a62) fix some clippy warnings ([#896](https://github.com/apache/arrow-rs/pull/896)) ([#930](https://github.com/apache/arrow-rs/pull/930)) +* [bde89463b627be3f60b5569d038ca36c434da71d](https://github.com/apache/arrow-rs/commit/bde89463b627be3f60b5569d038ca36c434da71d) feat(ipc): add support for deserializing messages with nested dictionary fields ([#923](https://github.com/apache/arrow-rs/pull/923)) ([#931](https://github.com/apache/arrow-rs/pull/931)) +* [792544b5fb7b84224ef9745ecb9f330663c14fb4](https://github.com/apache/arrow-rs/commit/792544b5fb7b84224ef9745ecb9f330663c14fb4) refactor regexp_is_match_utf8_scalar to try to mitigate miri failures ([#895](https://github.com/apache/arrow-rs/pull/895)) ([#932](https://github.com/apache/arrow-rs/pull/932)) +* [3f0e252811cbb6e3f7c774959787dcfec985d03e](https://github.com/apache/arrow-rs/commit/3f0e252811cbb6e3f7c774959787dcfec985d03e) Automatically retry failed MIRI runs to work around intermittent failures ([#934](https://github.com/apache/arrow-rs/pull/934)) +* [c9a9515c46d560ced00e23ff57cb10a1c97573cb](https://github.com/apache/arrow-rs/commit/c9a9515c46d560ced00e23ff57cb10a1c97573cb) Update mod.rs ([#909](https://github.com/apache/arrow-rs/pull/909)) 
([#919](https://github.com/apache/arrow-rs/pull/919)) +* [64ed79ece67141b92dc45b8a1d43cb9d909aa6a9](https://github.com/apache/arrow-rs/commit/64ed79ece67141b92dc45b8a1d43cb9d909aa6a9) Mark boolean kernels public ([#913](https://github.com/apache/arrow-rs/pull/913)) ([#920](https://github.com/apache/arrow-rs/pull/920)) +* [8b95fe0bbf03588c5cc00f67365c5b0dac4d7a34](https://github.com/apache/arrow-rs/commit/8b95fe0bbf03588c5cc00f67365c5b0dac4d7a34) doc example mistype ([#904](https://github.com/apache/arrow-rs/pull/904)) ([#918](https://github.com/apache/arrow-rs/pull/918)) +* [34c5eab4862cab16fdfd5f5ed6c68dce6298dfa4](https://github.com/apache/arrow-rs/commit/34c5eab4862cab16fdfd5f5ed6c68dce6298dfa4) allow null array to be cast to all other types ([#884](https://github.com/apache/arrow-rs/pull/884)) ([#917](https://github.com/apache/arrow-rs/pull/917)) +* [3c69752e55ed0c58f5a8faed918a22b45cd93766](https://github.com/apache/arrow-rs/commit/3c69752e55ed0c58f5a8faed918a22b45cd93766) Fix instances of UB that cause tests to not pass under miri ([#878](https://github.com/apache/arrow-rs/pull/878)) ([#916](https://github.com/apache/arrow-rs/pull/916)) +* [85402148c3af03d0855e81f855715ea98a7491c5](https://github.com/apache/arrow-rs/commit/85402148c3af03d0855e81f855715ea98a7491c5) feat(ipc): Support writing dictionaries nested in structs and unions ([#870](https://github.com/apache/arrow-rs/pull/870)) ([#915](https://github.com/apache/arrow-rs/pull/915)) +* [03d95e626cb0e654775fefa77786674ea41be4a2](https://github.com/apache/arrow-rs/commit/03d95e626cb0e654775fefa77786674ea41be4a2) Fix references to changelog ([#905](https://github.com/apache/arrow-rs/pull/905)) + + +## [6.1.0](https://github.com/apache/arrow-rs/tree/6.1.0) (2021-10-29) + + +[Full Changelog](https://github.com/apache/arrow-rs/compare/6.0.0...6.1.0) + +**Features / Fixes:** + +* [b42649b0088fe7762c713a41a23c1abdf8d0496d](https://github.com/apache/arrow-rs/commit/b42649b0088fe7762c713a41a23c1abdf8d0496d) 
implement eq_dyn and neq_dyn ([#858](https://github.com/apache/arrow-rs/pull/858)) ([#867](https://github.com/apache/arrow-rs/pull/867)) +* [01743f3f10a377c1ca857cd554acbf84155766d8](https://github.com/apache/arrow-rs/commit/01743f3f10a377c1ca857cd554acbf84155766d8) fix: fix a bug in offset calculation for unions ([#863](https://github.com/apache/arrow-rs/pull/863)) ([#871](https://github.com/apache/arrow-rs/pull/871)) +* [8bfff793a23f0e71008c7a9eea7a54d6b913ecff](https://github.com/apache/arrow-rs/commit/8bfff793a23f0e71008c7a9eea7a54d6b913ecff) add lt_bool, lt_eq_bool, gt_bool, gt_eq_bool ([#860](https://github.com/apache/arrow-rs/pull/860)) ([#868](https://github.com/apache/arrow-rs/pull/868)) +* [8845e91d4ab584c822e9ee903db7069551b124af](https://github.com/apache/arrow-rs/commit/8845e91d4ab584c822e9ee903db7069551b124af) fix(ipc): Support serializing structs containing dictionaries ([#848](https://github.com/apache/arrow-rs/pull/848)) ([#865](https://github.com/apache/arrow-rs/pull/865)) +* [620282a0d9fdd2a8ed7e8313d17ba3dec64c80e5](https://github.com/apache/arrow-rs/commit/620282a0d9fdd2a8ed7e8313d17ba3dec64c80e5) Implement boolean equality kernels ([#844](https://github.com/apache/arrow-rs/pull/844)) ([#857](https://github.com/apache/arrow-rs/pull/857)) +* [94cddcacf785be982e69689291ce034ef00220b4](https://github.com/apache/arrow-rs/commit/94cddcacf785be982e69689291ce034ef00220b4) Cherry pick fix parquet_derive with default features (and fix cargo publish) ([#856](https://github.com/apache/arrow-rs/pull/856)) +* [733fd583ddb3dbe6b4d58a809c444ee16ac0eae8](https://github.com/apache/arrow-rs/commit/733fd583ddb3dbe6b4d58a809c444ee16ac0eae8) Use kernel utility for parsing timestamps in csv reader. 
([#832](https://github.com/apache/arrow-rs/pull/832)) ([#853](https://github.com/apache/arrow-rs/pull/853)) +* [2cc64937a153f632796915d2d9869d5c2a501d28](https://github.com/apache/arrow-rs/commit/2cc64937a153f632796915d2d9869d5c2a501d28) [Minor] Fix clippy errors with new rust version (1.56) and float formatting with nightly ([#845](https://github.com/apache/arrow-rs/pull/845)) ([#850](https://github.com/apache/arrow-rs/pull/850)) + +**Other:** +* [bfac9e5a027e3bd78b7a1ec90c75a3e385bd66bb](https://github.com/apache/arrow-rs/commit/bfac9e5a027e3bd78b7a1ec90c75a3e385bd66bb) Test out new tarpaulin version ([#852](https://github.com/apache/arrow-rs/pull/852)) ([#866](https://github.com/apache/arrow-rs/pull/866)) +* [809350ced392cfc78d8a1a46228d4ffc25dea9ff](https://github.com/apache/arrow-rs/commit/809350ced392cfc78d8a1a46228d4ffc25dea9ff) Update README.md ([#834](https://github.com/apache/arrow-rs/pull/834)) ([#854](https://github.com/apache/arrow-rs/pull/854)) +* [70582f40dd21f5c710c4946266d0563a92b92337](https://github.com/apache/arrow-rs/commit/70582f40dd21f5c710c4946266d0563a92b92337) [MINOR] Delete temp file from docs ([#836](https://github.com/apache/arrow-rs/pull/836)) ([#855](https://github.com/apache/arrow-rs/pull/855)) +* [a721e00014015a7e598946b6efb9b1da8080ec85](https://github.com/apache/arrow-rs/commit/a721e00014015a7e598946b6efb9b1da8080ec85) Force fresh cargo cache key in CI ([#839](https://github.com/apache/arrow-rs/pull/839)) ([#851](https://github.com/apache/arrow-rs/pull/851)) + + +## [6.0.0](https://github.com/apache/arrow-rs/tree/6.0.0) (2021-10-13) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.5.0...6.0.0) + +**Breaking changes:** + +- Replace `ArrayData::new()` with `ArrayData::try_new()` and `unsafe ArrayData::new_unchecked` [\#822](https://github.com/apache/arrow-rs/pull/822) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([alamb](https://github.com/alamb)) +- Update Bitmap::len to return bits rather than bytes [\#749](https://github.com/apache/arrow-rs/pull/749) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) +- use sort\_unstable\_by in primitive sorting [\#552](https://github.com/apache/arrow-rs/pull/552) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) +- New MapArray support [\#491](https://github.com/apache/arrow-rs/pull/491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nevi-me](https://github.com/nevi-me)) + +**Implemented enhancements:** + +- Improve parquet binary writer speed by reducing allocations [\#819](https://github.com/apache/arrow-rs/issues/819) +- Expose buffer operations [\#808](https://github.com/apache/arrow-rs/issues/808) +- Add doc examples of writing parquet files using `ArrowWriter` [\#788](https://github.com/apache/arrow-rs/issues/788) + +**Fixed bugs:** + +- JSON reader can create null struct children on empty lists [\#825](https://github.com/apache/arrow-rs/issues/825) +- Incorrect null count for cast kernel for list arrays [\#815](https://github.com/apache/arrow-rs/issues/815) +- `minute` and `second` temporal kernels do not respect timezone [\#500](https://github.com/apache/arrow-rs/issues/500) +- Fix data corruption in json decoder f64-to-i64 cast [\#652](https://github.com/apache/arrow-rs/pull/652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xianwill](https://github.com/xianwill)) + +**Documentation updates:** + +- Doctest for PrimitiveArray using from\_iter\_values. [\#694](https://github.com/apache/arrow-rs/pull/694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) +- Doctests for BinaryArray and LargeBinaryArray. 
[\#625](https://github.com/apache/arrow-rs/pull/625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) +- Add links in docstrings [\#605](https://github.com/apache/arrow-rs/pull/605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + + +## [5.5.0](https://github.com/apache/arrow-rs/tree/5.5.0) (2021-09-24) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.4.0...5.5.0) + +**Implemented enhancements:** + +- parquet should depend on a small set of arrow features [\#800](https://github.com/apache/arrow-rs/issues/800) +- Support equality on RecordBatch [\#735](https://github.com/apache/arrow-rs/issues/735) + +**Fixed bugs:** + +- Converting from string to timestamp uses microseconds instead of milliseconds [\#780](https://github.com/apache/arrow-rs/issues/780) +- Document has no link to `RowColumIter` [\#762](https://github.com/apache/arrow-rs/issues/762) +- length on slices with null doesn't work [\#744](https://github.com/apache/arrow-rs/issues/744) + +## [5.4.0](https://github.com/apache/arrow-rs/tree/5.4.0) (2021-09-10) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.3.0...5.4.0) + +**Implemented enhancements:** + +- Upgrade lexical-core to 0.8 [\#747](https://github.com/apache/arrow-rs/issues/747) +- `append_nulls` and `append_trusted_len_iter` for PrimitiveBuilder [\#725](https://github.com/apache/arrow-rs/issues/725) +- Optimize MutableArrayData::extend for null buffers [\#397](https://github.com/apache/arrow-rs/issues/397) + +**Fixed bugs:** + +- Arithmetic with scalars doesn't work on slices [\#742](https://github.com/apache/arrow-rs/issues/742) +- Comparisons with scalar don't work on slices [\#740](https://github.com/apache/arrow-rs/issues/740) +- `unary` kernel doesn't respect offset [\#738](https://github.com/apache/arrow-rs/issues/738) +- `new_null_array` creates invalid struct arrays 
[\#734](https://github.com/apache/arrow-rs/issues/734) +- --no-default-features is broken for parquet [\#733](https://github.com/apache/arrow-rs/issues/733) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `Bitmap::len` returns the number of bytes, not bits. [\#730](https://github.com/apache/arrow-rs/issues/730) +- Decimal logical type is formatted incorrectly by print\_schema [\#713](https://github.com/apache/arrow-rs/issues/713) +- parquet\_derive does not support chrono time values [\#711](https://github.com/apache/arrow-rs/issues/711) +- Numeric overflow when formatting Decimal type [\#710](https://github.com/apache/arrow-rs/issues/710) +- The integration tests are not running [\#690](https://github.com/apache/arrow-rs/issues/690) + +**Closed issues:** + +- Question: Is there no way to create a DictionaryArray with a pre-arranged mapping? [\#729](https://github.com/apache/arrow-rs/issues/729) + +## [5.3.0](https://github.com/apache/arrow-rs/tree/5.3.0) (2021-08-26) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.2.0...5.3.0) + +**Implemented enhancements:** + +- Add optimized filter kernel for regular expression matching [\#697](https://github.com/apache/arrow-rs/issues/697) +- Can't cast from timestamp array to string array [\#587](https://github.com/apache/arrow-rs/issues/587) + +**Fixed bugs:** + +- 'Encoding DELTA\_BYTE\_ARRAY is not supported' with parquet arrow readers [\#708](https://github.com/apache/arrow-rs/issues/708) +- Support reading json string into binary data type. 
[\#701](https://github.com/apache/arrow-rs/issues/701) + +**Closed issues:** + +- Resolve Issues with `prettytable-rs` dependency [\#69](https://github.com/apache/arrow-rs/issues/69) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +## [5.2.0](https://github.com/apache/arrow-rs/tree/5.2.0) (2021-08-12) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.1.0...5.2.0) + +**Implemented enhancements:** + +- Make rand an optional dependency [\#671](https://github.com/apache/arrow-rs/issues/671) +- Remove undefined behavior in `value` method of boolean and primitive arrays [\#645](https://github.com/apache/arrow-rs/issues/645) +- Avoid materialization of indices in filter\_record\_batch for single arrays [\#636](https://github.com/apache/arrow-rs/issues/636) +- Add a note about arrow crate security / safety [\#627](https://github.com/apache/arrow-rs/issues/627) +- Allow the creation of String arrays from an interator of &Option\<&str\> [\#598](https://github.com/apache/arrow-rs/issues/598) +- Support arrow map datatype [\#395](https://github.com/apache/arrow-rs/issues/395) + +**Fixed bugs:** + +- Parquet fixed length byte array columns write byte array statistics [\#660](https://github.com/apache/arrow-rs/issues/660) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet boolean columns write Int32 statistics [\#659](https://github.com/apache/arrow-rs/issues/659) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Writing Parquet with a boolean column fails [\#657](https://github.com/apache/arrow-rs/issues/657) +- JSON decoder data corruption for large i64/u64 [\#653](https://github.com/apache/arrow-rs/issues/653) +- Incorrect min/max statistics for strings in parquet files [\#641](https://github.com/apache/arrow-rs/issues/641) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Closed issues:** + +- Release candidate verifying script seems work on macOS 
[\#640](https://github.com/apache/arrow-rs/issues/640) +- Update CONTRIBUTING [\#342](https://github.com/apache/arrow-rs/issues/342) + +## [5.1.0](https://github.com/apache/arrow-rs/tree/5.1.0) (2021-07-29) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/5.0.0...5.1.0) + +**Implemented enhancements:** + +- Make FFI\_ArrowArray empty\(\) public [\#602](https://github.com/apache/arrow-rs/issues/602) +- exponential sort can be used to speed up lexico partition kernel [\#586](https://github.com/apache/arrow-rs/issues/586) +- Implement sort\(\) for binary array [\#568](https://github.com/apache/arrow-rs/issues/568) +- primitive sorting can be improved and more consistent with and without `limit` if sorted unstably [\#553](https://github.com/apache/arrow-rs/issues/553) + +**Fixed bugs:** + +- Confusing memory usage with CSV reader [\#623](https://github.com/apache/arrow-rs/issues/623) +- FFI implementation deviates from specification for array release [\#595](https://github.com/apache/arrow-rs/issues/595) +- Parquet file content is different if `~/.cargo` is in a git checkout [\#589](https://github.com/apache/arrow-rs/issues/589) +- Ensure output of MIRI is checked for success [\#581](https://github.com/apache/arrow-rs/issues/581) +- MIRI failure in `array::ffi::tests::test_struct` and other ffi tests [\#580](https://github.com/apache/arrow-rs/issues/580) +- ListArray equality check may return wrong result [\#570](https://github.com/apache/arrow-rs/issues/570) +- cargo audit failed [\#561](https://github.com/apache/arrow-rs/issues/561) +- ArrayData::slice\(\) does not work for nested types such as StructArray [\#554](https://github.com/apache/arrow-rs/issues/554) + +**Documentation updates:** + +- More examples of how to construct Arrays [\#301](https://github.com/apache/arrow-rs/issues/301) + +**Closed issues:** + +- Implement StringBuilder::append\_option [\#263](https://github.com/apache/arrow-rs/issues/263) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +## [5.0.0](https://github.com/apache/arrow-rs/tree/5.0.0) (2021-07-14) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/4.4.0...5.0.0) + +**Breaking changes:** + +- Remove lifetime from DynComparator [\#543](https://github.com/apache/arrow-rs/issues/543) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Simplify interactions with arrow flight APIs [\#376](https://github.com/apache/arrow-rs/issues/376) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- refactor: remove lifetime from DynComparator [\#542](https://github.com/apache/arrow-rs/pull/542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([e-dard](https://github.com/e-dard)) +- use iterator for partition kernel instead of generating vec [\#438](https://github.com/apache/arrow-rs/pull/438) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) +- Remove DictionaryArray::keys\_array method [\#419](https://github.com/apache/arrow-rs/pull/419) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- simplify interactions with arrow flight APIs [\#377](https://github.com/apache/arrow-rs/pull/377) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([garyanaplan](https://github.com/garyanaplan)) +- return reference from DictionaryArray::values\(\) \(\#313\) [\#314](https://github.com/apache/arrow-rs/pull/314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- Allow creation of StringArrays from Vec\ [\#519](https://github.com/apache/arrow-rs/issues/519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement RecordBatch::concat [\#461](https://github.com/apache/arrow-rs/issues/461) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement RecordBatch::slice\(\) 
to slice RecordBatches [\#460](https://github.com/apache/arrow-rs/issues/460) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add a RecordBatch::split to split large batches into a set of smaller batches [\#343](https://github.com/apache/arrow-rs/issues/343) +- generate parquet schema from rust struct [\#539](https://github.com/apache/arrow-rs/pull/539) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) +- Implement `RecordBatch::concat` [\#537](https://github.com/apache/arrow-rs/pull/537) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([silathdiir](https://github.com/silathdiir)) +- Implement function slice for RecordBatch [\#490](https://github.com/apache/arrow-rs/pull/490) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([b41sh](https://github.com/b41sh)) +- add lexicographically partition points and ranges [\#424](https://github.com/apache/arrow-rs/pull/424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) +- allow to read non-standard CSV [\#326](https://github.com/apache/arrow-rs/pull/326) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kazuk](https://github.com/kazuk)) +- parquet: Speed up `BitReader`/`DeltaBitPackDecoder` [\#325](https://github.com/apache/arrow-rs/pull/325) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kornholi](https://github.com/kornholi)) +- ARROW-12343: \[Rust\] Support auto-vectorization for min/max [\#9](https://github.com/apache/arrow-rs/pull/9) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- ARROW-12411: \[Rust\] Create RecordBatches from Iterators [\#7](https://github.com/apache/arrow-rs/pull/7) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Fixed bugs:** + +- Error building on master - error: cyclic package dependency: package `ahash v0.7.4` depends on 
itself. Cycle [\#544](https://github.com/apache/arrow-rs/issues/544) +- IPC reader panics with out of bounds error [\#541](https://github.com/apache/arrow-rs/issues/541) +- Take kernel doesn't handle nulls and structs correctly [\#530](https://github.com/apache/arrow-rs/issues/530) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- master fails to compile with `default-features=false` [\#529](https://github.com/apache/arrow-rs/issues/529) +- README developer instructions out of date [\#523](https://github.com/apache/arrow-rs/issues/523) +- Update rustc and packed\_simd in CI before 5.0 release [\#517](https://github.com/apache/arrow-rs/issues/517) +- Incorrect memory usage calculation for dictionary arrays [\#503](https://github.com/apache/arrow-rs/issues/503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- sliced null buffers lead to incorrect result in take kernel \(and probably on other places\) [\#502](https://github.com/apache/arrow-rs/issues/502) +- Cast of utf8 types and list container types don't respect offset [\#334](https://github.com/apache/arrow-rs/issues/334) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- fix take kernel null handling on structs [\#531](https://github.com/apache/arrow-rs/pull/531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([bjchambers](https://github.com/bjchambers)) +- Correct array memory usage calculation for dictionary arrays [\#505](https://github.com/apache/arrow-rs/pull/505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- parquet: improve BOOLEAN writing logic and report error on encoding fail [\#443](https://github.com/apache/arrow-rs/pull/443) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([garyanaplan](https://github.com/garyanaplan)) +- Fix bug with null buffer offset in boolean not kernel [\#418](https://github.com/apache/arrow-rs/pull/418) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- respect offset in utf8 and list casts [\#335](https://github.com/apache/arrow-rs/pull/335) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) +- Fix comparison of dictionaries with different values arrays \(\#332\) [\#333](https://github.com/apache/arrow-rs/pull/333) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- ensure null-counts are written for all-null columns [\#307](https://github.com/apache/arrow-rs/pull/307) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([crepererum](https://github.com/crepererum)) +- fix invalid null handling in filter [\#296](https://github.com/apache/arrow-rs/pull/296) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) +- fix NaN handling in parquet statistics [\#256](https://github.com/apache/arrow-rs/pull/256) ([crepererum](https://github.com/crepererum)) + +**Documentation updates:** + +- Improve arrow's crate's readme on crates.io [\#463](https://github.com/apache/arrow-rs/issues/463) +- Clean up README.md in advance of the 5.0 release [\#536](https://github.com/apache/arrow-rs/pull/536) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- fix readme instructions to reflect new structure [\#524](https://github.com/apache/arrow-rs/pull/524) ([marcvanheerden](https://github.com/marcvanheerden)) +- Improve docs for NullArray, new\_null\_array and new\_empty\_array [\#240](https://github.com/apache/arrow-rs/pull/240) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Merged pull requests:** + +- Fix default arrow build 
[\#533](https://github.com/apache/arrow-rs/pull/533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add tests for building applications using arrow with different feature flags [\#532](https://github.com/apache/arrow-rs/pull/532) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Remove unused futures dependency from arrow-flight [\#528](https://github.com/apache/arrow-rs/pull/528) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) +- CI: update rust nightly and packed\_simd [\#525](https://github.com/apache/arrow-rs/pull/525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) +- Support `StringArray` creation from String Vec [\#522](https://github.com/apache/arrow-rs/pull/522) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([silathdiir](https://github.com/silathdiir)) +- Fix parquet benchmark schema [\#513](https://github.com/apache/arrow-rs/pull/513) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) +- Fix parquet definition levels [\#511](https://github.com/apache/arrow-rs/pull/511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) +- Fix for primitive and boolean take kernel for nullable indices with an offset [\#509](https://github.com/apache/arrow-rs/pull/509) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- Bump flatbuffers [\#499](https://github.com/apache/arrow-rs/pull/499) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([PsiACE](https://github.com/PsiACE)) +- implement second/minute helpers for temporal [\#493](https://github.com/apache/arrow-rs/pull/493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ovr](https://github.com/ovr)) +- special case 
concatenating single element array shortcut [\#492](https://github.com/apache/arrow-rs/pull/492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) +- update docs to reflect recent changes \(joins and window functions\) [\#489](https://github.com/apache/arrow-rs/pull/489) ([Jimexist](https://github.com/Jimexist)) +- Update rand, proc-macro and zstd dependencies [\#488](https://github.com/apache/arrow-rs/pull/488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Doctest for GenericListArray. [\#474](https://github.com/apache/arrow-rs/pull/474) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) +- remove stale comment on `ArrayData` equality and update unit tests [\#472](https://github.com/apache/arrow-rs/pull/472) ([Jimexist](https://github.com/Jimexist)) +- remove unused patch file [\#471](https://github.com/apache/arrow-rs/pull/471) ([Jimexist](https://github.com/Jimexist)) +- fix clippy warnings for rust 1.53 [\#470](https://github.com/apache/arrow-rs/pull/470) ([Jimexist](https://github.com/Jimexist)) +- Fix PR labeler [\#468](https://github.com/apache/arrow-rs/pull/468) ([Dandandan](https://github.com/Dandandan)) +- Tweak dev backporting docs [\#466](https://github.com/apache/arrow-rs/pull/466) ([alamb](https://github.com/alamb)) +- Unvendor Archery [\#459](https://github.com/apache/arrow-rs/pull/459) ([kszucs](https://github.com/kszucs)) +- Add sort boolean benchmark [\#457](https://github.com/apache/arrow-rs/pull/457) ([alamb](https://github.com/alamb)) +- Add C data interface for decimal128 and timestamp [\#453](https://github.com/apache/arrow-rs/pull/453) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alippai](https://github.com/alippai)) +- Implement the 
Iterator trait for the json Reader. [\#451](https://github.com/apache/arrow-rs/pull/451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([LaurentMazare](https://github.com/LaurentMazare)) +- Update release docs + release email template [\#450](https://github.com/apache/arrow-rs/pull/450) ([alamb](https://github.com/alamb)) +- remove clippy unnecessary wraps suppresions in cast kernel [\#449](https://github.com/apache/arrow-rs/pull/449) ([Jimexist](https://github.com/Jimexist)) +- Use partition for bool sort [\#448](https://github.com/apache/arrow-rs/pull/448) ([Jimexist](https://github.com/Jimexist)) +- remove unnecessary wraps in sort [\#445](https://github.com/apache/arrow-rs/pull/445) ([Jimexist](https://github.com/Jimexist)) +- Python FFI bridge for Schema, Field and DataType [\#439](https://github.com/apache/arrow-rs/pull/439) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszucs](https://github.com/kszucs)) +- Update release Readme.md [\#436](https://github.com/apache/arrow-rs/pull/436) ([alamb](https://github.com/alamb)) +- Derive Eq and PartialEq for SortOptions [\#425](https://github.com/apache/arrow-rs/pull/425) ([tustvold](https://github.com/tustvold)) +- refactor lexico sort for future code reuse [\#423](https://github.com/apache/arrow-rs/pull/423) ([Jimexist](https://github.com/Jimexist)) +- Reenable MIRI check on PRs [\#421](https://github.com/apache/arrow-rs/pull/421) ([alamb](https://github.com/alamb)) +- Sort by float lists [\#420](https://github.com/apache/arrow-rs/pull/420) ([medwards](https://github.com/medwards)) +- Fix out of bounds read in bit chunk iterator [\#416](https://github.com/apache/arrow-rs/pull/416) ([jhorstmann](https://github.com/jhorstmann)) +- Doctests for DecimalArray. 
[\#414](https://github.com/apache/arrow-rs/pull/414) ([novemberkilo](https://github.com/novemberkilo)) +- Add Decimal to CsvWriter and improve debug display [\#406](https://github.com/apache/arrow-rs/pull/406) ([alippai](https://github.com/alippai)) +- MINOR: update install instruction [\#400](https://github.com/apache/arrow-rs/pull/400) ([alippai](https://github.com/alippai)) +- use prettier to auto format md files [\#398](https://github.com/apache/arrow-rs/pull/398) ([Jimexist](https://github.com/Jimexist)) +- window::shift to work for all array types [\#388](https://github.com/apache/arrow-rs/pull/388) ([Jimexist](https://github.com/Jimexist)) +- add more tests for window::shift and handle boundary cases [\#386](https://github.com/apache/arrow-rs/pull/386) ([Jimexist](https://github.com/Jimexist)) +- Implement faster arrow array reader [\#384](https://github.com/apache/arrow-rs/pull/384) ([yordan-pavlov](https://github.com/yordan-pavlov)) +- Add set\_bit to BooleanBufferBuilder to allow mutating bit in index [\#383](https://github.com/apache/arrow-rs/pull/383) ([boazberman](https://github.com/boazberman)) +- make sure that only concat preallocates buffers [\#382](https://github.com/apache/arrow-rs/pull/382) ([ritchie46](https://github.com/ritchie46)) +- Respect max rowgroup size in Arrow writer [\#381](https://github.com/apache/arrow-rs/pull/381) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) +- Fix typo in release script, update release location [\#380](https://github.com/apache/arrow-rs/pull/380) ([alamb](https://github.com/alamb)) +- Doctests for FixedSizeBinaryArray [\#378](https://github.com/apache/arrow-rs/pull/378) ([novemberkilo](https://github.com/novemberkilo)) +- Simplify shift kernel using new\_null\_array [\#370](https://github.com/apache/arrow-rs/pull/370) ([Dandandan](https://github.com/Dandandan)) +- allow `SliceableCursor` to be constructed from an `Arc` directly 
[\#369](https://github.com/apache/arrow-rs/pull/369) ([crepererum](https://github.com/crepererum)) +- Add doctest for ArrayBuilder [\#367](https://github.com/apache/arrow-rs/pull/367) ([alippai](https://github.com/alippai)) +- Fix version in readme [\#365](https://github.com/apache/arrow-rs/pull/365) ([domoritz](https://github.com/domoritz)) +- Remove superfluous space [\#363](https://github.com/apache/arrow-rs/pull/363) ([domoritz](https://github.com/domoritz)) +- Add crate badges [\#362](https://github.com/apache/arrow-rs/pull/362) ([domoritz](https://github.com/domoritz)) +- Disable MIRI check until it runs cleanly on CI [\#360](https://github.com/apache/arrow-rs/pull/360) ([alamb](https://github.com/alamb)) +- Only register Flight.proto with cargo if it exists [\#351](https://github.com/apache/arrow-rs/pull/351) ([tustvold](https://github.com/tustvold)) +- Reduce memory usage of concat \(large\)utf8 [\#348](https://github.com/apache/arrow-rs/pull/348) ([ritchie46](https://github.com/ritchie46)) +- Fix filter UB and add fast path [\#341](https://github.com/apache/arrow-rs/pull/341) ([ritchie46](https://github.com/ritchie46)) +- Automatic cherry-pick script [\#339](https://github.com/apache/arrow-rs/pull/339) ([alamb](https://github.com/alamb)) +- Doctests for BooleanArray. [\#338](https://github.com/apache/arrow-rs/pull/338) ([novemberkilo](https://github.com/novemberkilo)) +- feature gate ipc reader/writer [\#336](https://github.com/apache/arrow-rs/pull/336) ([ritchie46](https://github.com/ritchie46)) +- Add ported Rust release verification script [\#331](https://github.com/apache/arrow-rs/pull/331) ([wesm](https://github.com/wesm)) +- Doctests for StringArray and LargeStringArray. 
[\#330](https://github.com/apache/arrow-rs/pull/330) ([novemberkilo](https://github.com/novemberkilo)) +- inline PrimitiveArray::value [\#329](https://github.com/apache/arrow-rs/pull/329) ([ritchie46](https://github.com/ritchie46)) +- Enable wasm32 as a target architecture for the SIMD feature [\#324](https://github.com/apache/arrow-rs/pull/324) ([roee88](https://github.com/roee88)) +- Fix undefined behavior in FFI and enable MIRI checks on CI [\#323](https://github.com/apache/arrow-rs/pull/323) ([roee88](https://github.com/roee88)) +- Mutablebuffer::shrink\_to\_fit [\#318](https://github.com/apache/arrow-rs/pull/318) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) +- Add \(simd\) modulus op [\#317](https://github.com/apache/arrow-rs/pull/317) ([gangliao](https://github.com/gangliao)) +- feature gate csv functionality [\#312](https://github.com/apache/arrow-rs/pull/312) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) +- \[Minor\] Version upgrades [\#304](https://github.com/apache/arrow-rs/pull/304) ([Dandandan](https://github.com/Dandandan)) +- Remove old release scripts [\#293](https://github.com/apache/arrow-rs/pull/293) ([alamb](https://github.com/alamb)) +- Add Send to the ArrayBuilder trait [\#291](https://github.com/apache/arrow-rs/pull/291) ([Max-Meldrum](https://github.com/Max-Meldrum)) +- Added changelog generator script and configuration. 
[\#289](https://github.com/apache/arrow-rs/pull/289) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- manually bump development version [\#288](https://github.com/apache/arrow-rs/pull/288) ([nevi-me](https://github.com/nevi-me)) +- Fix FFI and add support for Struct type [\#287](https://github.com/apache/arrow-rs/pull/287) ([roee88](https://github.com/roee88)) +- Fix subtraction underflow when sorting string arrays with many nulls [\#285](https://github.com/apache/arrow-rs/pull/285) ([medwards](https://github.com/medwards)) +- Speed up bound checking in `take` [\#281](https://github.com/apache/arrow-rs/pull/281) ([Dandandan](https://github.com/Dandandan)) +- Update PR template by commenting out instructions [\#278](https://github.com/apache/arrow-rs/pull/278) ([nevi-me](https://github.com/nevi-me)) +- Added Decimal support to pretty-print display utility \(\#230\) [\#273](https://github.com/apache/arrow-rs/pull/273) ([mgill25](https://github.com/mgill25)) +- Fix null struct and list roundtrip [\#270](https://github.com/apache/arrow-rs/pull/270) ([nevi-me](https://github.com/nevi-me)) +- 1.52 clippy fixes [\#267](https://github.com/apache/arrow-rs/pull/267) ([nevi-me](https://github.com/nevi-me)) +- Fix typo in csv/reader.rs [\#265](https://github.com/apache/arrow-rs/pull/265) ([domoritz](https://github.com/domoritz)) +- Fix empty Schema::metadata deserialization error [\#260](https://github.com/apache/arrow-rs/pull/260) ([hulunbier](https://github.com/hulunbier)) +- update datafusion and ballista doc links [\#259](https://github.com/apache/arrow-rs/pull/259) ([Jimexist](https://github.com/Jimexist)) +- support full u32 and u64 roundtrip through parquet [\#258](https://github.com/apache/arrow-rs/pull/258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([crepererum](https://github.com/crepererum)) +- \[MINOR\] Added env to run rust in integration. 
[\#253](https://github.com/apache/arrow-rs/pull/253) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- \[Minor\] Made integration tests always run. [\#248](https://github.com/apache/arrow-rs/pull/248) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- fix parquet max\_definition for non-null structs [\#246](https://github.com/apache/arrow-rs/pull/246) ([nevi-me](https://github.com/nevi-me)) +- Disabled rebase needed until demonstrate working. [\#243](https://github.com/apache/arrow-rs/pull/243) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- pin flatbuffers to 0.8.4 [\#239](https://github.com/apache/arrow-rs/pull/239) ([ritchie46](https://github.com/ritchie46)) +- sort\_primitive result is capped to the min of limit or values.len [\#236](https://github.com/apache/arrow-rs/pull/236) ([medwards](https://github.com/medwards)) +- Read list field correctly [\#234](https://github.com/apache/arrow-rs/pull/234) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) +- Fix code examples for RecordBatch::try\_from\_iter [\#231](https://github.com/apache/arrow-rs/pull/231) ([alamb](https://github.com/alamb)) +- Support string dictionaries in csv reader \(\#228\) [\#229](https://github.com/apache/arrow-rs/pull/229) ([tustvold](https://github.com/tustvold)) +- support LargeUtf8 in sort kernel [\#26](https://github.com/apache/arrow-rs/pull/26) ([ritchie46](https://github.com/ritchie46)) +- Removed unused files [\#22](https://github.com/apache/arrow-rs/pull/22) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- ARROW-12504: Buffer::from\_slice\_ref set correct capacity [\#18](https://github.com/apache/arrow-rs/pull/18) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add GitHub templates [\#17](https://github.com/apache/arrow-rs/pull/17) ([andygrove](https://github.com/andygrove)) +- ARROW-12493: Add support for writing dictionary arrays to CSV and 
JSON [\#16](https://github.com/apache/arrow-rs/pull/16) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- ARROW-12426: \[Rust\] Fix concatentation of arrow dictionaries [\#15](https://github.com/apache/arrow-rs/pull/15) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update repository and homepage urls [\#14](https://github.com/apache/arrow-rs/pull/14) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- Added rebase-needed bot [\#13](https://github.com/apache/arrow-rs/pull/13) ([jorgecarleitao](https://github.com/jorgecarleitao)) +- Added Integration tests against arrow [\#10](https://github.com/apache/arrow-rs/pull/10) ([jorgecarleitao](https://github.com/jorgecarleitao)) + +## [4.4.0](https://github.com/apache/arrow-rs/tree/4.4.0) (2021-06-24) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/4.3.0...4.4.0) + +**Breaking changes:** + +- migrate partition kernel to use Iterator trait [\#437](https://github.com/apache/arrow-rs/issues/437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove DictionaryArray::keys\_array [\#391](https://github.com/apache/arrow-rs/issues/391) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Implemented enhancements:** + +- sort kernel boolean sort can be O\(n\) [\#447](https://github.com/apache/arrow-rs/issues/447) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- C data interface for decimal128, timestamp, date32 and date64 [\#413](https://github.com/apache/arrow-rs/issues/413) +- Add Decimal to CsvWriter [\#405](https://github.com/apache/arrow-rs/issues/405) +- Use iterators to increase performance of creating Arrow arrays [\#200](https://github.com/apache/arrow-rs/issues/200) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +**Fixed bugs:** + +- Release Audit Tool \(RAT\) is not being triggered [\#481](https://github.com/apache/arrow-rs/issues/481) +- Security Vulnerabilities: flatbuffers: `read_scalar` and `read_scalar_at` allow transmuting values without `unsafe` blocks [\#476](https://github.com/apache/arrow-rs/issues/476) +- Clippy broken after upgrade to rust 1.53 [\#467](https://github.com/apache/arrow-rs/issues/467) +- Pull Request Labeler is not working [\#462](https://github.com/apache/arrow-rs/issues/462) +- Arrow 4.3 release: error\[E0658\]: use of unstable library feature 'partition\_point': new API [\#456](https://github.com/apache/arrow-rs/issues/456) +- parquet reading hangs when row\_group contains more than 2048 rows of data [\#349](https://github.com/apache/arrow-rs/issues/349) +- Fail to build arrow [\#247](https://github.com/apache/arrow-rs/issues/247) +- JSON reader does not implement iterator [\#193](https://github.com/apache/arrow-rs/issues/193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Security fixes:** + +- Ensure a successful MIRI Run on CI [\#227](https://github.com/apache/arrow-rs/issues/227) + +**Closed issues:** + +- sort kernel has a lot of unnecessary wrapping [\#446](https://github.com/apache/arrow-rs/issues/446) +- \[Parquet\] Plain encoded boolean column chunks limited to 2048 values [\#48](https://github.com/apache/arrow-rs/issues/48) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] + +## [4.3.0](https://github.com/apache/arrow-rs/tree/4.3.0) (2021-06-10) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/4.2.0...4.3.0) + +**Implemented enhancements:** + +- Add partitioning kernel for sorted arrays [\#428](https://github.com/apache/arrow-rs/issues/428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Implement sort by float lists [\#427](https://github.com/apache/arrow-rs/issues/427) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Derive Eq and PartialEq for SortOptions [\#426](https://github.com/apache/arrow-rs/issues/426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- use prettier and github action to normalize markdown document syntax [\#399](https://github.com/apache/arrow-rs/issues/399) +- window::shift can work for more than just primitive array type [\#392](https://github.com/apache/arrow-rs/issues/392) +- Doctest for ArrayBuilder [\#366](https://github.com/apache/arrow-rs/issues/366) + +**Fixed bugs:** + +- Boolean `not` kernel does not take offset of null buffer into account [\#417](https://github.com/apache/arrow-rs/issues/417) +- my contribution not marged in 4.2 release [\#394](https://github.com/apache/arrow-rs/issues/394) +- window::shift shall properly handle boundary cases [\#387](https://github.com/apache/arrow-rs/issues/387) +- Parquet `WriterProperties.max_row_group_size` not wired up [\#257](https://github.com/apache/arrow-rs/issues/257) +- Out of bound reads in chunk iterator [\#198](https://github.com/apache/arrow-rs/issues/198) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +## [4.2.0](https://github.com/apache/arrow-rs/tree/4.2.0) (2021-05-29) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/4.1.0...4.2.0) + +**Breaking changes:** + +- DictionaryArray::values\(\) clones the underlying ArrayRef [\#313](https://github.com/apache/arrow-rs/issues/313) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Implemented enhancements:** + +- Simplify shift kernel using null array [\#371](https://github.com/apache/arrow-rs/issues/371) +- Provide `Arc`-based constructor for `parquet::util::cursor::SliceableCursor` [\#368](https://github.com/apache/arrow-rs/issues/368) +- Add badges to crates [\#361](https://github.com/apache/arrow-rs/issues/361) +- Consider inlining PrimitiveArray::value [\#328](https://github.com/apache/arrow-rs/issues/328) +- Implement automated 
release verification script [\#327](https://github.com/apache/arrow-rs/issues/327) +- Add wasm32 to the list of target architectures of the simd feature [\#316](https://github.com/apache/arrow-rs/issues/316) +- add with\_escape for csv::ReaderBuilder [\#315](https://github.com/apache/arrow-rs/issues/315) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- IPC feature gate [\#310](https://github.com/apache/arrow-rs/issues/310) +- csv feature gate [\#309](https://github.com/apache/arrow-rs/issues/309) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `shrink_to` / `shrink_to_fit` to `MutableBuffer` [\#297](https://github.com/apache/arrow-rs/issues/297) + +**Fixed bugs:** + +- Incorrect crate setup instructions [\#364](https://github.com/apache/arrow-rs/issues/364) +- Arrow-flight only register rerun-if-changed if file exists [\#350](https://github.com/apache/arrow-rs/issues/350) +- Dictionary Comparison Uses Wrong Values Array [\#332](https://github.com/apache/arrow-rs/issues/332) +- Undefined behavior in FFI implementation [\#322](https://github.com/apache/arrow-rs/issues/322) +- All-null column get wrong parquet null-counts [\#306](https://github.com/apache/arrow-rs/issues/306) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Filter has inconsistent null handling [\#295](https://github.com/apache/arrow-rs/issues/295) + +## [4.1.0](https://github.com/apache/arrow-rs/tree/4.1.0) (2021-05-17) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/4.0.0...4.1.0) + +**Implemented enhancements:** + +- Add Send to ArrayBuilder [\#290](https://github.com/apache/arrow-rs/issues/290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve performance of bound checking option [\#280](https://github.com/apache/arrow-rs/issues/280) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- extend compute kernel arity to include nullary functions [\#276](https://github.com/apache/arrow-rs/issues/276) +- 
Implement FFI / CDataInterface for Struct Arrays [\#251](https://github.com/apache/arrow-rs/issues/251) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add support for pretty-printing Decimal numbers [\#230](https://github.com/apache/arrow-rs/issues/230) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- CSV Reader String Dictionary Support [\#228](https://github.com/apache/arrow-rs/issues/228) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add Builder interface for adding Arrays to record batches [\#210](https://github.com/apache/arrow-rs/issues/210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support auto-vectorization for min/max [\#209](https://github.com/apache/arrow-rs/issues/209) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support LargeUtf8 in sort kernel [\#25](https://github.com/apache/arrow-rs/issues/25) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Fixed bugs:** + +- no method named `select_nth_unstable_by` found for mutable reference `&mut [T]` [\#283](https://github.com/apache/arrow-rs/issues/283) +- Rust 1.52 Clippy error [\#266](https://github.com/apache/arrow-rs/issues/266) +- NaNs can break parquet statistics [\#255](https://github.com/apache/arrow-rs/issues/255) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- u64::MAX does not roundtrip through parquet [\#254](https://github.com/apache/arrow-rs/issues/254) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Integration tests failing to compile \(flatbuffer\) [\#249](https://github.com/apache/arrow-rs/issues/249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix compatibility quirks between arrow and parquet structs [\#245](https://github.com/apache/arrow-rs/issues/245) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Unable to write non-null Arrow structs to Parquet [\#244](https://github.com/apache/arrow-rs/issues/244) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- schema: missing field `metadata` when deserialize [\#241](https://github.com/apache/arrow-rs/issues/241) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Arrow does not compile due to flatbuffers upgrade [\#238](https://github.com/apache/arrow-rs/issues/238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Sort with limit panics for the limit includes some but not all nulls, for large arrays [\#235](https://github.com/apache/arrow-rs/issues/235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- arrow-rs contains a copy of the "format" directory [\#233](https://github.com/apache/arrow-rs/issues/233) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix SEGFAULT/ SIGILL in child-data ffi [\#206](https://github.com/apache/arrow-rs/issues/206) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Read list field correctly in \\> [\#167](https://github.com/apache/arrow-rs/issues/167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- FFI listarray lead to undefined behavior. 
[\#20](https://github.com/apache/arrow-rs/issues/20) + +**Security fixes:** + +- Fix MIRI build on CI [\#226](https://github.com/apache/arrow-rs/issues/226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Get MIRI running again [\#224](https://github.com/apache/arrow-rs/issues/224) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Comment out the instructions in the PR template [\#277](https://github.com/apache/arrow-rs/issues/277) +- Update links to datafusion and ballista in README.md [\#19](https://github.com/apache/arrow-rs/issues/19) +- Update "repository" in Cargo.toml [\#12](https://github.com/apache/arrow-rs/issues/12) + +**Closed issues:** + +- Arrow Aligned Vec [\#268](https://github.com/apache/arrow-rs/issues/268) +- \[Rust\]: Tracking issue for AVX-512 [\#220](https://github.com/apache/arrow-rs/issues/220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Umbrella issue for clippy integration [\#217](https://github.com/apache/arrow-rs/issues/217) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support sort [\#215](https://github.com/apache/arrow-rs/issues/215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support stable Rust [\#214](https://github.com/apache/arrow-rs/issues/214) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove Rust and point integration tests to arrow-rs repo [\#211](https://github.com/apache/arrow-rs/issues/211) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- ArrayData buffers are inconsistent accross implementations [\#207](https://github.com/apache/arrow-rs/issues/207) +- 3.0.1 patch release [\#204](https://github.com/apache/arrow-rs/issues/204) +- Document patch release process [\#202](https://github.com/apache/arrow-rs/issues/202) +- Simplify Offset [\#186](https://github.com/apache/arrow-rs/issues/186) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Typed Bytes 
[\#185](https://github.com/apache/arrow-rs/issues/185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[CI\]docker-compose setup should enable caching [\#175](https://github.com/apache/arrow-rs/issues/175) +- Improve take primitive performance [\#174](https://github.com/apache/arrow-rs/issues/174) +- \[CI\] Try out buildkite [\#165](https://github.com/apache/arrow-rs/issues/165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Update assignees in JIRA where missing [\#160](https://github.com/apache/arrow-rs/issues/160) +- \[Rust\]: From\ implementations should validate data type [\#103](https://github.com/apache/arrow-rs/issues/103) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Verify that projection push down does not remove aliases columns [\#99](https://github.com/apache/arrow-rs/issues/99) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[Rust\]\[DataFusion\] Implement modulus expression [\#98](https://github.com/apache/arrow-rs/issues/98) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Add constant folding to expressions during logically planning [\#96](https://github.com/apache/arrow-rs/issues/96) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] DataFrame.collect should return RecordBatchReader [\#95](https://github.com/apache/arrow-rs/issues/95) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[Rust\]\[DataFusion\] Add FORMAT to explain plan and an easy to visualize format [\#94](https://github.com/apache/arrow-rs/issues/94) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Implement metrics framework [\#90](https://github.com/apache/arrow-rs/issues/90) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Implement micro benchmarks for each operator [\#89](https://github.com/apache/arrow-rs/issues/89) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- 
\[DataFusion\] Implement pretty print for physical query plan [\#88](https://github.com/apache/arrow-rs/issues/88) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[Archery\] Support rust clippy in the lint command [\#83](https://github.com/apache/arrow-rs/issues/83) +- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#75](https://github.com/apache/arrow-rs/issues/75) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[Rust\]\[DataFusion\] Improve like/nlike performance [\#71](https://github.com/apache/arrow-rs/issues/71) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Implement optimizer rule to remove redundant projections [\#56](https://github.com/apache/arrow-rs/issues/56) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[DataFusion\] Parquet data source does not support complex types [\#39](https://github.com/apache/arrow-rs/issues/39) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Merge utils from Parquet and Arrow [\#32](https://github.com/apache/arrow-rs/issues/32) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add benchmarks for Parquet [\#30](https://github.com/apache/arrow-rs/issues/30) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Mark methods that do not perform bounds checking as unsafe [\#28](https://github.com/apache/arrow-rs/issues/28) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Test issue [\#24](https://github.com/apache/arrow-rs/issues/24) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- This is a test issue [\#11](https://github.com/apache/arrow-rs/issues/11) diff --git a/CHANGELOG.md b/CHANGELOG.md index 890e70bd5d12..549d4da1a6b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,1297 +19,82 @@ # Changelog -## [15.0.0](https://github.com/apache/arrow-rs/tree/15.0.0) (2022-05-27) +## 
[16.0.0](https://github.com/apache/arrow-rs/tree/16.0.0) (2022-06-10) -[Full Changelog](https://github.com/apache/arrow-rs/compare/14.0.0...15.0.0) +[Full Changelog](https://github.com/apache/arrow-rs/compare/15.0.0...16.0.0) **Breaking changes:** -- Change `ArrayDataBuilder::null_bit_buffer` to accept `Option` rather than `Buffer` [\#1739](https://github.com/apache/arrow-rs/pull/1739) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove `null_count` from `ArrayData::try_new()` [\#1721](https://github.com/apache/arrow-rs/pull/1721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Change parquet writers to use standard `std:io::Write` rather custom `ParquetWriter` trait \(\#1717\) \(\#1163\) [\#1719](https://github.com/apache/arrow-rs/pull/1719) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add explicit column mask for selection in parquet: `ProjectionMask` \(\#1701\) [\#1716](https://github.com/apache/arrow-rs/pull/1716) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add type\_ids in Union datatype [\#1703](https://github.com/apache/arrow-rs/pull/1703) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix Parquet Reader's Arrow Schema Inference [\#1682](https://github.com/apache/arrow-rs/pull/1682) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Seal `ArrowNativeType` and `OffsetSizeTrait` for safety \(\#1028\) [\#1819](https://github.com/apache/arrow-rs/pull/1819) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Improve API for 
`csv::infer_file_schema` by removing redundant ref [\#1776](https://github.com/apache/arrow-rs/pull/1776) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- Rename the `string` kernel to `concatenate_elements` [\#1747](https://github.com/apache/arrow-rs/issues/1747) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `ArrayDataBuilder::null_bit_buffer` should accept `Option` as input type [\#1737](https://github.com/apache/arrow-rs/issues/1737) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix schema comparison for non\_canonical\_map when running flight test [\#1730](https://github.com/apache/arrow-rs/issues/1730) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support in aggregate kernel for `BinaryArray` [\#1724](https://github.com/apache/arrow-rs/issues/1724) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix incorrect null\_count in `generate_unions_case` integration test [\#1712](https://github.com/apache/arrow-rs/issues/1712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Keep type ids in Union datatype to follow Arrow spec and integrate with other implementations [\#1690](https://github.com/apache/arrow-rs/issues/1690) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support Reading Alternative List Representations to Arrow From Parquet [\#1680](https://github.com/apache/arrow-rs/issues/1680) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Speed up the offsets checking [\#1675](https://github.com/apache/arrow-rs/issues/1675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Separate Parquet -\> Arrow Schema Conversion From ArrayBuilder [\#1655](https://github.com/apache/arrow-rs/issues/1655) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Add `leaf_columns` argument to `ArrowReader::get_record_reader_by_columns` 
[\#1653](https://github.com/apache/arrow-rs/issues/1653) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Implement `string_concat` kernel [\#1540](https://github.com/apache/arrow-rs/issues/1540) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve Unit Test Coverage of ArrayReaderBuilder [\#1484](https://github.com/apache/arrow-rs/issues/1484) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- List equality method should work on empty offset `ListArray` [\#1817](https://github.com/apache/arrow-rs/issues/1817) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Command line tool for convert CSV to Parquet [\#1797](https://github.com/apache/arrow-rs/issues/1797) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- IPC writer should write validity buffer for `UnionArray` in V4 IPC message [\#1793](https://github.com/apache/arrow-rs/issues/1793) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add function for row alignment with page mask [\#1790](https://github.com/apache/arrow-rs/issues/1790) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Rust IPC Read should be able to read V4 UnionType Array [\#1788](https://github.com/apache/arrow-rs/issues/1788) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `combine_option_bitmap` should accept arbitrary number of input arrays. 
[\#1780](https://github.com/apache/arrow-rs/issues/1780) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add `substring_by_char` kernels for slicing on character boundaries [\#1768](https://github.com/apache/arrow-rs/issues/1768) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support reading `PageIndex` from column metadata [\#1761](https://github.com/apache/arrow-rs/issues/1761) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support casting from `DataType::Utf8` to `DataType::Boolean` [\#1740](https://github.com/apache/arrow-rs/issues/1740) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Make current position available in `FileWriter`. [\#1691](https://github.com/apache/arrow-rs/issues/1691) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support writing parquet to `stdout` [\#1687](https://github.com/apache/arrow-rs/issues/1687) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] **Fixed bugs:** -- Parquet write failure \(from record batches\) when data is nested two levels deep [\#1744](https://github.com/apache/arrow-rs/issues/1744) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- IPC reader may break on projection [\#1735](https://github.com/apache/arrow-rs/issues/1735) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Latest nightly fails to build with feature simd [\#1734](https://github.com/apache/arrow-rs/issues/1734) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Trying to write parquet file in parallel results in corrupt file [\#1717](https://github.com/apache/arrow-rs/issues/1717) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Roundtrip failure when using DELTA\_BINARY\_PACKED [\#1708](https://github.com/apache/arrow-rs/issues/1708) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- `ArrayData::try_new` cannot always return expected error. 
[\#1707](https://github.com/apache/arrow-rs/issues/1707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- "out of order projection is not supported" after Fix Parquet Arrow Schema Inference [\#1701](https://github.com/apache/arrow-rs/issues/1701) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Rust is not interoperability with C++ for IPC schemas with dictionaries [\#1694](https://github.com/apache/arrow-rs/issues/1694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Incorrect Repeated Field Schema Inference [\#1681](https://github.com/apache/arrow-rs/issues/1681) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Parquet Treats Embedded Arrow Schema as Authoritative [\#1663](https://github.com/apache/arrow-rs/issues/1663) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- parquet\_to\_arrow\_schema\_by\_columns Incorrectly Handles Nested Types [\#1654](https://github.com/apache/arrow-rs/issues/1654) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Inconsistent Arrow Schema When Projecting Nested Parquet File [\#1652](https://github.com/apache/arrow-rs/issues/1652) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- StructArrayReader Cannot Handle Nested Lists [\#1651](https://github.com/apache/arrow-rs/issues/1651) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Bug \(`substring` kernel\): The null buffer is not aligned when `offset != 0` [\#1639](https://github.com/apache/arrow-rs/issues/1639) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Incorrect Offset Validation for Sliced List Array Children [\#1814](https://github.com/apache/arrow-rs/issues/1814) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet Snappy Codec overwrites Existing Data in Decompression Buffer [\#1806](https://github.com/apache/arrow-rs/issues/1806) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- 
`flight_data_to_arrow_batch` does not support `RecordBatch`es with no columns [\#1783](https://github.com/apache/arrow-rs/issues/1783) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- parquet does not compile with `features=["zstd"]` [\#1630](https://github.com/apache/arrow-rs/issues/1630) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] **Documentation updates:** -- Parquet command line tool does not install "globally" [\#1710](https://github.com/apache/arrow-rs/issues/1710) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Improve integration test document to follow Arrow C++ repo CI [\#1742](https://github.com/apache/arrow-rs/pull/1742) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) - -**Merged pull requests:** - -- Test for list array equality with different offsets [\#1756](https://github.com/apache/arrow-rs/pull/1756) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Rename `string_concat` to `concat_elements_utf8` [\#1754](https://github.com/apache/arrow-rs/pull/1754) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Rename the `string` kernel to `concat_elements`. 
[\#1752](https://github.com/apache/arrow-rs/pull/1752) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Support writing nested lists to parquet [\#1746](https://github.com/apache/arrow-rs/pull/1746) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Pin nightly version to bypass packed\_simd build error [\#1743](https://github.com/apache/arrow-rs/pull/1743) ([viirya](https://github.com/viirya)) -- Fix projection in IPC reader [\#1736](https://github.com/apache/arrow-rs/pull/1736) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([iyupeng](https://github.com/iyupeng)) -- `cargo install` installs not globally [\#1732](https://github.com/apache/arrow-rs/pull/1732) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kazuk](https://github.com/kazuk)) -- Fix schema comparison for non\_canonical\_map when running flight test [\#1731](https://github.com/apache/arrow-rs/pull/1731) ([viirya](https://github.com/viirya)) -- Add `min_binary` and `max_binary` aggregate kernels [\#1725](https://github.com/apache/arrow-rs/pull/1725) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix parquet benchmarks [\#1723](https://github.com/apache/arrow-rs/pull/1723) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Fix BitReader::get\_batch zero extension \(\#1708\) [\#1722](https://github.com/apache/arrow-rs/pull/1722) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Implementation string concat [\#1720](https://github.com/apache/arrow-rs/pull/1720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ismail-Maj](https://github.com/Ismail-Maj)) -- Check the length of `null_bit_buffer` in `ArrayData::try_new()` [\#1714](https://github.com/apache/arrow-rs/pull/1714) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix incorrect null\_count in `generate_unions_case` integration test [\#1713](https://github.com/apache/arrow-rs/pull/1713) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix: Null buffer accounts for `offset` in `substring` kernel. [\#1704](https://github.com/apache/arrow-rs/pull/1704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Minor: Refine `OffsetSizeTrait` to extend `num::Integer` [\#1702](https://github.com/apache/arrow-rs/pull/1702) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix StructArrayReader handling nested lists \(\#1651\) [\#1700](https://github.com/apache/arrow-rs/pull/1700) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Speed up the offsets checking [\#1684](https://github.com/apache/arrow-rs/pull/1684) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) - -## [14.0.0](https://github.com/apache/arrow-rs/tree/14.0.0) (2022-05-13) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/13.0.0...14.0.0) - -**Breaking changes:** - -- Use `bytes` in parquet rather than custom Buffer implementation \(\#1474\) [\#1683](https://github.com/apache/arrow-rs/pull/1683) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Rename `OffsetSize::fn is_large` to `const OffsetSize::IS_LARGE` [\#1664](https://github.com/apache/arrow-rs/pull/1664) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove `StringOffsetTrait` and `BinaryOffsetTrait` 
[\#1645](https://github.com/apache/arrow-rs/pull/1645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix `generate_nested_dictionary_case` integration test failure [\#1636](https://github.com/apache/arrow-rs/pull/1636) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([viirya](https://github.com/viirya)) - -**Implemented enhancements:** - -- Add support for `DataType::Duration` in ffi interface [\#1688](https://github.com/apache/arrow-rs/issues/1688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix `generate_unions_case` integration test [\#1676](https://github.com/apache/arrow-rs/issues/1676) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `DictionaryArray` support for `bit_length` kernel [\#1673](https://github.com/apache/arrow-rs/issues/1673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `DictionaryArray` support for `length` kernel [\#1672](https://github.com/apache/arrow-rs/issues/1672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- flight\_client\_scenarios integration test should receive schema from flight data [\#1669](https://github.com/apache/arrow-rs/issues/1669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Unpin Flatbuffer version dependency [\#1667](https://github.com/apache/arrow-rs/issues/1667) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add dictionary array support for substring function [\#1656](https://github.com/apache/arrow-rs/issues/1656) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Exclude dict\_id and dict\_is\_ordered from equality comparison of `Field` [\#1646](https://github.com/apache/arrow-rs/issues/1646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove `StringOffsetTrait` and `BinaryOffsetTrait` [\#1644](https://github.com/apache/arrow-rs/issues/1644) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add tests and examples for `UnionArray::from(data: ArrayData)` [\#1643](https://github.com/apache/arrow-rs/issues/1643) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add methods `pub fn offsets_buffer`, `pub fn types_ids_buffer`and `pub fn data_buffer` for `ArrayDataBuilder` [\#1640](https://github.com/apache/arrow-rs/issues/1640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix `generate_nested_dictionary_case` integration test failure for Rust cases [\#1635](https://github.com/apache/arrow-rs/issues/1635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Expose `ArrowWriter` row group flush in public API [\#1626](https://github.com/apache/arrow-rs/issues/1626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Add `substring` support for `FixedSizeBinaryArray` [\#1618](https://github.com/apache/arrow-rs/issues/1618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add PrettyPrint for `UnionArray`s [\#1594](https://github.com/apache/arrow-rs/issues/1594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add SIMD support for the `length` kernel [\#1489](https://github.com/apache/arrow-rs/issues/1489) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support dictionary arrays in length and bit\_length [\#1674](https://github.com/apache/arrow-rs/pull/1674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Add dictionary array support for substring function [\#1665](https://github.com/apache/arrow-rs/pull/1665) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) -- Add `DecimalType` support in `new_null_array ` [\#1659](https://github.com/apache/arrow-rs/pull/1659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) - -**Fixed bugs:** - -- Docs.rs build is broken 
[\#1695](https://github.com/apache/arrow-rs/issues/1695) -- Interoperability with C++ for IPC schemas with dictionaries [\#1694](https://github.com/apache/arrow-rs/issues/1694) -- `UnionArray::is_null` incorrect [\#1625](https://github.com/apache/arrow-rs/issues/1625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Published Parquet documentation missing `arrow::async_reader` [\#1617](https://github.com/apache/arrow-rs/issues/1617) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Files written with Julia's Arrow.jl in IPC format cannot be read by arrow-rs [\#1335](https://github.com/apache/arrow-rs/issues/1335) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Documentation updates:** - -- Correct arrow-flight readme version [\#1641](https://github.com/apache/arrow-rs/pull/1641) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) - -**Closed issues:** - -- Make `OffsetSizeTrait::IS_LARGE` as a const value [\#1658](https://github.com/apache/arrow-rs/issues/1658) -- Question: Why are there 3 types of `OffsetSizeTrait`s? [\#1638](https://github.com/apache/arrow-rs/issues/1638) -- Written Parquet file way bigger than input files [\#1627](https://github.com/apache/arrow-rs/issues/1627) -- Ensure there is a single zero in the offsets buffer for an empty ListArray. 
[\#1620](https://github.com/apache/arrow-rs/issues/1620) -- Filtering `UnionArray` Changes DataType [\#1595](https://github.com/apache/arrow-rs/issues/1595) - -**Merged pull requests:** - -- Fix docs.rs build [\#1696](https://github.com/apache/arrow-rs/pull/1696) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- support duration in ffi [\#1689](https://github.com/apache/arrow-rs/pull/1689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ryan-jacobs1](https://github.com/ryan-jacobs1)) -- fix bench command line options [\#1685](https://github.com/apache/arrow-rs/pull/1685) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kazuk](https://github.com/kazuk)) -- Enable branch protection [\#1679](https://github.com/apache/arrow-rs/pull/1679) ([tustvold](https://github.com/tustvold)) -- Fix logical merge conflict in \#1588 [\#1678](https://github.com/apache/arrow-rs/pull/1678) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Fix generate\_unions\_case for Rust case [\#1677](https://github.com/apache/arrow-rs/pull/1677) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Receive schema from flight data [\#1670](https://github.com/apache/arrow-rs/pull/1670) ([viirya](https://github.com/viirya)) -- unpin flatbuffers dependency version [\#1668](https://github.com/apache/arrow-rs/pull/1668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Cheappie](https://github.com/Cheappie)) -- Remove parquet dictionary converters \(\#1661\) [\#1662](https://github.com/apache/arrow-rs/pull/1662) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Minor: simplify the function `GenericListArray::get_type` [\#1650](https://github.com/apache/arrow-rs/pull/1650) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Pretty Print `UnionArray`s [\#1648](https://github.com/apache/arrow-rs/pull/1648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tfeda](https://github.com/tfeda)) -- Exclude `dict_id` and `dict_is_ordered` from equality comparison of `Field` [\#1647](https://github.com/apache/arrow-rs/pull/1647) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- expose row-group flush in public api [\#1634](https://github.com/apache/arrow-rs/pull/1634) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Cheappie](https://github.com/Cheappie)) -- Add `substring` support for `FixedSizeBinaryArray` [\#1633](https://github.com/apache/arrow-rs/pull/1633) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix UnionArray is\_null [\#1632](https://github.com/apache/arrow-rs/pull/1632) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Do not assume dictionaries exists in footer [\#1631](https://github.com/apache/arrow-rs/pull/1631) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pcjentsch](https://github.com/pcjentsch)) -- Add support for nested list arrays from parquet to arrow arrays \(\#993\) [\#1588](https://github.com/apache/arrow-rs/pull/1588) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add `async` into doc features [\#1349](https://github.com/apache/arrow-rs/pull/1349) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([HaoYang670](https://github.com/HaoYang670)) - - -## [13.0.0](https://github.com/apache/arrow-rs/tree/13.0.0) (2022-04-29) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/12.0.0...13.0.0) - -**Breaking changes:** - -- Update `parquet::basic::LogicalType` to be more idomatic 
[\#1612](https://github.com/apache/arrow-rs/pull/1612) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tfeda](https://github.com/tfeda)) -- Fix Null Mask Handling in `ArrayData`, `UnionArray`, and `MapArray` [\#1589](https://github.com/apache/arrow-rs/pull/1589) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Replace `&Option` with `Option<&T>` in several `arrow` and `parquet` APIs [\#1571](https://github.com/apache/arrow-rs/pull/1571) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tfeda](https://github.com/tfeda)) - -**Implemented enhancements:** - -- Read/write nested dictionary under fixed size list in ipc stream reader/write [\#1609](https://github.com/apache/arrow-rs/issues/1609) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support for `BinaryArray` in `substring` kernel [\#1593](https://github.com/apache/arrow-rs/issues/1593) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Read/write nested dictionary under large list in ipc stream reader/write [\#1584](https://github.com/apache/arrow-rs/issues/1584) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Read/write nested dictionary under map in ipc stream reader/write [\#1582](https://github.com/apache/arrow-rs/issues/1582) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement `Clone` for JSON `DecoderOptions` [\#1580](https://github.com/apache/arrow-rs/issues/1580) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add utf-8 validation checking to `substring` kernel [\#1575](https://github.com/apache/arrow-rs/issues/1575) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support casting to/from `DataType::Null` in `cast` kernel [\#1572](https://github.com/apache/arrow-rs/pull/1572) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([WinkerDu](https://github.com/WinkerDu)) - -**Fixed bugs:** - -- Parquet schema should allow scale == precision for decimal type [\#1606](https://github.com/apache/arrow-rs/issues/1606) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- ListArray::from\(ArrayData\) dereferences invalid pointer when offsets are empty [\#1601](https://github.com/apache/arrow-rs/issues/1601) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- ArrayData Equality Incorrect Null Mask Offset Handling [\#1599](https://github.com/apache/arrow-rs/issues/1599) -- Filtering UnionArray Incorrect Handles Runs [\#1598](https://github.com/apache/arrow-rs/issues/1598) -- \[Safety\] Filtering Dense UnionArray Produces Invalid Offsets [\#1596](https://github.com/apache/arrow-rs/issues/1596) -- \[Safety\] UnionBuilder Doesn't Check Types [\#1591](https://github.com/apache/arrow-rs/issues/1591) -- Union Layout Should Not Support Separate Validity Mask [\#1590](https://github.com/apache/arrow-rs/issues/1590) -- Incorrect nullable flag when reading maps \( test\_read\_maps fails when `force_validate` is active\) [\#1587](https://github.com/apache/arrow-rs/issues/1587) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Output of `ipc::reader::tests::projection_should_work` fails validation [\#1548](https://github.com/apache/arrow-rs/issues/1548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Incorrect min/max statistics for decimals with byte-array notation [\#1532](https://github.com/apache/arrow-rs/issues/1532) - -**Documentation updates:** - -- Minor: Clarify docs on `UnionBuilder::append_null` [\#1628](https://github.com/apache/arrow-rs/pull/1628) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Closed issues:** - -- Dense UnionArray Offsets Are i32 not i8 [\#1597](https://github.com/apache/arrow-rs/issues/1597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Replace `&Option` with 
`Option<&T>` in some APIs [\#1556](https://github.com/apache/arrow-rs/issues/1556) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve ergonomics of `parquet::basic::LogicalType` [\#1554](https://github.com/apache/arrow-rs/issues/1554) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Mark the current `substring` function as `unsafe` and rename it. [\#1541](https://github.com/apache/arrow-rs/issues/1541) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Requirements for Async Parquet API [\#1473](https://github.com/apache/arrow-rs/issues/1473) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Merged pull requests:** - -- Nit: use the standard function `div_ceil` [\#1629](https://github.com/apache/arrow-rs/pull/1629) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Update flatbuffers requirement from =2.1.1 to =2.1.2 [\#1622](https://github.com/apache/arrow-rs/pull/1622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix decimals min max statistics [\#1621](https://github.com/apache/arrow-rs/pull/1621) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([atefsawaed](https://github.com/atefsawaed)) -- Add example readme [\#1615](https://github.com/apache/arrow-rs/pull/1615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve docs and examples links on main readme [\#1614](https://github.com/apache/arrow-rs/pull/1614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Read/Write nested dictionaries under FixedSizeList in IPC [\#1610](https://github.com/apache/arrow-rs/pull/1610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Add `substring` support for 
binary [\#1608](https://github.com/apache/arrow-rs/pull/1608) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Parquet: schema validation should allow scale == precision for decimal type [\#1607](https://github.com/apache/arrow-rs/pull/1607) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sunchao](https://github.com/sunchao)) -- Don't access and validate offset buffer in ListArray::from\(ArrayData\) [\#1602](https://github.com/apache/arrow-rs/pull/1602) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Fix map nullable flag in `ParquetTypeConverter` [\#1592](https://github.com/apache/arrow-rs/pull/1592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) -- Read/write nested dictionary under large list in ipc stream reader/writer [\#1585](https://github.com/apache/arrow-rs/pull/1585) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Read/write nested dictionary under map in ipc stream reader/writer [\#1583](https://github.com/apache/arrow-rs/pull/1583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Derive `Clone` and `PartialEq` for json `DecoderOptions` [\#1581](https://github.com/apache/arrow-rs/pull/1581) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add utf-8 validation checking for `substring` [\#1577](https://github.com/apache/arrow-rs/pull/1577) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Use `Option` rather than `Option<&T>` for copy types in substring kernel [\#1576](https://github.com/apache/arrow-rs/pull/1576) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Use littleendian arrow files for 
`projection_should_work` [\#1573](https://github.com/apache/arrow-rs/pull/1573) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) - - -## [12.0.0](https://github.com/apache/arrow-rs/tree/12.0.0) (2022-04-15) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/11.1.0...12.0.0) - -**Breaking changes:** - -- Add `ArrowReaderOptions` to `ParquetFileArrowReader`, add option to skip decoding arrow metadata from parquet \(\#1459\) [\#1558](https://github.com/apache/arrow-rs/pull/1558) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Support `RecordBatch` with zero columns but non zero row count, add field to `RecordBatchOptions` \(\#1536\) [\#1552](https://github.com/apache/arrow-rs/pull/1552) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Consolidate JSON Reader options and `DecoderOptions` [\#1539](https://github.com/apache/arrow-rs/pull/1539) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Update `prost`, `prost-derive` and `prost-types` to 0.10, `tonic`, and `tonic-build` to `0.7` [\#1510](https://github.com/apache/arrow-rs/pull/1510) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) -- Add Json `DecoderOptions` and support custom `format_string` for each field [\#1451](https://github.com/apache/arrow-rs/pull/1451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sum12](https://github.com/sum12)) - -**Implemented enhancements:** - -- Read/write nested dictionary in ipc stream reader/writer [\#1565](https://github.com/apache/arrow-rs/issues/1565) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `FixedSizeBinary` in the Arrow C data interface [\#1553](https://github.com/apache/arrow-rs/issues/1553) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- 
Support Empty Column Projection in `ParquetRecordBatchReader` [\#1537](https://github.com/apache/arrow-rs/issues/1537) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Support `RecordBatch` with zero columns but non zero row count [\#1536](https://github.com/apache/arrow-rs/issues/1536) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support for `Date32`/`Date64`\<--\> `String`/`LargeString` in `cast` kernel [\#1535](https://github.com/apache/arrow-rs/issues/1535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support creating arrays from externally owned memory like `Vec` or `String` [\#1516](https://github.com/apache/arrow-rs/issues/1516) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Speed up the `substring` kernel [\#1511](https://github.com/apache/arrow-rs/issues/1511) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Handle Parquet Files With Inconsistent Timestamp Units [\#1459](https://github.com/apache/arrow-rs/issues/1459) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Fixed bugs:** - -- Error Infering Schema for LogicalType::UNKNOWN [\#1557](https://github.com/apache/arrow-rs/issues/1557) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Read dictionary from nested struct in ipc stream reader panics [\#1549](https://github.com/apache/arrow-rs/issues/1549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `filter` produces invalid sparse `UnionArray`s [\#1547](https://github.com/apache/arrow-rs/issues/1547) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Documentation for `GenericListBuilder` is not exposed. 
[\#1518](https://github.com/apache/arrow-rs/issues/1518) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- cannot read parquet file [\#1515](https://github.com/apache/arrow-rs/issues/1515) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- The `substring` kernel panics when chars \> U+0x007F [\#1478](https://github.com/apache/arrow-rs/issues/1478) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Hang due to infinite loop when reading some parquet files with RLE encoding and bit packing [\#1458](https://github.com/apache/arrow-rs/issues/1458) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Documentation updates:** - -- Improve JSON reader documentation [\#1559](https://github.com/apache/arrow-rs/pull/1559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve doc string for `substring` kernel [\#1529](https://github.com/apache/arrow-rs/pull/1529) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Expose documentation of `GenericListBuilder` [\#1525](https://github.com/apache/arrow-rs/pull/1525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([comath](https://github.com/comath)) -- Add a diagram to `take` kernel documentation [\#1524](https://github.com/apache/arrow-rs/pull/1524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Closed issues:** - -- Interesting benchmark results of `min_max_helper` [\#1400](https://github.com/apache/arrow-rs/issues/1400) - -**Merged pull requests:** - -- Fix incorrect `into_buffers` for UnionArray [\#1567](https://github.com/apache/arrow-rs/pull/1567) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Read/write nested dictionary in ipc stream reader/writer [\#1566](https://github.com/apache/arrow-rs/pull/1566) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Support FixedSizeBinary and FixedSizeList for the C data interface [\#1564](https://github.com/apache/arrow-rs/pull/1564) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) -- Split out ListArrayReader into separate module \(\#1483\) [\#1563](https://github.com/apache/arrow-rs/pull/1563) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Split out `MapArray` into separate module \(\#1483\) [\#1562](https://github.com/apache/arrow-rs/pull/1562) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Support empty projection in `ParquetRecordBatchReader` [\#1560](https://github.com/apache/arrow-rs/pull/1560) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- fix infinite loop in not fully packed bit-packed runs [\#1555](https://github.com/apache/arrow-rs/pull/1555) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add test for creating FixedSizeBinaryArray::try\_from\_sparse\_iter failed when given all Nones [\#1551](https://github.com/apache/arrow-rs/pull/1551) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Fix reading dictionaries from nested structs in ipc `StreamReader` [\#1550](https://github.com/apache/arrow-rs/pull/1550) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dispanser](https://github.com/dispanser)) -- Add support for Date32/64 \<--\> String/LargeString in `cast` kernel [\#1534](https://github.com/apache/arrow-rs/pull/1534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) -- fix clippy errors in 1.60 [\#1527](https://github.com/apache/arrow-rs/pull/1527) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Mark `remove-old-releases.sh` executable [\#1522](https://github.com/apache/arrow-rs/pull/1522) ([alamb](https://github.com/alamb)) -- Delete duplicate code in the `sort` kernel [\#1519](https://github.com/apache/arrow-rs/pull/1519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix reading nested lists from parquet files [\#1517](https://github.com/apache/arrow-rs/pull/1517) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) -- Speed up the `substring` kernel by about 2x [\#1512](https://github.com/apache/arrow-rs/pull/1512) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Add `new_from_strings` to create `MapArrays` [\#1507](https://github.com/apache/arrow-rs/pull/1507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Decouple buffer deallocation from ffi and allow creating buffers from rust vec [\#1494](https://github.com/apache/arrow-rs/pull/1494) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) - -## [11.1.0](https://github.com/apache/arrow-rs/tree/11.1.0) (2022-03-31) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/11.0.0...11.1.0) - -**Implemented enhancements:** - -- Implement `size_hint` and `ExactSizedIterator` for DecimalArray [\#1505](https://github.com/apache/arrow-rs/issues/1505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support calculate length by chars for `StringArray` [\#1493](https://github.com/apache/arrow-rs/issues/1493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `length` kernel support for `ListArray` [\#1470](https://github.com/apache/arrow-rs/issues/1470) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- The length kernel should work with `BinaryArray`s [\#1464](https://github.com/apache/arrow-rs/issues/1464) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- FFI for Arrow C Stream Interface [\#1348](https://github.com/apache/arrow-rs/issues/1348) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve performance of `DictionaryArray::try_new()` [\#1313](https://github.com/apache/arrow-rs/issues/1313) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Fixed bugs:** - -- MIRI error in math\_checked\_divide\_op/try\_from\_trusted\_len\_iter [\#1496](https://github.com/apache/arrow-rs/issues/1496) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Parquet Writer Incorrect Definition Levels for Nested NullArray [\#1480](https://github.com/apache/arrow-rs/issues/1480) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- FFI: ArrowArray::try\_from\_raw shouldn't clone [\#1425](https://github.com/apache/arrow-rs/issues/1425) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Parquet reader fails to read null list. [\#1399](https://github.com/apache/arrow-rs/issues/1399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Documentation updates:** - -- A small mistake in the doc of `BinaryArray` and `LargeBinaryArray` [\#1455](https://github.com/apache/arrow-rs/issues/1455) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- A small mistake in the doc of `GenericBinaryArray::take_iter_unchecked` [\#1454](https://github.com/apache/arrow-rs/issues/1454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add links in the doc of `BinaryOffsetSizeTrait` [\#1453](https://github.com/apache/arrow-rs/issues/1453) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- The doc of `FixedSizeBinaryArray` is confusing. 
[\#1452](https://github.com/apache/arrow-rs/issues/1452) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Clarify docs that SlicesIterator ignores null values [\#1504](https://github.com/apache/arrow-rs/pull/1504) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Update the doc of `BinaryArray` and `LargeBinaryArray` [\#1471](https://github.com/apache/arrow-rs/pull/1471) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) - -**Closed issues:** - -- `packed_simd` v.s. `portable_simd`, which should be used? [\#1492](https://github.com/apache/arrow-rs/issues/1492) -- Cleanup: Use Arrow take kernel Within parquet ListArrayReader [\#1482](https://github.com/apache/arrow-rs/issues/1482) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Merged pull requests:** - -- Implement `size_hint` and `ExactSizedIterator` for `DecimalArray` [\#1506](https://github.com/apache/arrow-rs/pull/1506) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add `StringArray::num_chars` for calculating number of characters [\#1503](https://github.com/apache/arrow-rs/pull/1503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Workaround nightly miri error in `try_from_trusted_len_iter` [\#1497](https://github.com/apache/arrow-rs/pull/1497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- update doc of array\_binary and array\_string [\#1491](https://github.com/apache/arrow-rs/pull/1491) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Use Arrow take kernel within ListArrayReader [\#1490](https://github.com/apache/arrow-rs/pull/1490) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) -- Add `length` 
kernel support for List Array [\#1488](https://github.com/apache/arrow-rs/pull/1488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Support sort for `Decimal` data type [\#1487](https://github.com/apache/arrow-rs/pull/1487) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) -- Fix reading/writing nested null arrays \(\#1480\) \(\#1036\) \(\#1399\) [\#1481](https://github.com/apache/arrow-rs/pull/1481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Implement ArrayEqual for UnionArray [\#1469](https://github.com/apache/arrow-rs/pull/1469) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Support the `length` kernel on Binary Array [\#1465](https://github.com/apache/arrow-rs/pull/1465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove Clone and copy source structs internally [\#1449](https://github.com/apache/arrow-rs/pull/1449) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix Parquet reader for null lists [\#1448](https://github.com/apache/arrow-rs/pull/1448) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([viirya](https://github.com/viirya)) -- Improve performance of DictionaryArray::try\_new\(\)  [\#1435](https://github.com/apache/arrow-rs/pull/1435) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Add FFI for Arrow C Stream Interface [\#1384](https://github.com/apache/arrow-rs/pull/1384) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) - -## [11.0.0](https://github.com/apache/arrow-rs/tree/11.0.0) (2022-03-17) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/10.0.0...11.0.0) - -**Breaking changes:** 
- -- Replace `filter_row_groups` with `ReadOptions` in parquet SerializedFileReader [\#1389](https://github.com/apache/arrow-rs/pull/1389) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yjshen](https://github.com/yjshen)) -- Implement projection for arrow `IPC Reader` file / streams [\#1339](https://github.com/apache/arrow-rs/pull/1339) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([Dandandan](https://github.com/Dandandan)) - -**Implemented enhancements:** - -- Fix generate\_interval\_case integration test failure [\#1445](https://github.com/apache/arrow-rs/issues/1445) -- Make the doc examples of `ListArray` and `LargeListArray` more readable [\#1433](https://github.com/apache/arrow-rs/issues/1433) -- Redundant `if` and `abs` in `shift()` [\#1427](https://github.com/apache/arrow-rs/issues/1427) -- Improve substring kernel performance [\#1422](https://github.com/apache/arrow-rs/issues/1422) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add missing value\_unchecked\(\) of `FixedSizeBinaryArray` [\#1419](https://github.com/apache/arrow-rs/issues/1419) -- Remove duplicate bound check in function `shift` [\#1408](https://github.com/apache/arrow-rs/issues/1408) -- Support dictionary array in C data interface [\#1397](https://github.com/apache/arrow-rs/issues/1397) -- filter kernel should work with `UnionArray`s [\#1394](https://github.com/apache/arrow-rs/issues/1394) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- filter kernel should work with `FixedSizeListArrays`s [\#1393](https://github.com/apache/arrow-rs/issues/1393) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add doc examples for creating FixedSizeListArray [\#1392](https://github.com/apache/arrow-rs/issues/1392) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Update `rust-version` to 1.59 [\#1377](https://github.com/apache/arrow-rs/issues/1377) -- 
Arrow IPC projection support [\#1338](https://github.com/apache/arrow-rs/issues/1338) -- Implement basic FlightSQL Server [\#1386](https://github.com/apache/arrow-rs/pull/1386) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([wangfenjin](https://github.com/wangfenjin)) - -**Fixed bugs:** - -- DictionaryArray::try\_new ignores validity bitmap of the keys [\#1429](https://github.com/apache/arrow-rs/issues/1429) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- The doc of `GenericListArray` is confusing [\#1424](https://github.com/apache/arrow-rs/issues/1424) -- DeltaBitPackDecoder Incorrectly Handles Non-Zero MiniBlock Bit Width Padding [\#1417](https://github.com/apache/arrow-rs/issues/1417) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- DeltaBitPackEncoder Pads Miniblock BitWidths With Arbitrary Values [\#1416](https://github.com/apache/arrow-rs/issues/1416) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Possible unaligned write with MutableBuffer::push [\#1410](https://github.com/apache/arrow-rs/issues/1410) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Integration Test is failing on master branch [\#1398](https://github.com/apache/arrow-rs/issues/1398) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Documentation updates:** - -- Rewrite doc of `GenericListArray` [\#1450](https://github.com/apache/arrow-rs/pull/1450) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix integration doc about build.ninja location [\#1438](https://github.com/apache/arrow-rs/pull/1438) ([viirya](https://github.com/viirya)) - -**Merged pull requests:** - -- Rewrite doc example of `ListArray` and `LargeListArray` [\#1447](https://github.com/apache/arrow-rs/pull/1447) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix generate\_interval\_case in integration test 
[\#1446](https://github.com/apache/arrow-rs/pull/1446) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix generate\_decimal128\_case in integration test [\#1440](https://github.com/apache/arrow-rs/pull/1440) ([viirya](https://github.com/viirya)) -- `filter` kernel should work with FixedSizeListArrays [\#1434](https://github.com/apache/arrow-rs/pull/1434) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Support nullable keys in DictionaryArray::try\_new [\#1430](https://github.com/apache/arrow-rs/pull/1430) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- remove redundant if/clamp\_min/abs [\#1428](https://github.com/apache/arrow-rs/pull/1428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Add doc example for creating `FixedSizeListArray` [\#1426](https://github.com/apache/arrow-rs/pull/1426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Directly write to MutableBuffer in substring [\#1423](https://github.com/apache/arrow-rs/pull/1423) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fix possibly unaligned writes in MutableBuffer [\#1421](https://github.com/apache/arrow-rs/pull/1421) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Add value\_unchecked\(\) and unit test [\#1420](https://github.com/apache/arrow-rs/pull/1420) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Fix DeltaBitPack MiniBlock Bit Width Padding [\#1418](https://github.com/apache/arrow-rs/pull/1418) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Update zstd requirement from 0.10 to 0.11 
[\#1415](https://github.com/apache/arrow-rs/pull/1415) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Set `default-features = false` for `zstd` in the parquet crate to support `wasm32-unknown-unknown` [\#1414](https://github.com/apache/arrow-rs/pull/1414) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kylebarron](https://github.com/kylebarron)) -- Add support for `UnionArray` in `filter` kernel [\#1412](https://github.com/apache/arrow-rs/pull/1412) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Remove duplicate bound check in the function `shift` [\#1409](https://github.com/apache/arrow-rs/pull/1409) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Add dictionary support for C data interface [\#1407](https://github.com/apache/arrow-rs/pull/1407) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sunchao](https://github.com/sunchao)) -- Fix a small spelling mistake in docs. 
[\#1406](https://github.com/apache/arrow-rs/pull/1406) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Add unit test to check `FixedSizeBinaryArray` input all none [\#1405](https://github.com/apache/arrow-rs/pull/1405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Move csv Parser trait and its implementations to utils module [\#1385](https://github.com/apache/arrow-rs/pull/1385) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([sum12](https://github.com/sum12)) - -## [10.0.0](https://github.com/apache/arrow-rs/tree/10.0.0) (2022-03-04) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/9.1.0...10.0.0) - -**Breaking changes:** - -- Remove existing has\_ methods for optional fields in `ColumnChunkMetaData` [\#1346](https://github.com/apache/arrow-rs/pull/1346) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) -- Remove redundant `has_` methods in `ColumnChunkMetaData` [\#1345](https://github.com/apache/arrow-rs/pull/1345) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) - -**Implemented enhancements:** - -- Add extract month and day in temporal.rs [\#1387](https://github.com/apache/arrow-rs/issues/1387) -- Add clone to `IpcWriteOptions` [\#1381](https://github.com/apache/arrow-rs/issues/1381) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `MapArray` in `filter` kernel [\#1378](https://github.com/apache/arrow-rs/issues/1378) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `week` temporal kernel [\#1375](https://github.com/apache/arrow-rs/issues/1375) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve performance of `compare_dict_op` [\#1371](https://github.com/apache/arrow-rs/issues/1371) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support for LargeUtf8 in json writer [\#1357](https://github.com/apache/arrow-rs/issues/1357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Make `arrow::array::builder::MapBuilder` public [\#1354](https://github.com/apache/arrow-rs/issues/1354) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Refactor `StructArray::from` [\#1351](https://github.com/apache/arrow-rs/issues/1351) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Refactor `RecordBatch::validate_new_batch` [\#1350](https://github.com/apache/arrow-rs/issues/1350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove redundant has\_ methods for optional column metadata fields [\#1344](https://github.com/apache/arrow-rs/issues/1344) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Add `write` method to JsonWriter [\#1340](https://github.com/apache/arrow-rs/issues/1340) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Refactor the code of `Bitmap::new` [\#1337](https://github.com/apache/arrow-rs/issues/1337) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Use DictionaryArray's iterator in `compare_dict_op` [\#1329](https://github.com/apache/arrow-rs/issues/1329) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `as_decimal_array(arr: &dyn Array) -> &DecimalArray` [\#1312](https://github.com/apache/arrow-rs/issues/1312) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- More ergonomic / idiomatic primitive array creation from iterators [\#1298](https://github.com/apache/arrow-rs/issues/1298) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement DictionaryArray support in `eq_dyn`, `neq_dyn`, `lt_dyn`, `lt_eq_dyn`, `gt_dyn`, `gt_eq_dyn` [\#1201](https://github.com/apache/arrow-rs/issues/1201) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Fixed bugs:** - -- `cargo clippy` fails on 
the `master` branch [\#1362](https://github.com/apache/arrow-rs/issues/1362) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `ArrowArray::try_from_raw` should not assume the pointers are from Arc [\#1333](https://github.com/apache/arrow-rs/issues/1333) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix CSV Writer::new to accept delimiter and make WriterBuilder::build use it [\#1328](https://github.com/apache/arrow-rs/issues/1328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Make bounds configurable via builder when reading CSV [\#1327](https://github.com/apache/arrow-rs/issues/1327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `with_datetime_format()` to CSV WriterBuilder [\#1272](https://github.com/apache/arrow-rs/issues/1272) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Performance improvements:** - -- Improve performance of `min` and `max` aggregation kernels without nulls [\#1373](https://github.com/apache/arrow-rs/issues/1373) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Closed issues:** - -- Consider removing redundant has\_XXX metadata functions in `ColumnChunkMetadata` [\#1332](https://github.com/apache/arrow-rs/issues/1332) - -**Merged pull requests:** - -- Support extract `day` and `month` in temporal.rs [\#1388](https://github.com/apache/arrow-rs/pull/1388) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Add write method to Json Writer [\#1383](https://github.com/apache/arrow-rs/pull/1383) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) -- Derive `Clone` for `IpcWriteOptions` [\#1382](https://github.com/apache/arrow-rs/pull/1382) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) -- feat: support maps in MutableArrayData 
[\#1379](https://github.com/apache/arrow-rs/pull/1379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) -- Support extract `week` in temporal.rs [\#1376](https://github.com/apache/arrow-rs/pull/1376) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Speed up the function `min_max_string` [\#1374](https://github.com/apache/arrow-rs/pull/1374) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Improve performance of dictionary kernels, add benchmark and add `take_iter_unchecked` [\#1372](https://github.com/apache/arrow-rs/pull/1372) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Update pyo3 requirement from 0.15 to 0.16 [\#1369](https://github.com/apache/arrow-rs/pull/1369) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update contributing guide [\#1368](https://github.com/apache/arrow-rs/pull/1368) ([HaoYang670](https://github.com/HaoYang670)) -- Allow primitive array creation from iterators of PrimitiveTypes \(as well as `Option`\) [\#1367](https://github.com/apache/arrow-rs/pull/1367) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Update flatbuffers requirement from =2.1.0 to =2.1.1 [\#1364](https://github.com/apache/arrow-rs/pull/1364) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix clippy lints [\#1363](https://github.com/apache/arrow-rs/pull/1363) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Refactor `RecordBatch::validate_new_batch` [\#1361](https://github.com/apache/arrow-rs/pull/1361) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Refactor `StructArray::from` [\#1360](https://github.com/apache/arrow-rs/pull/1360) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Update flatbuffers requirement from =2.0.0 to =2.1.0 [\#1359](https://github.com/apache/arrow-rs/pull/1359) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- fix: add LargeUtf8 support in json writer [\#1358](https://github.com/apache/arrow-rs/pull/1358) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tiphaineruy](https://github.com/tiphaineruy)) -- Add `as_decimal_array` function [\#1356](https://github.com/apache/arrow-rs/pull/1356) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) -- Publicly export arrow::array::MapBuilder [\#1355](https://github.com/apache/arrow-rs/pull/1355) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tjwilson90](https://github.com/tjwilson90)) -- Add with\_datetime\_format to csv WriterBuilder [\#1347](https://github.com/apache/arrow-rs/pull/1347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Refactor `Bitmap::new` [\#1343](https://github.com/apache/arrow-rs/pull/1343) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove delimiter from csv Writer [\#1342](https://github.com/apache/arrow-rs/pull/1342) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Make bounds configurable in csv ReaderBuilder [\#1341](https://github.com/apache/arrow-rs/pull/1341) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- `ArrowArray::try_from_raw` should not assume the pointers are from Arc 
[\#1334](https://github.com/apache/arrow-rs/pull/1334) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Use DictionaryArray's iterator in `compare_dict_op` [\#1330](https://github.com/apache/arrow-rs/pull/1330) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Implement DictionaryArray support in neq\_dyn, lt\_dyn, lt\_eq\_dyn, gt\_dyn, gt\_eq\_dyn [\#1326](https://github.com/apache/arrow-rs/pull/1326) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Arrow Rust + Conbench Integration [\#1289](https://github.com/apache/arrow-rs/pull/1289) ([dianaclarke](https://github.com/dianaclarke)) - -## [9.1.0](https://github.com/apache/arrow-rs/tree/9.1.0) (2022-02-19) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/9.0.2...9.1.0) - -**Implemented enhancements:** - -- Exposing page encoding stats [\#1321](https://github.com/apache/arrow-rs/issues/1321) -- Improve filter performance by special casing high and low selectivity predicates [\#1288](https://github.com/apache/arrow-rs/issues/1288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Speed up `DeltaBitPackDecoder` [\#1281](https://github.com/apache/arrow-rs/issues/1281) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Fix all clippy lints in arrow crate [\#1255](https://github.com/apache/arrow-rs/issues/1255) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Expose page encoding `ColumnChunkMetadata` [\#1322](https://github.com/apache/arrow-rs/pull/1322) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) -- Expose column index and offset index in `ColumnChunkMetadata` [\#1318](https://github.com/apache/arrow-rs/pull/1318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) -- Expose bloom filter 
offset in `ColumnChunkMetadata` [\#1309](https://github.com/apache/arrow-rs/pull/1309) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) -- Add `DictionaryArray::try_new()` to create dictionaries from pre existing arrays [\#1300](https://github.com/apache/arrow-rs/pull/1300) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add `DictionaryArray::keys_iter`, and `take_iter` for other array types [\#1296](https://github.com/apache/arrow-rs/pull/1296) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Make `rle` decoder public under `experimental` feature [\#1271](https://github.com/apache/arrow-rs/pull/1271) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) -- Add `DictionaryArray` support in `eq_dyn` kernel [\#1263](https://github.com/apache/arrow-rs/pull/1263) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) - -**Fixed bugs:** - -- `len` is not a parameter of `MutableArrayData::extend` [\#1316](https://github.com/apache/arrow-rs/issues/1316) -- module `data_type` is private in Rust Parquet 8.0.0 [\#1302](https://github.com/apache/arrow-rs/issues/1302) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Test failure: bit\_chunk\_iterator [\#1294](https://github.com/apache/arrow-rs/issues/1294) -- csv\_writer benchmark fails with "no such file or directory" [\#1292](https://github.com/apache/arrow-rs/issues/1292) - -**Documentation updates:** - -- Fix warnings in `cargo doc` [\#1268](https://github.com/apache/arrow-rs/pull/1268) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Performance improvements:** - -- Vectorize DeltaBitPackDecoder, up to 5x faster decoding 
[\#1284](https://github.com/apache/arrow-rs/pull/1284) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Skip zero-ing primitive nulls [\#1280](https://github.com/apache/arrow-rs/pull/1280) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add specialized filter kernels in `compute` module \(up to 10x faster\) [\#1248](https://github.com/apache/arrow-rs/pull/1248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) - -**Closed issues:** - -- Expose column and offset index metadata offset [\#1317](https://github.com/apache/arrow-rs/issues/1317) -- Expose bloom filter metadata offset [\#1308](https://github.com/apache/arrow-rs/issues/1308) -- Improve ergonomics to construct `DictionaryArrays` from `Key` and `Value` arrays [\#1299](https://github.com/apache/arrow-rs/issues/1299) -- Make it easier to iterate over `DictionaryArray` [\#1295](https://github.com/apache/arrow-rs/issues/1295) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- (WON'T FIX) Don't Intertwine Bit and Byte Aligned Operations in `BitReader` [\#1282](https://github.com/apache/arrow-rs/issues/1282) -- how to create arrow::array from streamReader [\#1278](https://github.com/apache/arrow-rs/issues/1278) -- Remove scientific notation when converting floats to strings. 
[\#983](https://github.com/apache/arrow-rs/issues/983) - -**Merged pull requests:** - -- Update the document of function `MutableArrayData::extend` [\#1336](https://github.com/apache/arrow-rs/pull/1336) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix clippy lint `dead_code` [\#1324](https://github.com/apache/arrow-rs/pull/1324) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- fix test bug and ensure that bloom filter metadata is serialized in `to_thrift` [\#1320](https://github.com/apache/arrow-rs/pull/1320) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([shanisolomon](https://github.com/shanisolomon)) -- Enable more clippy lints in arrow [\#1315](https://github.com/apache/arrow-rs/pull/1315) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Fix clippy lint `clippy::type_complexity` [\#1310](https://github.com/apache/arrow-rs/pull/1310) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Fix clippy lint `clippy::float_equality_without_abs` [\#1305](https://github.com/apache/arrow-rs/pull/1305) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Fix clippy `clippy::vec_init_then_push` lint [\#1303](https://github.com/apache/arrow-rs/pull/1303) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gsserge](https://github.com/gsserge)) -- Fix failing csv\_writer bench [\#1293](https://github.com/apache/arrow-rs/pull/1293) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) -- Changes for 9.0.2 [\#1291](https://github.com/apache/arrow-rs/pull/1291) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
[[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) -- Fix bitmask creation also for simd comparisons with scalar [\#1290](https://github.com/apache/arrow-rs/pull/1290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Fix simd comparison kernels [\#1286](https://github.com/apache/arrow-rs/pull/1286) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Restrict Decoder to compatible types \(\#1276\) [\#1277](https://github.com/apache/arrow-rs/pull/1277) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Fix some clippy lints in parquet crate, rename `LevelEncoder` variants to conform to Rust standards [\#1273](https://github.com/apache/arrow-rs/pull/1273) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([HaoYang670](https://github.com/HaoYang670)) -- Use new DecimalArray creation API in arrow crate [\#1249](https://github.com/apache/arrow-rs/pull/1249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve `DecimalArray` API ergonomics: add `iter()`, `FromIterator`, `with_precision_and_scale` [\#1223](https://github.com/apache/arrow-rs/pull/1223) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - - -## [9.0.2](https://github.com/apache/arrow-rs/tree/9.0.2) (2022-02-09) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/8.0.0...9.0.2) - -**Breaking changes:** - -- Add `Send` + `Sync` to `DataType`, `RowGroupReader`, `FileReader`, `ChunkReader`. 
[\#1264](https://github.com/apache/arrow-rs/issues/1264) -- Rename the function `Bitmap::len` to `Bitmap::bit_len` to clarify its meaning [\#1242](https://github.com/apache/arrow-rs/pull/1242) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove unused / broken `memory-check` feature [\#1222](https://github.com/apache/arrow-rs/pull/1222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Potentially buffer multiple `RecordBatches` before writing a parquet row group in `ArrowWriter` [\#1214](https://github.com/apache/arrow-rs/pull/1214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- Add `async` arrow parquet reader [\#1154](https://github.com/apache/arrow-rs/pull/1154) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Rename `Bitmap::len` to `Bitmap::bit_len` [\#1233](https://github.com/apache/arrow-rs/issues/1233) -- Extend CSV schema inference to allow scientific notation for floating point types [\#1215](https://github.com/apache/arrow-rs/issues/1215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Write Multiple RecordBatch to Parquet Row Group [\#1211](https://github.com/apache/arrow-rs/issues/1211) -- Add doc examples for `eq_dyn` etc. 
[\#1202](https://github.com/apache/arrow-rs/issues/1202) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add comparison kernels for `BinaryArray` [\#1108](https://github.com/apache/arrow-rs/issues/1108) -- `impl ArrowNativeType for i128` [\#1098](https://github.com/apache/arrow-rs/issues/1098) -- Remove `Copy` trait bound from dyn scalar kernels [\#1243](https://github.com/apache/arrow-rs/pull/1243) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) -- Add `into_inner` for IPC `FileWriter` [\#1236](https://github.com/apache/arrow-rs/pull/1236) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) -- \[Minor\]Re-export `array::builder::make_builder` to make it available for downstream [\#1235](https://github.com/apache/arrow-rs/pull/1235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) - -**Fixed bugs:** - -- Parquet v8.0.0 panics when reading all null column to NullArray [\#1245](https://github.com/apache/arrow-rs/issues/1245) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Get `Unknown configuration option rust-version` when running the rust format command [\#1240](https://github.com/apache/arrow-rs/issues/1240) -- `Bitmap` Length Validation is Incorrect [\#1231](https://github.com/apache/arrow-rs/issues/1231) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Writing sliced `ListArray` or `MapArray` ignore offsets [\#1226](https://github.com/apache/arrow-rs/issues/1226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Remove broken `memory-tracking` crate feature [\#1171](https://github.com/apache/arrow-rs/issues/1171) -- Revert making `parquet::data_type` and `parquet::arrow::schema` experimental [\#1244](https://github.com/apache/arrow-rs/pull/1244) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
([tustvold](https://github.com/tustvold)) - -**Documentation updates:** - -- Update parquet crate documentation and examples [\#1253](https://github.com/apache/arrow-rs/pull/1253) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Refresh parquet readme / contributing guide [\#1252](https://github.com/apache/arrow-rs/pull/1252) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- Add docs examples for dynamically compare functions [\#1250](https://github.com/apache/arrow-rs/pull/1250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Add Rust Docs examples for UnionArray [\#1241](https://github.com/apache/arrow-rs/pull/1241) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Improve documentation for Bitmap [\#1237](https://github.com/apache/arrow-rs/pull/1237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Performance improvements:** - -- Improve performance for arithmetic kernels with `simd` feature enabled \(except for division/modulo\) [\#1221](https://github.com/apache/arrow-rs/pull/1221) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Do not concatenate identical dictionaries [\#1219](https://github.com/apache/arrow-rs/pull/1219) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Preserve dictionary encoding when decoding parquet into Arrow arrays, 60x perf improvement \(\#171\) [\#1180](https://github.com/apache/arrow-rs/pull/1180) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) - -**Closed issues:** - -- `UnalignedBitChunkIterator` to that iterates through already 
aligned `u64` blocks [\#1227](https://github.com/apache/arrow-rs/issues/1227) -- Remove unused `ArrowArrayReader` in parquet [\#1197](https://github.com/apache/arrow-rs/issues/1197) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Merged pull requests:** - -- Upgrade clap to 3.0.0 [\#1261](https://github.com/apache/arrow-rs/pull/1261) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jimexist](https://github.com/Jimexist)) -- Update chrono-tz requirement from 0.4 to 0.6 [\#1259](https://github.com/apache/arrow-rs/pull/1259) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Update zstd requirement from 0.9 to 0.10 [\#1257](https://github.com/apache/arrow-rs/pull/1257) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix NullArrayReader \(\#1245\) [\#1246](https://github.com/apache/arrow-rs/pull/1246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- dyn compare for binary array [\#1238](https://github.com/apache/arrow-rs/pull/1238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Remove arrow array reader \(\#1197\) [\#1234](https://github.com/apache/arrow-rs/pull/1234) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Fix null bitmap length validation \(\#1231\) [\#1232](https://github.com/apache/arrow-rs/pull/1232) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Faster bitmask iteration [\#1228](https://github.com/apache/arrow-rs/pull/1228) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add non utf8 values into the test cases of BinaryArray 
comparison [\#1220](https://github.com/apache/arrow-rs/pull/1220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Update DECIMAL\_RE to allow scientific notation in auto inferred schemas [\#1216](https://github.com/apache/arrow-rs/pull/1216) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([pjmore](https://github.com/pjmore)) -- Fix simd comparison kernels [\#1286](https://github.com/apache/arrow-rs/pull/1286) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Fix bitmask creation also for simd comparisons with scalar [\#1290](https://github.com/apache/arrow-rs/pull/1290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) - -## [8.0.0](https://github.com/apache/arrow-rs/tree/8.0.0) (2022-01-20) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/7.0.0...8.0.0) - -**Breaking changes:** - -- Return error from JSON writer rather than panic [\#1205](https://github.com/apache/arrow-rs/pull/1205) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ted-Jiang](https://github.com/Ted-Jiang)) -- Remove `ArrowSignedNumericType ` to Simplify and reduce code duplication in arithmetic kernels [\#1161](https://github.com/apache/arrow-rs/pull/1161) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Restrict RecordReader and friends to scalar types \(\#1132\) [\#1155](https://github.com/apache/arrow-rs/pull/1155) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Move more parquet functionality behind experimental feature flag \(\#1032\) [\#1134](https://github.com/apache/arrow-rs/pull/1134) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- Parquet reader should be able to read 
structs within list [\#1186](https://github.com/apache/arrow-rs/issues/1186) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Disable serde\_json `arbitrary_precision` feature flag [\#1174](https://github.com/apache/arrow-rs/issues/1174) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Simplify and reduce code duplication in arithmetic.rs [\#1160](https://github.com/apache/arrow-rs/issues/1160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Return `Err` from JSON writer rather than `panic!` for unsupported types [\#1157](https://github.com/apache/arrow-rs/issues/1157) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `scalar` mathematics kernels for `Array` and scalar value [\#1153](https://github.com/apache/arrow-rs/issues/1153) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `DecimalArray` in sort kernel [\#1137](https://github.com/apache/arrow-rs/issues/1137) -- Parquet Fuzz Tests [\#1053](https://github.com/apache/arrow-rs/issues/1053) -- BooleanBufferBuilder Append Packed [\#1038](https://github.com/apache/arrow-rs/issues/1038) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- parquet Performance Optimization: StructArrayReader Redundant Level & Bitmap Computation [\#1034](https://github.com/apache/arrow-rs/issues/1034) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Reduce Public Parquet API [\#1032](https://github.com/apache/arrow-rs/issues/1032) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Add `from_iter_values` for binary array [\#1188](https://github.com/apache/arrow-rs/pull/1188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) -- Add support for `MapArray` in json writer [\#1149](https://github.com/apache/arrow-rs/pull/1149) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) - -**Fixed bugs:** - -- Empty string arrays 
with no nulls are not equal [\#1208](https://github.com/apache/arrow-rs/issues/1208) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Pretty print a `RecordBatch` containing `Float16` triggers a panic [\#1193](https://github.com/apache/arrow-rs/issues/1193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Writing structs nested in lists produces an incorrect output [\#1184](https://github.com/apache/arrow-rs/issues/1184) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Undefined behavior for `GenericStringArray::from_iter_values` if reported iterator upper bound is incorrect [\#1144](https://github.com/apache/arrow-rs/issues/1144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Interval comparisons with `simd` feature asserts [\#1136](https://github.com/apache/arrow-rs/issues/1136) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- RecordReader Permits Illegal Types [\#1132](https://github.com/apache/arrow-rs/issues/1132) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Security fixes:** - -- Fix undefined behavior in GenericStringArray::from\_iter\_values [\#1145](https://github.com/apache/arrow-rs/pull/1145) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- parquet: Optimized ByteArrayReader, Add UTF-8 Validation \(\#1040\) [\#1082](https://github.com/apache/arrow-rs/pull/1082) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) - -**Documentation updates:** - -- Update parquet crate readme [\#1192](https://github.com/apache/arrow-rs/pull/1192) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- Document safety justification of some uses of `from_trusted_len_iter` [\#1148](https://github.com/apache/arrow-rs/pull/1148) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Performance improvements:** - -- Improve parquet reading performance for columns with nulls by preserving bitmask when possible \(\#1037\) [\#1054](https://github.com/apache/arrow-rs/pull/1054) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Improve parquet performance: Skip levels computation for required struct arrays in parquet [\#1035](https://github.com/apache/arrow-rs/pull/1035) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) - -**Closed issues:** - -- Generify ColumnReaderImpl and RecordReader [\#1040](https://github.com/apache/arrow-rs/issues/1040) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Parquet Preserve BitMask [\#1037](https://github.com/apache/arrow-rs/issues/1037) - -**Merged pull requests:** - -- fix a bug in variable sized equality [\#1209](https://github.com/apache/arrow-rs/pull/1209) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) -- Pin WASM / packed SIMD tests to nightly-2022-01-17 [\#1204](https://github.com/apache/arrow-rs/pull/1204) ([alamb](https://github.com/alamb)) -- feat: add support for casting Duration/Interval to Int64Array [\#1196](https://github.com/apache/arrow-rs/pull/1196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([e-dard](https://github.com/e-dard)) -- Add comparison support for fully qualified BinaryArray [\#1195](https://github.com/apache/arrow-rs/pull/1195) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) -- Fix in display of `Float16Array` [\#1194](https://github.com/apache/arrow-rs/pull/1194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([helgikrs](https://github.com/helgikrs)) -- update 
nightly version for miri [\#1189](https://github.com/apache/arrow-rs/pull/1189) ([Jimexist](https://github.com/Jimexist)) -- feat\(parquet\): support for reading structs nested within lists [\#1187](https://github.com/apache/arrow-rs/pull/1187) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) -- fix: Fix a bug in how definition levels are calculated for nested structs in a list [\#1185](https://github.com/apache/arrow-rs/pull/1185) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) -- Truncate bitmask on BooleanBufferBuilder::resize: [\#1183](https://github.com/apache/arrow-rs/pull/1183) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add ticket reference for false positive in clippy [\#1181](https://github.com/apache/arrow-rs/pull/1181) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Fix record formatting in 1.58 [\#1178](https://github.com/apache/arrow-rs/pull/1178) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Serialize i128 as JSON string [\#1175](https://github.com/apache/arrow-rs/pull/1175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Support DecimalType in `sort` and `take` kernels [\#1172](https://github.com/apache/arrow-rs/pull/1172) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liukun4515](https://github.com/liukun4515)) -- Fix new clippy lints introduced in Rust 1.58 [\#1170](https://github.com/apache/arrow-rs/pull/1170) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Fix compilation error with simd feature 
[\#1169](https://github.com/apache/arrow-rs/pull/1169) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Fix bug while writing parquet with empty lists of structs [\#1166](https://github.com/apache/arrow-rs/pull/1166) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([helgikrs](https://github.com/helgikrs)) -- Use tempfile for parquet tests [\#1165](https://github.com/apache/arrow-rs/pull/1165) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Remove left over dev/README.md file from arrow/arrow-rs split [\#1162](https://github.com/apache/arrow-rs/pull/1162) ([alamb](https://github.com/alamb)) -- Add multiply\_scalar kernel [\#1159](https://github.com/apache/arrow-rs/pull/1159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Fuzz test different parquet encodings [\#1156](https://github.com/apache/arrow-rs/pull/1156) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Add subtract\_scalar kernel [\#1152](https://github.com/apache/arrow-rs/pull/1152) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Add add\_scalar kernel [\#1151](https://github.com/apache/arrow-rs/pull/1151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Move simd right out of for\_each loop [\#1150](https://github.com/apache/arrow-rs/pull/1150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Internal Remove `GenericStringArray::from_vec` and `GenericStringArray::from_opt_vec` [\#1147](https://github.com/apache/arrow-rs/pull/1147) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Implement SIMD comparison operations for types with less than 4 lanes \(i128\) 
[\#1146](https://github.com/apache/arrow-rs/pull/1146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Extends parquet fuzz tests to also tests nulls, dictionaries and row groups with multiple pages \(\#1053\) [\#1110](https://github.com/apache/arrow-rs/pull/1110) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Generify ColumnReaderImpl and RecordReader \(\#1040\) [\#1041](https://github.com/apache/arrow-rs/pull/1041) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- BooleanBufferBuilder::append\_packed \(\#1038\) [\#1039](https://github.com/apache/arrow-rs/pull/1039) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) - -## [7.0.0](https://github.com/apache/arrow-rs/tree/7.0.0) (2022-1-07) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.5.0...7.0.0) - -### Arrow - -**Breaking changes:** -- `pretty_format_batches` now returns `Result` rather than `String`: [#975](https://github.com/apache/arrow-rs/pull/975) -- `MutableBuffer::typed_data_mut` is marked `unsafe`: [#1029](https://github.com/apache/arrow-rs/pull/1029) -- UnionArray updated match latest Arrow spec, added `UnionMode`, `UnionArray::new()` marked `unsafe`: [#885](https://github.com/apache/arrow-rs/pull/885) - -**New Features:** -- Support for `Float16Array` types [#888](https://github.com/apache/arrow-rs/pull/888) -- IPC support for `UnionArray` [#654](https://github.com/apache/arrow-rs/issues/654) -- Dynamic comparison kernels for scalars (e.g. 
`eq_dyn_scalar`), including `DictionaryArray`: [#1113](https://github.com/apache/arrow-rs/issues/1113) - -**Enhancements:** -- Added `Schema::with_metadata` and `Field::with_metadata` [#1092](https://github.com/apache/arrow-rs/pull/1092) -- Support for custom datetime format for inference and parsing csv files [#1112](https://github.com/apache/arrow-rs/pull/1112) -- Implement `Array` for `ArrayRef` for easier use [#1129](https://github.com/apache/arrow-rs/pull/1129) -- Pretty printing display support for `FixedSizeBinaryArray` [#1097](https://github.com/apache/arrow-rs/pull/1097) -- Dependency Upgrades: `pyo3`, `parquet-format`, `prost`, `tonic` -- Avoid allocating vector of indices in `lexicographical_partition_ranges`[#998](https://github.com/apache/arrow-rs/pull/998) - -### Parquet - -**Fixed bugs:** -- (parquet) Fix reading of dictionary encoded pages with null values: [#1130](https://github.com/apache/arrow-rs/pull/1130) - - -# Changelog - -## [6.5.0](https://github.com/apache/arrow-rs/tree/6.5.0) (2021-12-23) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.4.0...6.5.0) - -* [092fc64bbb019244887ebd0d9c9a2d3e3a9aebc0](https://github.com/apache/arrow-rs/commit/092fc64bbb019244887ebd0d9c9a2d3e3a9aebc0) support cast decimal to decimal ([#1084](https://github.com/apache/arrow-rs/pull/1084)) ([#1093](https://github.com/apache/arrow-rs/pull/1093)) -* [01459762ed18b504e00e7b2818fce91f19188b1e](https://github.com/apache/arrow-rs/commit/01459762ed18b504e00e7b2818fce91f19188b1e) Fix like regex escaping ([#1085](https://github.com/apache/arrow-rs/pull/1085)) ([#1090](https://github.com/apache/arrow-rs/pull/1090)) -* [7c748bfccbc2eac0c1138378736b70dcb7e26a5b](https://github.com/apache/arrow-rs/commit/7c748bfccbc2eac0c1138378736b70dcb7e26a5b) support cast decimal to signed numeric ([#1073](https://github.com/apache/arrow-rs/pull/1073)) ([#1089](https://github.com/apache/arrow-rs/pull/1089)) -* 
[bd3600b6483c253ae57a38928a636d39a6b7cb02](https://github.com/apache/arrow-rs/commit/bd3600b6483c253ae57a38928a636d39a6b7cb02) parquet: Use constant for RLE decoder buffer size ([#1070](https://github.com/apache/arrow-rs/pull/1070)) ([#1088](https://github.com/apache/arrow-rs/pull/1088)) -* [2b5c53ecd92468fd95328637a15de7f35b6fcf28](https://github.com/apache/arrow-rs/commit/2b5c53ecd92468fd95328637a15de7f35b6fcf28) Box RleDecoder index buffer ([#1061](https://github.com/apache/arrow-rs/pull/1061)) ([#1062](https://github.com/apache/arrow-rs/pull/1062)) ([#1081](https://github.com/apache/arrow-rs/pull/1081)) -* [78721bc1a467177679ad6196b994759cf4d73377](https://github.com/apache/arrow-rs/commit/78721bc1a467177679ad6196b994759cf4d73377) BooleanBufferBuilder correct buffer length ([#1051](https://github.com/apache/arrow-rs/pull/1051)) ([#1052](https://github.com/apache/arrow-rs/pull/1052)) ([#1080](https://github.com/apache/arrow-rs/pull/1080)) -* [3a5e3541d3a4db61a828011ed95c8539adf1d57c](https://github.com/apache/arrow-rs/commit/3a5e3541d3a4db61a828011ed95c8539adf1d57c) support cast signed numeric to decimal ([#1044](https://github.com/apache/arrow-rs/pull/1044)) ([#1079](https://github.com/apache/arrow-rs/pull/1079)) -* [000bdb3053098255d43288aa3e8665e8b1892a6c](https://github.com/apache/arrow-rs/commit/000bdb3053098255d43288aa3e8665e8b1892a6c) fix(compute): LIKE escape parenthesis ([#1042](https://github.com/apache/arrow-rs/pull/1042)) ([#1078](https://github.com/apache/arrow-rs/pull/1078)) -* [e0abdb9e62772a2f853974e68e744246e7f47569](https://github.com/apache/arrow-rs/commit/e0abdb9e62772a2f853974e68e744246e7f47569) Add Schema::project and RecordBatch::project functions ([#1033](https://github.com/apache/arrow-rs/pull/1033)) ([#1077](https://github.com/apache/arrow-rs/pull/1077)) -* [31911a4d6328d889d98796b896412b3997f73e13](https://github.com/apache/arrow-rs/commit/31911a4d6328d889d98796b896412b3997f73e13) Remove outdated safety example from doc 
([#1050](https://github.com/apache/arrow-rs/pull/1050)) ([#1058](https://github.com/apache/arrow-rs/pull/1058)) -* [71ac8620993a65a7f1f57278c3495556625356b3](https://github.com/apache/arrow-rs/commit/71ac8620993a65a7f1f57278c3495556625356b3) Use existing array type in `take` kernel ([#1046](https://github.com/apache/arrow-rs/pull/1046)) ([#1057](https://github.com/apache/arrow-rs/pull/1057)) -* [1c5902376b7f7d56cb5249db4f98a6a370ead919](https://github.com/apache/arrow-rs/commit/1c5902376b7f7d56cb5249db4f98a6a370ead919) Extract method to drive PageIterator -> RecordReader ([#1031](https://github.com/apache/arrow-rs/pull/1031)) ([#1056](https://github.com/apache/arrow-rs/pull/1056)) -* [7ca39361f8733b86bc0cef5ed5d74093e2c6b14d](https://github.com/apache/arrow-rs/commit/7ca39361f8733b86bc0cef5ed5d74093e2c6b14d) Clarify governance of arrow crate ([#1030](https://github.com/apache/arrow-rs/pull/1030)) ([#1055](https://github.com/apache/arrow-rs/pull/1055)) - - -## [6.4.0](https://github.com/apache/arrow-rs/tree/6.4.0) (2021-12-10) - - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.3.0...6.4.0) - - -* [049f48559f578243935b6e512d06c4c2df360bf1](https://github.com/apache/arrow-rs/commit/049f48559f578243935b6e512d06c4c2df360bf1) Force new cargo and target caching to fix CI ([#1023](https://github.com/apache/arrow-rs/pull/1023)) ([#1024](https://github.com/apache/arrow-rs/pull/1024)) -* [ef37da3b60f71a52d5ad67e9ca810dca38b29f00](https://github.com/apache/arrow-rs/commit/ef37da3b60f71a52d5ad67e9ca810dca38b29f00) Fix a broken link and some missing styling in the main arrow crate docs ([#1013](https://github.com/apache/arrow-rs/pull/1013)) ([#1019](https://github.com/apache/arrow-rs/pull/1019)) -* [f2c746a9b968714cfe05d35fcee8658371acd899](https://github.com/apache/arrow-rs/commit/f2c746a9b968714cfe05d35fcee8658371acd899) Remove out of date comment ([#1008](https://github.com/apache/arrow-rs/pull/1008)) ([#1018](https://github.com/apache/arrow-rs/pull/1018)) -* 
[557fc11e3b2a09a680c0cfbf38d27b13101b63fe](https://github.com/apache/arrow-rs/commit/557fc11e3b2a09a680c0cfbf38d27b13101b63fe) Remove unneeded `rc` feature of serde ([#990](https://github.com/apache/arrow-rs/pull/990)) ([#1016](https://github.com/apache/arrow-rs/pull/1016)) -* [b28385e096b1cf8f5fb2773d49b160f93d94fbac](https://github.com/apache/arrow-rs/commit/b28385e096b1cf8f5fb2773d49b160f93d94fbac) Docstrings for Timestamp*Array. ([#988](https://github.com/apache/arrow-rs/pull/988)) ([#1015](https://github.com/apache/arrow-rs/pull/1015)) -* [a92672e40217670d2566a85d70b0b59fffac594c](https://github.com/apache/arrow-rs/commit/a92672e40217670d2566a85d70b0b59fffac594c) Add full data validation for ArrayData::try_new() ([#1007](https://github.com/apache/arrow-rs/pull/1007)) -* [6c8b2936d7b07e1e2f5d1d48eea425a385382dfb](https://github.com/apache/arrow-rs/commit/6c8b2936d7b07e1e2f5d1d48eea425a385382dfb) Add boolean comparison to scalar kernels for less then, greater than ([#977](https://github.com/apache/arrow-rs/pull/977)) ([#1005](https://github.com/apache/arrow-rs/pull/1005)) -* [14d140aeca608a23a8a6b2c251c8f53ffd377e61](https://github.com/apache/arrow-rs/commit/14d140aeca608a23a8a6b2c251c8f53ffd377e61) Fix some typos in code and comments ([#985](https://github.com/apache/arrow-rs/pull/985)) ([#1006](https://github.com/apache/arrow-rs/pull/1006)) -* [b4507f562fb0eddfb79840871cd2733dc0e337cd](https://github.com/apache/arrow-rs/commit/b4507f562fb0eddfb79840871cd2733dc0e337cd) Fix warnings introduced by Rust/Clippy 1.57.0 ([#1004](https://github.com/apache/arrow-rs/pull/1004)) - - -## [6.3.0](https://github.com/apache/arrow-rs/tree/6.3.0) (2021-11-26) - - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.2.0...6.3.0) - - -**Changes:** -* [7e51df015ce851a5de444ca08b57b38e7ee959a3](https://github.com/apache/arrow-rs/commit/7e51df015ce851a5de444ca08b57b38e7ee959a3) add more error test case and change the code style 
([#952](https://github.com/apache/arrow-rs/pull/952)) ([#976](https://github.com/apache/arrow-rs/pull/976)) -* [6c570cfe98d6a7a4ec74b139b733c5c72ed10015](https://github.com/apache/arrow-rs/commit/6c570cfe98d6a7a4ec74b139b733c5c72ed10015) Support read decimal data from csv reader if user provide the schema with decimal data type ([#941](https://github.com/apache/arrow-rs/pull/941)) ([#974](https://github.com/apache/arrow-rs/pull/974)) -* [4fa0d4d7f7d9ca0a3da2a6dfe3eae6dc2d51a79a](https://github.com/apache/arrow-rs/commit/4fa0d4d7f7d9ca0a3da2a6dfe3eae6dc2d51a79a) Adding Pretty Print Support For Fixed Size List ([#958](https://github.com/apache/arrow-rs/pull/958)) ([#968](https://github.com/apache/arrow-rs/pull/968)) -* [9d453a3128013c03e8ed854ded76b15cc6f28be4](https://github.com/apache/arrow-rs/commit/9d453a3128013c03e8ed854ded76b15cc6f28be4) Fix bug in temporal utilities due to DST being ignored. ([#955](https://github.com/apache/arrow-rs/pull/955)) ([#967](https://github.com/apache/arrow-rs/pull/967)) -* [1b9fd9e3fb2653236513bb7dda5aa2fa14d1d831](https://github.com/apache/arrow-rs/commit/1b9fd9e3fb2653236513bb7dda5aa2fa14d1d831) Inferring 2. 
as Float64 for issue [#929](https://github.com/apache/arrow-rs/pull/929) ([#950](https://github.com/apache/arrow-rs/pull/950)) ([#966](https://github.com/apache/arrow-rs/pull/966)) -* [e6c5e1c877bd94b3d6e545567f901d9962257cf8](https://github.com/apache/arrow-rs/commit/e6c5e1c877bd94b3d6e545567f901d9962257cf8) Fix CI for latest nightly ([#970](https://github.com/apache/arrow-rs/pull/970)) ([#973](https://github.com/apache/arrow-rs/pull/973)) -* [c96e8de457442806e18944f0b26dd06ba4cb1aee](https://github.com/apache/arrow-rs/commit/c96e8de457442806e18944f0b26dd06ba4cb1aee) Fix primitive sort when input contains more nulls than the given sort limit ([#954](https://github.com/apache/arrow-rs/pull/954)) ([#965](https://github.com/apache/arrow-rs/pull/965)) -* [094037d418381584178db1d886cad3b5024b414a](https://github.com/apache/arrow-rs/commit/094037d418381584178db1d886cad3b5024b414a) Update comfy-table to 5.0 ([#957](https://github.com/apache/arrow-rs/pull/957)) ([#964](https://github.com/apache/arrow-rs/pull/964)) -* [9f635021eee6786c5377c891218c5f88ebce07c3](https://github.com/apache/arrow-rs/commit/9f635021eee6786c5377c891218c5f88ebce07c3) Fix csv writing of timestamps to show timezone. 
([#849](https://github.com/apache/arrow-rs/pull/849)) ([#963](https://github.com/apache/arrow-rs/pull/963)) -* [f7deba4c3a050a52608462ee8a827bb8f6364140](https://github.com/apache/arrow-rs/commit/f7deba4c3a050a52608462ee8a827bb8f6364140) Adding ability to parse float from number with leading decimal ([#831](https://github.com/apache/arrow-rs/pull/831)) ([#962](https://github.com/apache/arrow-rs/pull/962)) -* [59f96e842d05b63882f7ba285c66a9739761cf84](https://github.com/apache/arrow-rs/commit/59f96e842d05b63882f7ba285c66a9739761cf84) add ilike comparitor ([#874](https://github.com/apache/arrow-rs/pull/874)) ([#961](https://github.com/apache/arrow-rs/pull/961)) -* [54023c8a5543c9f9fa4955afa01189029f3e96f5](https://github.com/apache/arrow-rs/commit/54023c8a5543c9f9fa4955afa01189029f3e96f5) Remove unpassable cargo publish check from verify-release-candidate.sh ([#882](https://github.com/apache/arrow-rs/pull/882)) ([#949](https://github.com/apache/arrow-rs/pull/949)) - - - -## [6.2.0](https://github.com/apache/arrow-rs/tree/6.2.0) (2021-11-12) - - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.1.0...6.2.0) - -**Features / Fixes:** - - -* [4037933e43cad9e4de027039ce14caa65f78300a](https://github.com/apache/arrow-rs/commit/4037933e43cad9e4de027039ce14caa65f78300a) Fix validation for offsets of StructArrays ([#942](https://github.com/apache/arrow-rs/pull/942)) ([#946](https://github.com/apache/arrow-rs/pull/946)) -* [1af9ca5d363d870550026a7b1abcb749befbb371](https://github.com/apache/arrow-rs/commit/1af9ca5d363d870550026a7b1abcb749befbb371) implement take kernel for null arrays ([#939](https://github.com/apache/arrow-rs/pull/939)) ([#944](https://github.com/apache/arrow-rs/pull/944)) -* [320de1c20aefbf204f6888e2ad3663863afeba9f](https://github.com/apache/arrow-rs/commit/320de1c20aefbf204f6888e2ad3663863afeba9f) add checker for appending i128 to decimal builder ([#928](https://github.com/apache/arrow-rs/pull/928)) 
([#943](https://github.com/apache/arrow-rs/pull/943)) -* [dff14113884ad4246a8cafb9be579ebdb4e1481f](https://github.com/apache/arrow-rs/commit/dff14113884ad4246a8cafb9be579ebdb4e1481f) Validate arguments to ArrayData::new and null bit buffer and buffers ([#810](https://github.com/apache/arrow-rs/pull/810)) ([#936](https://github.com/apache/arrow-rs/pull/936)) -* [c3eae1ec56303b97c9e15263063a6a13122ef194](https://github.com/apache/arrow-rs/commit/c3eae1ec56303b97c9e15263063a6a13122ef194) fix some warning about unused variables in panic tests ([#894](https://github.com/apache/arrow-rs/pull/894)) ([#933](https://github.com/apache/arrow-rs/pull/933)) -* [e80bb018450f13a30811ffd244c42917d8bf8a62](https://github.com/apache/arrow-rs/commit/e80bb018450f13a30811ffd244c42917d8bf8a62) fix some clippy warnings ([#896](https://github.com/apache/arrow-rs/pull/896)) ([#930](https://github.com/apache/arrow-rs/pull/930)) -* [bde89463b627be3f60b5569d038ca36c434da71d](https://github.com/apache/arrow-rs/commit/bde89463b627be3f60b5569d038ca36c434da71d) feat(ipc): add support for deserializing messages with nested dictionary fields ([#923](https://github.com/apache/arrow-rs/pull/923)) ([#931](https://github.com/apache/arrow-rs/pull/931)) -* [792544b5fb7b84224ef9745ecb9f330663c14fb4](https://github.com/apache/arrow-rs/commit/792544b5fb7b84224ef9745ecb9f330663c14fb4) refactor regexp_is_match_utf8_scalar to try to mitigate miri failures ([#895](https://github.com/apache/arrow-rs/pull/895)) ([#932](https://github.com/apache/arrow-rs/pull/932)) -* [3f0e252811cbb6e3f7c774959787dcfec985d03e](https://github.com/apache/arrow-rs/commit/3f0e252811cbb6e3f7c774959787dcfec985d03e) Automatically retry failed MIRI runs to work around intermittent failures ([#934](https://github.com/apache/arrow-rs/pull/934)) -* [c9a9515c46d560ced00e23ff57cb10a1c97573cb](https://github.com/apache/arrow-rs/commit/c9a9515c46d560ced00e23ff57cb10a1c97573cb) Update mod.rs ([#909](https://github.com/apache/arrow-rs/pull/909)) 
([#919](https://github.com/apache/arrow-rs/pull/919)) -* [64ed79ece67141b92dc45b8a1d43cb9d909aa6a9](https://github.com/apache/arrow-rs/commit/64ed79ece67141b92dc45b8a1d43cb9d909aa6a9) Mark boolean kernels public ([#913](https://github.com/apache/arrow-rs/pull/913)) ([#920](https://github.com/apache/arrow-rs/pull/920)) -* [8b95fe0bbf03588c5cc00f67365c5b0dac4d7a34](https://github.com/apache/arrow-rs/commit/8b95fe0bbf03588c5cc00f67365c5b0dac4d7a34) doc example mistype ([#904](https://github.com/apache/arrow-rs/pull/904)) ([#918](https://github.com/apache/arrow-rs/pull/918)) -* [34c5eab4862cab16fdfd5f5ed6c68dce6298dfa4](https://github.com/apache/arrow-rs/commit/34c5eab4862cab16fdfd5f5ed6c68dce6298dfa4) allow null array to be cast to all other types ([#884](https://github.com/apache/arrow-rs/pull/884)) ([#917](https://github.com/apache/arrow-rs/pull/917)) -* [3c69752e55ed0c58f5a8faed918a22b45cd93766](https://github.com/apache/arrow-rs/commit/3c69752e55ed0c58f5a8faed918a22b45cd93766) Fix instances of UB that cause tests to not pass under miri ([#878](https://github.com/apache/arrow-rs/pull/878)) ([#916](https://github.com/apache/arrow-rs/pull/916)) -* [85402148c3af03d0855e81f855715ea98a7491c5](https://github.com/apache/arrow-rs/commit/85402148c3af03d0855e81f855715ea98a7491c5) feat(ipc): Support writing dictionaries nested in structs and unions ([#870](https://github.com/apache/arrow-rs/pull/870)) ([#915](https://github.com/apache/arrow-rs/pull/915)) -* [03d95e626cb0e654775fefa77786674ea41be4a2](https://github.com/apache/arrow-rs/commit/03d95e626cb0e654775fefa77786674ea41be4a2) Fix references to changelog ([#905](https://github.com/apache/arrow-rs/pull/905)) - - -## [6.1.0](https://github.com/apache/arrow-rs/tree/6.1.0) (2021-10-29) - - -[Full Changelog](https://github.com/apache/arrow-rs/compare/6.0.0...6.1.0) - -**Features / Fixes:** - -* [b42649b0088fe7762c713a41a23c1abdf8d0496d](https://github.com/apache/arrow-rs/commit/b42649b0088fe7762c713a41a23c1abdf8d0496d) 
implement eq_dyn and neq_dyn ([#858](https://github.com/apache/arrow-rs/pull/858)) ([#867](https://github.com/apache/arrow-rs/pull/867)) -* [01743f3f10a377c1ca857cd554acbf84155766d8](https://github.com/apache/arrow-rs/commit/01743f3f10a377c1ca857cd554acbf84155766d8) fix: fix a bug in offset calculation for unions ([#863](https://github.com/apache/arrow-rs/pull/863)) ([#871](https://github.com/apache/arrow-rs/pull/871)) -* [8bfff793a23f0e71008c7a9eea7a54d6b913ecff](https://github.com/apache/arrow-rs/commit/8bfff793a23f0e71008c7a9eea7a54d6b913ecff) add lt_bool, lt_eq_bool, gt_bool, gt_eq_bool ([#860](https://github.com/apache/arrow-rs/pull/860)) ([#868](https://github.com/apache/arrow-rs/pull/868)) -* [8845e91d4ab584c822e9ee903db7069551b124af](https://github.com/apache/arrow-rs/commit/8845e91d4ab584c822e9ee903db7069551b124af) fix(ipc): Support serializing structs containing dictionaries ([#848](https://github.com/apache/arrow-rs/pull/848)) ([#865](https://github.com/apache/arrow-rs/pull/865)) -* [620282a0d9fdd2a8ed7e8313d17ba3dec64c80e5](https://github.com/apache/arrow-rs/commit/620282a0d9fdd2a8ed7e8313d17ba3dec64c80e5) Implement boolean equality kernels ([#844](https://github.com/apache/arrow-rs/pull/844)) ([#857](https://github.com/apache/arrow-rs/pull/857)) -* [94cddcacf785be982e69689291ce034ef00220b4](https://github.com/apache/arrow-rs/commit/94cddcacf785be982e69689291ce034ef00220b4) Cherry pick fix parquet_derive with default features (and fix cargo publish) ([#856](https://github.com/apache/arrow-rs/pull/856)) -* [733fd583ddb3dbe6b4d58a809c444ee16ac0eae8](https://github.com/apache/arrow-rs/commit/733fd583ddb3dbe6b4d58a809c444ee16ac0eae8) Use kernel utility for parsing timestamps in csv reader. 
([#832](https://github.com/apache/arrow-rs/pull/832)) ([#853](https://github.com/apache/arrow-rs/pull/853)) -* [2cc64937a153f632796915d2d9869d5c2a501d28](https://github.com/apache/arrow-rs/commit/2cc64937a153f632796915d2d9869d5c2a501d28) [Minor] Fix clippy errors with new rust version (1.56) and float formatting with nightly ([#845](https://github.com/apache/arrow-rs/pull/845)) ([#850](https://github.com/apache/arrow-rs/pull/850)) - -**Other:** -* [bfac9e5a027e3bd78b7a1ec90c75a3e385bd66bb](https://github.com/apache/arrow-rs/commit/bfac9e5a027e3bd78b7a1ec90c75a3e385bd66bb) Test out new tarpaulin version ([#852](https://github.com/apache/arrow-rs/pull/852)) ([#866](https://github.com/apache/arrow-rs/pull/866)) -* [809350ced392cfc78d8a1a46228d4ffc25dea9ff](https://github.com/apache/arrow-rs/commit/809350ced392cfc78d8a1a46228d4ffc25dea9ff) Update README.md ([#834](https://github.com/apache/arrow-rs/pull/834)) ([#854](https://github.com/apache/arrow-rs/pull/854)) -* [70582f40dd21f5c710c4946266d0563a92b92337](https://github.com/apache/arrow-rs/commit/70582f40dd21f5c710c4946266d0563a92b92337) [MINOR] Delete temp file from docs ([#836](https://github.com/apache/arrow-rs/pull/836)) ([#855](https://github.com/apache/arrow-rs/pull/855)) -* [a721e00014015a7e598946b6efb9b1da8080ec85](https://github.com/apache/arrow-rs/commit/a721e00014015a7e598946b6efb9b1da8080ec85) Force fresh cargo cache key in CI ([#839](https://github.com/apache/arrow-rs/pull/839)) ([#851](https://github.com/apache/arrow-rs/pull/851)) - - -## [6.0.0](https://github.com/apache/arrow-rs/tree/6.0.0) (2021-10-13) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.5.0...6.0.0) - -**Breaking changes:** - -- Replace `ArrayData::new()` with `ArrayData::try_new()` and `unsafe ArrayData::new_unchecked` [\#822](https://github.com/apache/arrow-rs/pull/822) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([alamb](https://github.com/alamb)) -- Update Bitmap::len to return bits rather than bytes [\#749](https://github.com/apache/arrow-rs/pull/749) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([matthewmturner](https://github.com/matthewmturner)) -- use sort\_unstable\_by in primitive sorting [\#552](https://github.com/apache/arrow-rs/pull/552) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) -- New MapArray support [\#491](https://github.com/apache/arrow-rs/pull/491) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nevi-me](https://github.com/nevi-me)) - -**Implemented enhancements:** - -- Improve parquet binary writer speed by reducing allocations [\#819](https://github.com/apache/arrow-rs/issues/819) -- Expose buffer operations [\#808](https://github.com/apache/arrow-rs/issues/808) -- Add doc examples of writing parquet files using `ArrowWriter` [\#788](https://github.com/apache/arrow-rs/issues/788) - -**Fixed bugs:** - -- JSON reader can create null struct children on empty lists [\#825](https://github.com/apache/arrow-rs/issues/825) -- Incorrect null count for cast kernel for list arrays [\#815](https://github.com/apache/arrow-rs/issues/815) -- `minute` and `second` temporal kernels do not respect timezone [\#500](https://github.com/apache/arrow-rs/issues/500) -- Fix data corruption in json decoder f64-to-i64 cast [\#652](https://github.com/apache/arrow-rs/pull/652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xianwill](https://github.com/xianwill)) - -**Documentation updates:** - -- Doctest for PrimitiveArray using from\_iter\_values. [\#694](https://github.com/apache/arrow-rs/pull/694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) -- Doctests for BinaryArray and LargeBinaryArray. 
[\#625](https://github.com/apache/arrow-rs/pull/625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) -- Add links in docstrings [\#605](https://github.com/apache/arrow-rs/pull/605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - - -## [5.5.0](https://github.com/apache/arrow-rs/tree/5.5.0) (2021-09-24) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.4.0...5.5.0) - -**Implemented enhancements:** - -- parquet should depend on a small set of arrow features [\#800](https://github.com/apache/arrow-rs/issues/800) -- Support equality on RecordBatch [\#735](https://github.com/apache/arrow-rs/issues/735) - -**Fixed bugs:** - -- Converting from string to timestamp uses microseconds instead of milliseconds [\#780](https://github.com/apache/arrow-rs/issues/780) -- Document has no link to `RowColumIter` [\#762](https://github.com/apache/arrow-rs/issues/762) -- length on slices with null doesn't work [\#744](https://github.com/apache/arrow-rs/issues/744) - -## [5.4.0](https://github.com/apache/arrow-rs/tree/5.4.0) (2021-09-10) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.3.0...5.4.0) - -**Implemented enhancements:** - -- Upgrade lexical-core to 0.8 [\#747](https://github.com/apache/arrow-rs/issues/747) -- `append_nulls` and `append_trusted_len_iter` for PrimitiveBuilder [\#725](https://github.com/apache/arrow-rs/issues/725) -- Optimize MutableArrayData::extend for null buffers [\#397](https://github.com/apache/arrow-rs/issues/397) - -**Fixed bugs:** - -- Arithmetic with scalars doesn't work on slices [\#742](https://github.com/apache/arrow-rs/issues/742) -- Comparisons with scalar don't work on slices [\#740](https://github.com/apache/arrow-rs/issues/740) -- `unary` kernel doesn't respect offset [\#738](https://github.com/apache/arrow-rs/issues/738) -- `new_null_array` creates invalid struct arrays 
[\#734](https://github.com/apache/arrow-rs/issues/734) -- --no-default-features is broken for parquet [\#733](https://github.com/apache/arrow-rs/issues/733) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- `Bitmap::len` returns the number of bytes, not bits. [\#730](https://github.com/apache/arrow-rs/issues/730) -- Decimal logical type is formatted incorrectly by print\_schema [\#713](https://github.com/apache/arrow-rs/issues/713) -- parquet\_derive does not support chrono time values [\#711](https://github.com/apache/arrow-rs/issues/711) -- Numeric overflow when formatting Decimal type [\#710](https://github.com/apache/arrow-rs/issues/710) -- The integration tests are not running [\#690](https://github.com/apache/arrow-rs/issues/690) +- Update arrow module docs [\#1840](https://github.com/apache/arrow-rs/pull/1840) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update safety disclaimer [\#1837](https://github.com/apache/arrow-rs/pull/1837) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update ballista readme link [\#1765](https://github.com/apache/arrow-rs/pull/1765) ([tustvold](https://github.com/tustvold)) +- Move changelog archive to `CHANGELOG-old.md` [\#1759](https://github.com/apache/arrow-rs/pull/1759) ([alamb](https://github.com/alamb)) **Closed issues:** -- Question: Is there no way to create a DictionaryArray with a pre-arranged mapping? 
[\#729](https://github.com/apache/arrow-rs/issues/729) - -## [5.3.0](https://github.com/apache/arrow-rs/tree/5.3.0) (2021-08-26) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.2.0...5.3.0) - -**Implemented enhancements:** - -- Add optimized filter kernel for regular expression matching [\#697](https://github.com/apache/arrow-rs/issues/697) -- Can't cast from timestamp array to string array [\#587](https://github.com/apache/arrow-rs/issues/587) - -**Fixed bugs:** - -- 'Encoding DELTA\_BYTE\_ARRAY is not supported' with parquet arrow readers [\#708](https://github.com/apache/arrow-rs/issues/708) -- Support reading json string into binary data type. [\#701](https://github.com/apache/arrow-rs/issues/701) - -**Closed issues:** - -- Resolve Issues with `prettytable-rs` dependency [\#69](https://github.com/apache/arrow-rs/issues/69) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -## [5.2.0](https://github.com/apache/arrow-rs/tree/5.2.0) (2021-08-12) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.1.0...5.2.0) - -**Implemented enhancements:** - -- Make rand an optional dependency [\#671](https://github.com/apache/arrow-rs/issues/671) -- Remove undefined behavior in `value` method of boolean and primitive arrays [\#645](https://github.com/apache/arrow-rs/issues/645) -- Avoid materialization of indices in filter\_record\_batch for single arrays [\#636](https://github.com/apache/arrow-rs/issues/636) -- Add a note about arrow crate security / safety [\#627](https://github.com/apache/arrow-rs/issues/627) -- Allow the creation of String arrays from an interator of &Option\<&str\> [\#598](https://github.com/apache/arrow-rs/issues/598) -- Support arrow map datatype [\#395](https://github.com/apache/arrow-rs/issues/395) - -**Fixed bugs:** - -- Parquet fixed length byte array columns write byte array statistics [\#660](https://github.com/apache/arrow-rs/issues/660) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- 
Parquet boolean columns write Int32 statistics [\#659](https://github.com/apache/arrow-rs/issues/659) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Writing Parquet with a boolean column fails [\#657](https://github.com/apache/arrow-rs/issues/657) -- JSON decoder data corruption for large i64/u64 [\#653](https://github.com/apache/arrow-rs/issues/653) -- Incorrect min/max statistics for strings in parquet files [\#641](https://github.com/apache/arrow-rs/issues/641) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Closed issues:** - -- Release candidate verifying script seems work on macOS [\#640](https://github.com/apache/arrow-rs/issues/640) -- Update CONTRIBUTING [\#342](https://github.com/apache/arrow-rs/issues/342) - -## [5.1.0](https://github.com/apache/arrow-rs/tree/5.1.0) (2021-07-29) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/5.0.0...5.1.0) - -**Implemented enhancements:** - -- Make FFI\_ArrowArray empty\(\) public [\#602](https://github.com/apache/arrow-rs/issues/602) -- exponential sort can be used to speed up lexico partition kernel [\#586](https://github.com/apache/arrow-rs/issues/586) -- Implement sort\(\) for binary array [\#568](https://github.com/apache/arrow-rs/issues/568) -- primitive sorting can be improved and more consistent with and without `limit` if sorted unstably [\#553](https://github.com/apache/arrow-rs/issues/553) - -**Fixed bugs:** - -- Confusing memory usage with CSV reader [\#623](https://github.com/apache/arrow-rs/issues/623) -- FFI implementation deviates from specification for array release [\#595](https://github.com/apache/arrow-rs/issues/595) -- Parquet file content is different if `~/.cargo` is in a git checkout [\#589](https://github.com/apache/arrow-rs/issues/589) -- Ensure output of MIRI is checked for success [\#581](https://github.com/apache/arrow-rs/issues/581) -- MIRI failure in `array::ffi::tests::test_struct` and other ffi tests 
[\#580](https://github.com/apache/arrow-rs/issues/580) -- ListArray equality check may return wrong result [\#570](https://github.com/apache/arrow-rs/issues/570) -- cargo audit failed [\#561](https://github.com/apache/arrow-rs/issues/561) -- ArrayData::slice\(\) does not work for nested types such as StructArray [\#554](https://github.com/apache/arrow-rs/issues/554) - -**Documentation updates:** - -- More examples of how to construct Arrays [\#301](https://github.com/apache/arrow-rs/issues/301) - -**Closed issues:** - -- Implement StringBuilder::append\_option [\#263](https://github.com/apache/arrow-rs/issues/263) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -## [5.0.0](https://github.com/apache/arrow-rs/tree/5.0.0) (2021-07-14) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/4.4.0...5.0.0) - -**Breaking changes:** - -- Remove lifetime from DynComparator [\#543](https://github.com/apache/arrow-rs/issues/543) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Simplify interactions with arrow flight APIs [\#376](https://github.com/apache/arrow-rs/issues/376) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] -- refactor: remove lifetime from DynComparator [\#542](https://github.com/apache/arrow-rs/pull/542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([e-dard](https://github.com/e-dard)) -- use iterator for partition kernel instead of generating vec [\#438](https://github.com/apache/arrow-rs/pull/438) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) -- Remove DictionaryArray::keys\_array method [\#419](https://github.com/apache/arrow-rs/pull/419) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- simplify interactions with arrow flight APIs [\#377](https://github.com/apache/arrow-rs/pull/377) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] 
([garyanaplan](https://github.com/garyanaplan)) -- return reference from DictionaryArray::values\(\) \(\#313\) [\#314](https://github.com/apache/arrow-rs/pull/314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) - -**Implemented enhancements:** - -- Allow creation of StringArrays from Vec\ [\#519](https://github.com/apache/arrow-rs/issues/519) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement RecordBatch::concat [\#461](https://github.com/apache/arrow-rs/issues/461) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement RecordBatch::slice\(\) to slice RecordBatches [\#460](https://github.com/apache/arrow-rs/issues/460) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add a RecordBatch::split to split large batches into a set of smaller batches [\#343](https://github.com/apache/arrow-rs/issues/343) -- generate parquet schema from rust struct [\#539](https://github.com/apache/arrow-rs/pull/539) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) -- Implement `RecordBatch::concat` [\#537](https://github.com/apache/arrow-rs/pull/537) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([silathdiir](https://github.com/silathdiir)) -- Implement function slice for RecordBatch [\#490](https://github.com/apache/arrow-rs/pull/490) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([b41sh](https://github.com/b41sh)) -- add lexicographically partition points and ranges [\#424](https://github.com/apache/arrow-rs/pull/424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) -- allow to read non-standard CSV [\#326](https://github.com/apache/arrow-rs/pull/326) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kazuk](https://github.com/kazuk)) -- parquet: Speed up `BitReader`/`DeltaBitPackDecoder` [\#325](https://github.com/apache/arrow-rs/pull/325) 
[[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kornholi](https://github.com/kornholi)) -- ARROW-12343: \[Rust\] Support auto-vectorization for min/max [\#9](https://github.com/apache/arrow-rs/pull/9) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- ARROW-12411: \[Rust\] Create RecordBatches from Iterators [\#7](https://github.com/apache/arrow-rs/pull/7) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Fixed bugs:** - -- Error building on master - error: cyclic package dependency: package `ahash v0.7.4` depends on itself. Cycle [\#544](https://github.com/apache/arrow-rs/issues/544) -- IPC reader panics with out of bounds error [\#541](https://github.com/apache/arrow-rs/issues/541) -- Take kernel doesn't handle nulls and structs correctly [\#530](https://github.com/apache/arrow-rs/issues/530) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- master fails to compile with `default-features=false` [\#529](https://github.com/apache/arrow-rs/issues/529) -- README developer instructions out of date [\#523](https://github.com/apache/arrow-rs/issues/523) -- Update rustc and packed\_simd in CI before 5.0 release [\#517](https://github.com/apache/arrow-rs/issues/517) -- Incorrect memory usage calculation for dictionary arrays [\#503](https://github.com/apache/arrow-rs/issues/503) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- sliced null buffers lead to incorrect result in take kernel \(and probably on other places\) [\#502](https://github.com/apache/arrow-rs/issues/502) -- Cast of utf8 types and list container types don't respect offset [\#334](https://github.com/apache/arrow-rs/issues/334) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- fix take kernel null handling on structs [\#531](https://github.com/apache/arrow-rs/pull/531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([bjchambers](https://github.com/bjchambers)) -- Correct array memory usage calculation for dictionary arrays [\#505](https://github.com/apache/arrow-rs/pull/505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- parquet: improve BOOLEAN writing logic and report error on encoding fail [\#443](https://github.com/apache/arrow-rs/pull/443) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([garyanaplan](https://github.com/garyanaplan)) -- Fix bug with null buffer offset in boolean not kernel [\#418](https://github.com/apache/arrow-rs/pull/418) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- respect offset in utf8 and list casts [\#335](https://github.com/apache/arrow-rs/pull/335) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) -- Fix comparison of dictionaries with different values arrays \(\#332\) [\#333](https://github.com/apache/arrow-rs/pull/333) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- ensure null-counts are written for all-null columns [\#307](https://github.com/apache/arrow-rs/pull/307) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([crepererum](https://github.com/crepererum)) -- fix invalid null handling in filter [\#296](https://github.com/apache/arrow-rs/pull/296) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) -- fix NaN handling in parquet statistics [\#256](https://github.com/apache/arrow-rs/pull/256) ([crepererum](https://github.com/crepererum)) - -**Documentation updates:** - -- Improve arrow's crate's readme on crates.io [\#463](https://github.com/apache/arrow-rs/issues/463) -- Clean up README.md in advance of the 5.0 release [\#536](https://github.com/apache/arrow-rs/pull/536) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
[[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- fix readme instructions to reflect new structure [\#524](https://github.com/apache/arrow-rs/pull/524) ([marcvanheerden](https://github.com/marcvanheerden)) -- Improve docs for NullArray, new\_null\_array and new\_empty\_array [\#240](https://github.com/apache/arrow-rs/pull/240) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- `DataType::Decimal` Non-Compliant? [\#1779](https://github.com/apache/arrow-rs/issues/1779) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Further simplify the offset validation [\#1770](https://github.com/apache/arrow-rs/issues/1770) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Best way to convert arrow to Rust native type [\#1760](https://github.com/apache/arrow-rs/issues/1760) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Why `Parquet` is a part of `Arrow`? 
[\#1715](https://github.com/apache/arrow-rs/issues/1715) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Merged pull requests:** -- Fix default arrow build [\#533](https://github.com/apache/arrow-rs/pull/533) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add tests for building applications using arrow with different feature flags [\#532](https://github.com/apache/arrow-rs/pull/532) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Remove unused futures dependency from arrow-flight [\#528](https://github.com/apache/arrow-rs/pull/528) [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) -- CI: update rust nightly and packed\_simd [\#525](https://github.com/apache/arrow-rs/pull/525) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) -- Support `StringArray` creation from String Vec [\#522](https://github.com/apache/arrow-rs/pull/522) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([silathdiir](https://github.com/silathdiir)) -- Fix parquet benchmark schema [\#513](https://github.com/apache/arrow-rs/pull/513) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) -- Fix parquet definition levels [\#511](https://github.com/apache/arrow-rs/pull/511) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) -- Fix for primitive and boolean take kernel for nullable indices with an offset [\#509](https://github.com/apache/arrow-rs/pull/509) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- Bump flatbuffers [\#499](https://github.com/apache/arrow-rs/pull/499) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([PsiACE](https://github.com/PsiACE)) -- implement second/minute helpers for temporal [\#493](https://github.com/apache/arrow-rs/pull/493) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ovr](https://github.com/ovr)) -- special case concatenating single element array shortcut [\#492](https://github.com/apache/arrow-rs/pull/492) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jimexist](https://github.com/Jimexist)) -- update docs to reflect recent changes \(joins and window functions\) [\#489](https://github.com/apache/arrow-rs/pull/489) ([Jimexist](https://github.com/Jimexist)) -- Update rand, proc-macro and zstd dependencies [\#488](https://github.com/apache/arrow-rs/pull/488) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- Doctest for GenericListArray. [\#474](https://github.com/apache/arrow-rs/pull/474) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([novemberkilo](https://github.com/novemberkilo)) -- remove stale comment on `ArrayData` equality and update unit tests [\#472](https://github.com/apache/arrow-rs/pull/472) ([Jimexist](https://github.com/Jimexist)) -- remove unused patch file [\#471](https://github.com/apache/arrow-rs/pull/471) ([Jimexist](https://github.com/Jimexist)) -- fix clippy warnings for rust 1.53 [\#470](https://github.com/apache/arrow-rs/pull/470) ([Jimexist](https://github.com/Jimexist)) -- Fix PR labeler [\#468](https://github.com/apache/arrow-rs/pull/468) ([Dandandan](https://github.com/Dandandan)) -- Tweak dev backporting docs [\#466](https://github.com/apache/arrow-rs/pull/466) ([alamb](https://github.com/alamb)) -- Unvendor Archery [\#459](https://github.com/apache/arrow-rs/pull/459) ([kszucs](https://github.com/kszucs)) -- Add sort boolean benchmark [\#457](https://github.com/apache/arrow-rs/pull/457) 
([alamb](https://github.com/alamb)) -- Add C data interface for decimal128 and timestamp [\#453](https://github.com/apache/arrow-rs/pull/453) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alippai](https://github.com/alippai)) -- Implement the Iterator trait for the json Reader. [\#451](https://github.com/apache/arrow-rs/pull/451) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([LaurentMazare](https://github.com/LaurentMazare)) -- Update release docs + release email template [\#450](https://github.com/apache/arrow-rs/pull/450) ([alamb](https://github.com/alamb)) -- remove clippy unnecessary wraps suppressions in cast kernel [\#449](https://github.com/apache/arrow-rs/pull/449) ([Jimexist](https://github.com/Jimexist)) -- Use partition for bool sort [\#448](https://github.com/apache/arrow-rs/pull/448) ([Jimexist](https://github.com/Jimexist)) -- remove unnecessary wraps in sort [\#445](https://github.com/apache/arrow-rs/pull/445) ([Jimexist](https://github.com/Jimexist)) -- Python FFI bridge for Schema, Field and DataType [\#439](https://github.com/apache/arrow-rs/pull/439) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kszucs](https://github.com/kszucs)) -- Update release Readme.md [\#436](https://github.com/apache/arrow-rs/pull/436) ([alamb](https://github.com/alamb)) -- Derive Eq and PartialEq for SortOptions [\#425](https://github.com/apache/arrow-rs/pull/425) ([tustvold](https://github.com/tustvold)) -- refactor lexico sort for future code reuse [\#423](https://github.com/apache/arrow-rs/pull/423) ([Jimexist](https://github.com/Jimexist)) -- Reenable MIRI check on PRs [\#421](https://github.com/apache/arrow-rs/pull/421) ([alamb](https://github.com/alamb)) -- Sort by float lists [\#420](https://github.com/apache/arrow-rs/pull/420) ([medwards](https://github.com/medwards)) -- Fix out of bounds read in bit chunk iterator [\#416](https://github.com/apache/arrow-rs/pull/416) ([jhorstmann](https://github.com/jhorstmann)) -- 
Doctests for DecimalArray. [\#414](https://github.com/apache/arrow-rs/pull/414) ([novemberkilo](https://github.com/novemberkilo)) -- Add Decimal to CsvWriter and improve debug display [\#406](https://github.com/apache/arrow-rs/pull/406) ([alippai](https://github.com/alippai)) -- MINOR: update install instruction [\#400](https://github.com/apache/arrow-rs/pull/400) ([alippai](https://github.com/alippai)) -- use prettier to auto format md files [\#398](https://github.com/apache/arrow-rs/pull/398) ([Jimexist](https://github.com/Jimexist)) -- window::shift to work for all array types [\#388](https://github.com/apache/arrow-rs/pull/388) ([Jimexist](https://github.com/Jimexist)) -- add more tests for window::shift and handle boundary cases [\#386](https://github.com/apache/arrow-rs/pull/386) ([Jimexist](https://github.com/Jimexist)) -- Implement faster arrow array reader [\#384](https://github.com/apache/arrow-rs/pull/384) ([yordan-pavlov](https://github.com/yordan-pavlov)) -- Add set\_bit to BooleanBufferBuilder to allow mutating bit in index [\#383](https://github.com/apache/arrow-rs/pull/383) ([boazberman](https://github.com/boazberman)) -- make sure that only concat preallocates buffers [\#382](https://github.com/apache/arrow-rs/pull/382) ([ritchie46](https://github.com/ritchie46)) -- Respect max rowgroup size in Arrow writer [\#381](https://github.com/apache/arrow-rs/pull/381) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) -- Fix typo in release script, update release location [\#380](https://github.com/apache/arrow-rs/pull/380) ([alamb](https://github.com/alamb)) -- Doctests for FixedSizeBinaryArray [\#378](https://github.com/apache/arrow-rs/pull/378) ([novemberkilo](https://github.com/novemberkilo)) -- Simplify shift kernel using new\_null\_array [\#370](https://github.com/apache/arrow-rs/pull/370) ([Dandandan](https://github.com/Dandandan)) -- allow `SliceableCursor` to be constructed from an `Arc` directly 
[\#369](https://github.com/apache/arrow-rs/pull/369) ([crepererum](https://github.com/crepererum)) -- Add doctest for ArrayBuilder [\#367](https://github.com/apache/arrow-rs/pull/367) ([alippai](https://github.com/alippai)) -- Fix version in readme [\#365](https://github.com/apache/arrow-rs/pull/365) ([domoritz](https://github.com/domoritz)) -- Remove superfluous space [\#363](https://github.com/apache/arrow-rs/pull/363) ([domoritz](https://github.com/domoritz)) -- Add crate badges [\#362](https://github.com/apache/arrow-rs/pull/362) ([domoritz](https://github.com/domoritz)) -- Disable MIRI check until it runs cleanly on CI [\#360](https://github.com/apache/arrow-rs/pull/360) ([alamb](https://github.com/alamb)) -- Only register Flight.proto with cargo if it exists [\#351](https://github.com/apache/arrow-rs/pull/351) ([tustvold](https://github.com/tustvold)) -- Reduce memory usage of concat \(large\)utf8 [\#348](https://github.com/apache/arrow-rs/pull/348) ([ritchie46](https://github.com/ritchie46)) -- Fix filter UB and add fast path [\#341](https://github.com/apache/arrow-rs/pull/341) ([ritchie46](https://github.com/ritchie46)) -- Automatic cherry-pick script [\#339](https://github.com/apache/arrow-rs/pull/339) ([alamb](https://github.com/alamb)) -- Doctests for BooleanArray. [\#338](https://github.com/apache/arrow-rs/pull/338) ([novemberkilo](https://github.com/novemberkilo)) -- feature gate ipc reader/writer [\#336](https://github.com/apache/arrow-rs/pull/336) ([ritchie46](https://github.com/ritchie46)) -- Add ported Rust release verification script [\#331](https://github.com/apache/arrow-rs/pull/331) ([wesm](https://github.com/wesm)) -- Doctests for StringArray and LargeStringArray. 
[\#330](https://github.com/apache/arrow-rs/pull/330) ([novemberkilo](https://github.com/novemberkilo)) -- inline PrimitiveArray::value [\#329](https://github.com/apache/arrow-rs/pull/329) ([ritchie46](https://github.com/ritchie46)) -- Enable wasm32 as a target architecture for the SIMD feature [\#324](https://github.com/apache/arrow-rs/pull/324) ([roee88](https://github.com/roee88)) -- Fix undefined behavior in FFI and enable MIRI checks on CI [\#323](https://github.com/apache/arrow-rs/pull/323) ([roee88](https://github.com/roee88)) -- Mutablebuffer::shrink\_to\_fit [\#318](https://github.com/apache/arrow-rs/pull/318) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) -- Add \(simd\) modulus op [\#317](https://github.com/apache/arrow-rs/pull/317) ([gangliao](https://github.com/gangliao)) -- feature gate csv functionality [\#312](https://github.com/apache/arrow-rs/pull/312) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ritchie46](https://github.com/ritchie46)) -- \[Minor\] Version upgrades [\#304](https://github.com/apache/arrow-rs/pull/304) ([Dandandan](https://github.com/Dandandan)) -- Remove old release scripts [\#293](https://github.com/apache/arrow-rs/pull/293) ([alamb](https://github.com/alamb)) -- Add Send to the ArrayBuilder trait [\#291](https://github.com/apache/arrow-rs/pull/291) ([Max-Meldrum](https://github.com/Max-Meldrum)) -- Added changelog generator script and configuration. 
[\#289](https://github.com/apache/arrow-rs/pull/289) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- manually bump development version [\#288](https://github.com/apache/arrow-rs/pull/288) ([nevi-me](https://github.com/nevi-me)) -- Fix FFI and add support for Struct type [\#287](https://github.com/apache/arrow-rs/pull/287) ([roee88](https://github.com/roee88)) -- Fix subtraction underflow when sorting string arrays with many nulls [\#285](https://github.com/apache/arrow-rs/pull/285) ([medwards](https://github.com/medwards)) -- Speed up bound checking in `take` [\#281](https://github.com/apache/arrow-rs/pull/281) ([Dandandan](https://github.com/Dandandan)) -- Update PR template by commenting out instructions [\#278](https://github.com/apache/arrow-rs/pull/278) ([nevi-me](https://github.com/nevi-me)) -- Added Decimal support to pretty-print display utility \(\#230\) [\#273](https://github.com/apache/arrow-rs/pull/273) ([mgill25](https://github.com/mgill25)) -- Fix null struct and list roundtrip [\#270](https://github.com/apache/arrow-rs/pull/270) ([nevi-me](https://github.com/nevi-me)) -- 1.52 clippy fixes [\#267](https://github.com/apache/arrow-rs/pull/267) ([nevi-me](https://github.com/nevi-me)) -- Fix typo in csv/reader.rs [\#265](https://github.com/apache/arrow-rs/pull/265) ([domoritz](https://github.com/domoritz)) -- Fix empty Schema::metadata deserialization error [\#260](https://github.com/apache/arrow-rs/pull/260) ([hulunbier](https://github.com/hulunbier)) -- update datafusion and ballista doc links [\#259](https://github.com/apache/arrow-rs/pull/259) ([Jimexist](https://github.com/Jimexist)) -- support full u32 and u64 roundtrip through parquet [\#258](https://github.com/apache/arrow-rs/pull/258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([crepererum](https://github.com/crepererum)) -- \[MINOR\] Added env to run rust in integration. 
[\#253](https://github.com/apache/arrow-rs/pull/253) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- \[Minor\] Made integration tests always run. [\#248](https://github.com/apache/arrow-rs/pull/248) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- fix parquet max\_definition for non-null structs [\#246](https://github.com/apache/arrow-rs/pull/246) ([nevi-me](https://github.com/nevi-me)) -- Disabled rebase needed until demonstrate working. [\#243](https://github.com/apache/arrow-rs/pull/243) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- pin flatbuffers to 0.8.4 [\#239](https://github.com/apache/arrow-rs/pull/239) ([ritchie46](https://github.com/ritchie46)) -- sort\_primitive result is capped to the min of limit or values.len [\#236](https://github.com/apache/arrow-rs/pull/236) ([medwards](https://github.com/medwards)) -- Read list field correctly [\#234](https://github.com/apache/arrow-rs/pull/234) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([nevi-me](https://github.com/nevi-me)) -- Fix code examples for RecordBatch::try\_from\_iter [\#231](https://github.com/apache/arrow-rs/pull/231) ([alamb](https://github.com/alamb)) -- Support string dictionaries in csv reader \(\#228\) [\#229](https://github.com/apache/arrow-rs/pull/229) ([tustvold](https://github.com/tustvold)) -- support LargeUtf8 in sort kernel [\#26](https://github.com/apache/arrow-rs/pull/26) ([ritchie46](https://github.com/ritchie46)) -- Removed unused files [\#22](https://github.com/apache/arrow-rs/pull/22) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- ARROW-12504: Buffer::from\_slice\_ref set correct capacity [\#18](https://github.com/apache/arrow-rs/pull/18) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add GitHub templates [\#17](https://github.com/apache/arrow-rs/pull/17) ([andygrove](https://github.com/andygrove)) -- ARROW-12493: Add support for writing dictionary arrays to CSV and 
JSON [\#16](https://github.com/apache/arrow-rs/pull/16) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- ARROW-12426: \[Rust\] Fix concatenation of arrow dictionaries [\#15](https://github.com/apache/arrow-rs/pull/15) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Update repository and homepage urls [\#14](https://github.com/apache/arrow-rs/pull/14) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) -- Added rebase-needed bot [\#13](https://github.com/apache/arrow-rs/pull/13) ([jorgecarleitao](https://github.com/jorgecarleitao)) -- Added Integration tests against arrow [\#10](https://github.com/apache/arrow-rs/pull/10) ([jorgecarleitao](https://github.com/jorgecarleitao)) - -## [4.4.0](https://github.com/apache/arrow-rs/tree/4.4.0) (2021-06-24) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/4.3.0...4.4.0) - -**Breaking changes:** - -- migrate partition kernel to use Iterator trait [\#437](https://github.com/apache/arrow-rs/issues/437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove DictionaryArray::keys\_array [\#391](https://github.com/apache/arrow-rs/issues/391) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Make equals\_datatype method public, enabling other modules [\#1838](https://github.com/apache/arrow-rs/pull/1838) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([nl5887](https://github.com/nl5887)) +- \[Minor\] Clarify `PageIterator` Documentation [\#1831](https://github.com/apache/arrow-rs/pull/1831) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Update MIRI pin [\#1828](https://github.com/apache/arrow-rs/pull/1828) 
([tustvold](https://github.com/tustvold)) +- Change to use `resolver v2`, test more feature flag combinations in CI, fix errors \(\#1630\) [\#1822](https://github.com/apache/arrow-rs/pull/1822) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add ScalarBuffer abstraction \(\#1811\) [\#1820](https://github.com/apache/arrow-rs/pull/1820) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix list equal for empty offset list array [\#1818](https://github.com/apache/arrow-rs/pull/1818) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix Decimal and List ArrayData Validation \(\#1813\) \(\#1814\) [\#1816](https://github.com/apache/arrow-rs/pull/1816) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Don't overwrite existing data on snappy decompress \(\#1806\) [\#1807](https://github.com/apache/arrow-rs/pull/1807) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Rename `arrow/benches/string_kernels.rs` to `arrow/benches/substring_kernels.rs` [\#1805](https://github.com/apache/arrow-rs/pull/1805) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add public API for decoding parquet footer [\#1804](https://github.com/apache/arrow-rs/pull/1804) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Add AsyncFileReader trait [\#1803](https://github.com/apache/arrow-rs/pull/1803) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- add parquet-fromcsv \(\#1\) [\#1798](https://github.com/apache/arrow-rs/pull/1798) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
([kazuk](https://github.com/kazuk)) +- Use IPC row count info in IPC reader [\#1796](https://github.com/apache/arrow-rs/pull/1796) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Fix typos in the Memory and Buffers section of the docs home [\#1795](https://github.com/apache/arrow-rs/pull/1795) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([datapythonista](https://github.com/datapythonista)) +- Write validity buffer for UnionArray in V4 IPC message [\#1794](https://github.com/apache/arrow-rs/pull/1794) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- feat:Add function for row alignment with page mask [\#1791](https://github.com/apache/arrow-rs/pull/1791) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Read and skip validity buffer of UnionType Array for V4 ipc message [\#1789](https://github.com/apache/arrow-rs/pull/1789) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([viirya](https://github.com/viirya)) +- Add `Substring_by_char` [\#1784](https://github.com/apache/arrow-rs/pull/1784) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Add `ParquetFileArrowReader::try_new` [\#1782](https://github.com/apache/arrow-rs/pull/1782) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Arbitrary size combine option bitmap [\#1781](https://github.com/apache/arrow-rs/pull/1781) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Ismail-Maj](https://github.com/Ismail-Maj)) +- Implement `ChunkReader` for `Bytes`, deprecate `SliceableCursor` [\#1775](https://github.com/apache/arrow-rs/pull/1775) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) 
+- Access metadata of flushed row groups on write \(\#1691\) [\#1774](https://github.com/apache/arrow-rs/pull/1774) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Simplify ParquetFileArrowReader Metadata API [\#1773](https://github.com/apache/arrow-rs/pull/1773) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- MINOR: Unpin nightly version as packed\_simd releases new version [\#1771](https://github.com/apache/arrow-rs/pull/1771) ([viirya](https://github.com/viirya)) +- Update comfy-table requirement from 5.0 to 6.0 [\#1769](https://github.com/apache/arrow-rs/pull/1769) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Optionally disable `validate_decimal_precision` check in `DecimalBuilder.append_value` for interop test [\#1767](https://github.com/apache/arrow-rs/pull/1767) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Minor: Clean up the code of MutableArrayData [\#1763](https://github.com/apache/arrow-rs/pull/1763) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HaoYang670](https://github.com/HaoYang670)) +- Support reading PageIndex from parquet metadata, prepare for skipping pages at reading [\#1762](https://github.com/apache/arrow-rs/pull/1762) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Ted-Jiang](https://github.com/Ted-Jiang)) +- Support casting `Utf8` to `Boolean` [\#1738](https://github.com/apache/arrow-rs/pull/1738) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([MazterQyou](https://github.com/MazterQyou)) -**Implemented enhancements:** - -- sort kernel boolean sort can be O\(n\) [\#447](https://github.com/apache/arrow-rs/issues/447) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- C data interface for decimal128, timestamp, date32 and date64 
[\#413](https://github.com/apache/arrow-rs/issues/413) -- Add Decimal to CsvWriter [\#405](https://github.com/apache/arrow-rs/issues/405) -- Use iterators to increase performance of creating Arrow arrays [\#200](https://github.com/apache/arrow-rs/issues/200) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -**Fixed bugs:** - -- Release Audit Tool \(RAT\) is not being triggered [\#481](https://github.com/apache/arrow-rs/issues/481) -- Security Vulnerabilities: flatbuffers: `read_scalar` and `read_scalar_at` allow transmuting values without `unsafe` blocks [\#476](https://github.com/apache/arrow-rs/issues/476) -- Clippy broken after upgrade to rust 1.53 [\#467](https://github.com/apache/arrow-rs/issues/467) -- Pull Request Labeler is not working [\#462](https://github.com/apache/arrow-rs/issues/462) -- Arrow 4.3 release: error\[E0658\]: use of unstable library feature 'partition\_point': new API [\#456](https://github.com/apache/arrow-rs/issues/456) -- parquet reading hangs when row\_group contains more than 2048 rows of data [\#349](https://github.com/apache/arrow-rs/issues/349) -- Fail to build arrow [\#247](https://github.com/apache/arrow-rs/issues/247) -- JSON reader does not implement iterator [\#193](https://github.com/apache/arrow-rs/issues/193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Security fixes:** - -- Ensure a successful MIRI Run on CI [\#227](https://github.com/apache/arrow-rs/issues/227) - -**Closed issues:** - -- sort kernel has a lot of unnecessary wrapping [\#446](https://github.com/apache/arrow-rs/issues/446) -- \[Parquet\] Plain encoded boolean column chunks limited to 2048 values [\#48](https://github.com/apache/arrow-rs/issues/48) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] - -## [4.3.0](https://github.com/apache/arrow-rs/tree/4.3.0) (2021-06-10) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/4.2.0...4.3.0) - -**Implemented enhancements:** - -- Add partitioning kernel for 
sorted arrays [\#428](https://github.com/apache/arrow-rs/issues/428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement sort by float lists [\#427](https://github.com/apache/arrow-rs/issues/427) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Derive Eq and PartialEq for SortOptions [\#426](https://github.com/apache/arrow-rs/issues/426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- use prettier and github action to normalize markdown document syntax [\#399](https://github.com/apache/arrow-rs/issues/399) -- window::shift can work for more than just primitive array type [\#392](https://github.com/apache/arrow-rs/issues/392) -- Doctest for ArrayBuilder [\#366](https://github.com/apache/arrow-rs/issues/366) - -**Fixed bugs:** - -- Boolean `not` kernel does not take offset of null buffer into account [\#417](https://github.com/apache/arrow-rs/issues/417) -- my contribution not marged in 4.2 release [\#394](https://github.com/apache/arrow-rs/issues/394) -- window::shift shall properly handle boundary cases [\#387](https://github.com/apache/arrow-rs/issues/387) -- Parquet `WriterProperties.max_row_group_size` not wired up [\#257](https://github.com/apache/arrow-rs/issues/257) -- Out of bound reads in chunk iterator [\#198](https://github.com/apache/arrow-rs/issues/198) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -## [4.2.0](https://github.com/apache/arrow-rs/tree/4.2.0) (2021-05-29) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/4.1.0...4.2.0) - -**Breaking changes:** - -- DictionaryArray::values\(\) clones the underlying ArrayRef [\#313](https://github.com/apache/arrow-rs/issues/313) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Implemented enhancements:** - -- Simplify shift kernel using null array [\#371](https://github.com/apache/arrow-rs/issues/371) -- Provide `Arc`-based constructor for `parquet::util::cursor::SliceableCursor` 
[\#368](https://github.com/apache/arrow-rs/issues/368) -- Add badges to crates [\#361](https://github.com/apache/arrow-rs/issues/361) -- Consider inlining PrimitiveArray::value [\#328](https://github.com/apache/arrow-rs/issues/328) -- Implement automated release verification script [\#327](https://github.com/apache/arrow-rs/issues/327) -- Add wasm32 to the list of target architectures of the simd feature [\#316](https://github.com/apache/arrow-rs/issues/316) -- add with\_escape for csv::ReaderBuilder [\#315](https://github.com/apache/arrow-rs/issues/315) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- IPC feature gate [\#310](https://github.com/apache/arrow-rs/issues/310) -- csv feature gate [\#309](https://github.com/apache/arrow-rs/issues/309) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add `shrink_to` / `shrink_to_fit` to `MutableBuffer` [\#297](https://github.com/apache/arrow-rs/issues/297) - -**Fixed bugs:** - -- Incorrect crate setup instructions [\#364](https://github.com/apache/arrow-rs/issues/364) -- Arrow-flight only register rerun-if-changed if file exists [\#350](https://github.com/apache/arrow-rs/issues/350) -- Dictionary Comparison Uses Wrong Values Array [\#332](https://github.com/apache/arrow-rs/issues/332) -- Undefined behavior in FFI implementation [\#322](https://github.com/apache/arrow-rs/issues/322) -- All-null column get wrong parquet null-counts [\#306](https://github.com/apache/arrow-rs/issues/306) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Filter has inconsistent null handling [\#295](https://github.com/apache/arrow-rs/issues/295) - -## [4.1.0](https://github.com/apache/arrow-rs/tree/4.1.0) (2021-05-17) - -[Full Changelog](https://github.com/apache/arrow-rs/compare/4.0.0...4.1.0) - -**Implemented enhancements:** - -- Add Send to ArrayBuilder [\#290](https://github.com/apache/arrow-rs/issues/290) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve performance of bound 
checking option [\#280](https://github.com/apache/arrow-rs/issues/280) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- extend compute kernel arity to include nullary functions [\#276](https://github.com/apache/arrow-rs/issues/276) -- Implement FFI / CDataInterface for Struct Arrays [\#251](https://github.com/apache/arrow-rs/issues/251) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support for pretty-printing Decimal numbers [\#230](https://github.com/apache/arrow-rs/issues/230) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- CSV Reader String Dictionary Support [\#228](https://github.com/apache/arrow-rs/issues/228) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add Builder interface for adding Arrays to record batches [\#210](https://github.com/apache/arrow-rs/issues/210) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support auto-vectorization for min/max [\#209](https://github.com/apache/arrow-rs/issues/209) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support LargeUtf8 in sort kernel [\#25](https://github.com/apache/arrow-rs/issues/25) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Fixed bugs:** - -- no method named `select_nth_unstable_by` found for mutable reference `&mut [T]` [\#283](https://github.com/apache/arrow-rs/issues/283) -- Rust 1.52 Clippy error [\#266](https://github.com/apache/arrow-rs/issues/266) -- NaNs can break parquet statistics [\#255](https://github.com/apache/arrow-rs/issues/255) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- u64::MAX does not roundtrip through parquet [\#254](https://github.com/apache/arrow-rs/issues/254) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Integration tests failing to compile \(flatbuffer\) [\#249](https://github.com/apache/arrow-rs/issues/249) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix compatibility quirks between arrow and parquet structs 
[\#245](https://github.com/apache/arrow-rs/issues/245) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Unable to write non-null Arrow structs to Parquet [\#244](https://github.com/apache/arrow-rs/issues/244) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- schema: missing field `metadata` when deserialize [\#241](https://github.com/apache/arrow-rs/issues/241) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Arrow does not compile due to flatbuffers upgrade [\#238](https://github.com/apache/arrow-rs/issues/238) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Sort with limit panics for the limit includes some but not all nulls, for large arrays [\#235](https://github.com/apache/arrow-rs/issues/235) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- arrow-rs contains a copy of the "format" directory [\#233](https://github.com/apache/arrow-rs/issues/233) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Fix SEGFAULT/ SIGILL in child-data ffi [\#206](https://github.com/apache/arrow-rs/issues/206) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Read list field correctly in \\> [\#167](https://github.com/apache/arrow-rs/issues/167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- FFI listarray lead to undefined behavior. 
[\#20](https://github.com/apache/arrow-rs/issues/20) - -**Security fixes:** - -- Fix MIRI build on CI [\#226](https://github.com/apache/arrow-rs/issues/226) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Get MIRI running again [\#224](https://github.com/apache/arrow-rs/issues/224) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Documentation updates:** - -- Comment out the instructions in the PR template [\#277](https://github.com/apache/arrow-rs/issues/277) -- Update links to datafusion and ballista in README.md [\#19](https://github.com/apache/arrow-rs/issues/19) -- Update "repository" in Cargo.toml [\#12](https://github.com/apache/arrow-rs/issues/12) - -**Closed issues:** -- Arrow Aligned Vec [\#268](https://github.com/apache/arrow-rs/issues/268) -- \[Rust\]: Tracking issue for AVX-512 [\#220](https://github.com/apache/arrow-rs/issues/220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Umbrella issue for clippy integration [\#217](https://github.com/apache/arrow-rs/issues/217) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support sort [\#215](https://github.com/apache/arrow-rs/issues/215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support stable Rust [\#214](https://github.com/apache/arrow-rs/issues/214) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove Rust and point integration tests to arrow-rs repo [\#211](https://github.com/apache/arrow-rs/issues/211) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- ArrayData buffers are inconsistent accross implementations [\#207](https://github.com/apache/arrow-rs/issues/207) -- 3.0.1 patch release [\#204](https://github.com/apache/arrow-rs/issues/204) -- Document patch release process [\#202](https://github.com/apache/arrow-rs/issues/202) -- Simplify Offset [\#186](https://github.com/apache/arrow-rs/issues/186) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Typed Bytes 
[\#185](https://github.com/apache/arrow-rs/issues/185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[CI\]docker-compose setup should enable caching [\#175](https://github.com/apache/arrow-rs/issues/175) -- Improve take primitive performance [\#174](https://github.com/apache/arrow-rs/issues/174) -- \[CI\] Try out buildkite [\#165](https://github.com/apache/arrow-rs/issues/165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Update assignees in JIRA where missing [\#160](https://github.com/apache/arrow-rs/issues/160) -- \[Rust\]: From\ implementations should validate data type [\#103](https://github.com/apache/arrow-rs/issues/103) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Verify that projection push down does not remove aliases columns [\#99](https://github.com/apache/arrow-rs/issues/99) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[Rust\]\[DataFusion\] Implement modulus expression [\#98](https://github.com/apache/arrow-rs/issues/98) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Add constant folding to expressions during logically planning [\#96](https://github.com/apache/arrow-rs/issues/96) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] DataFrame.collect should return RecordBatchReader [\#95](https://github.com/apache/arrow-rs/issues/95) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[Rust\]\[DataFusion\] Add FORMAT to explain plan and an easy to visualize format [\#94](https://github.com/apache/arrow-rs/issues/94) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Implement metrics framework [\#90](https://github.com/apache/arrow-rs/issues/90) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Implement micro benchmarks for each operator [\#89](https://github.com/apache/arrow-rs/issues/89) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- 
\[DataFusion\] Implement pretty print for physical query plan [\#88](https://github.com/apache/arrow-rs/issues/88) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[Archery\] Support rust clippy in the lint command [\#83](https://github.com/apache/arrow-rs/issues/83) -- \[rust\]\[datafusion\] optimize count\(\*\) queries on parquet sources [\#75](https://github.com/apache/arrow-rs/issues/75) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[Rust\]\[DataFusion\] Improve like/nlike performance [\#71](https://github.com/apache/arrow-rs/issues/71) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Implement optimizer rule to remove redundant projections [\#56](https://github.com/apache/arrow-rs/issues/56) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- \[DataFusion\] Parquet data source does not support complex types [\#39](https://github.com/apache/arrow-rs/issues/39) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Merge utils from Parquet and Arrow [\#32](https://github.com/apache/arrow-rs/issues/32) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Add benchmarks for Parquet [\#30](https://github.com/apache/arrow-rs/issues/30) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Mark methods that do not perform bounds checking as unsafe [\#28](https://github.com/apache/arrow-rs/issues/28) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Test issue [\#24](https://github.com/apache/arrow-rs/issues/24) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- This is a test issue [\#11](https://github.com/apache/arrow-rs/issues/11) -For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) \* *This Changelog was automatically generated by 
[github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/Cargo.toml b/Cargo.toml index de7d36f34814..2837f028e8c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,15 @@ members = [ "arrow-flight", "integration-testing", ] +# Enable the version 2 feature resolver, which avoids unifying features for targets that are not being built +# +# Critically this prevents dev-dependencies from enabling features even when not building a target that +# uses dev-dependencies, e.g. the library crate. This in turn ensures that we can catch invalid feature +# flag combinations that would otherwise only surface in dependent crates +# +# Reference - https://doc.rust-lang.org/nightly/cargo/reference/features.html#feature-resolver-version-2 +# +resolver = "2" # this package is excluded because it requires different compilation flags, thereby significantly changing # how it is compiled within the workspace, causing the whole workspace to be compiled from scratch diff --git a/README.md b/README.md index 08c79bac35ff..08385fb6c15d 100644 --- a/README.md +++ b/README.md @@ -66,5 +66,5 @@ There is more information in the [contributing] guide. 
[parquet-readme]: parquet/README.md [flight-readme]: arrow-flight/README.md [datafusion-readme]: https://github.com/apache/arrow-datafusion/blob/master/README.md -[ballista-readme]: https://github.com/apache/arrow-datafusion/blob/master/ballista/README.md +[ballista-readme]: https://github.com/apache/arrow-ballista/blob/master/README.md [issues]: https://github.com/apache/arrow-rs/issues diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index 19215cd3df2a..c5522766e4bd 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-flight" description = "Apache Arrow Flight" -version = "15.0.0" +version = "16.0.0" edition = "2021" rust-version = "1.57" authors = ["Apache Arrow "] @@ -27,14 +27,14 @@ repository = "https://github.com/apache/arrow-rs" license = "Apache-2.0" [dependencies] -arrow = { path = "../arrow", version = "15.0.0" } -base64 = "0.13" -tonic = "0.7" -bytes = "1" -prost = "0.10" -prost-types = { version = "0.10.0", optional = true } -prost-derive = "0.10" -tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread"] } +arrow = { path = "../arrow", version = "16.0.0", default-features = false, features = ["ipc"] } +base64 = { version = "0.13", default-features = false } +tonic = { version = "0.7", default-features = false, features = ["transport", "codegen", "prost"] } +bytes = { version = "1", default-features = false } +prost = { version = "0.10", default-features = false } +prost-types = { version = "0.10.0", default-features = false, optional = true } +prost-derive = { version = "0.10", default-features = false } +tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] } futures = { version = "0.3", default-features = false, features = ["alloc"]} [features] @@ -44,10 +44,10 @@ flight-sql-experimental = ["prost-types"] [dev-dependencies] [build-dependencies] -tonic-build = "0.7" +tonic-build = { version = "0.7", default-features = 
false, features = ["transport", "prost"] } # Pin specific version of the tonic-build dependencies to avoid auto-generated # (and checked in) arrow.flight.protocol.rs from changing -proc-macro2 = ">1.0.30" +proc-macro2 = { version = ">1.0.30", default-features = false } [[example]] name = "flight_sql_server" diff --git a/arrow-flight/README.md b/arrow-flight/README.md index 45b081799e6f..a951699f40aa 100644 --- a/arrow-flight/README.md +++ b/arrow-flight/README.md @@ -27,7 +27,7 @@ Add this to your Cargo.toml: ```toml [dependencies] -arrow-flight = "15.0.0" +arrow-flight = "16.0.0" ``` Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information. diff --git a/arrow-flight/src/arrow.flight.protocol.rs b/arrow-flight/src/arrow.flight.protocol.rs index bbca033fda3b..c76469b39ce7 100644 --- a/arrow-flight/src/arrow.flight.protocol.rs +++ b/arrow-flight/src/arrow.flight.protocol.rs @@ -229,7 +229,7 @@ pub mod flight_service_client { where T: tonic::client::GrpcService, T::Error: Into, - T::ResponseBody: Default + Body + Send + 'static, + T::ResponseBody: Body + Send + 'static, ::Error: Into + Send, { pub fn new(inner: T) -> Self { @@ -242,6 +242,7 @@ pub mod flight_service_client { ) -> FlightServiceClient> where F: tonic::service::Interceptor, + T::ResponseBody: Default, T: tonic::codegen::Service< http::Request, Response = http::Response< @@ -278,9 +279,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -307,9 +308,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { 
self.inner .ready() .await @@ -388,9 +389,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -417,9 +418,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -445,9 +446,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoStreamingRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -474,9 +475,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await @@ -500,9 +501,9 @@ pub mod flight_service_client { &mut self, request: impl tonic::IntoRequest, ) -> Result< - tonic::Response>, - tonic::Status, - > { + tonic::Response>, + tonic::Status, + > { self.inner .ready() .await diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs index 77526917f22a..dda3fc7fe3db 100644 --- a/arrow-flight/src/utils.rs +++ b/arrow-flight/src/utils.rs @@ -71,6 +71,7 @@ pub fn flight_data_to_arrow_batch( schema, dictionaries_by_id, None, + &message.version(), ) })? 
} diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index cba15fc61f32..58ba726091c8 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-pyarrow-integration-testing" description = "" -version = "15.0.0" +version = "16.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -32,7 +32,7 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow = { path = "../arrow", version = "15.0.0", features = ["pyarrow"] } +arrow = { path = "../arrow", version = "16.0.0", features = ["pyarrow"] } pyo3 = { version = "0.16", features = ["extension-module"] } [package.metadata.maturin] diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 26c09d64d5d1..086b21834657 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -27,6 +27,7 @@ use arrow::array::{ArrayData, ArrayRef, Int64Array}; use arrow::compute::kernels; use arrow::datatypes::{DataType, Field, Schema}; use arrow::error::ArrowError; +use arrow::ffi_stream::ArrowArrayStreamReader; use arrow::pyarrow::PyArrowConvert; use arrow::record_batch::RecordBatch; @@ -111,6 +112,13 @@ fn round_trip_record_batch(obj: RecordBatch) -> PyResult { Ok(obj) } +#[pyfunction] +fn round_trip_record_batch_reader( + obj: ArrowArrayStreamReader, +) -> PyResult { + Ok(obj) +} + #[pymodule] fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(double))?; @@ -122,5 +130,6 @@ fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> m.add_wrapped(wrap_pyfunction!(round_trip_schema))?; m.add_wrapped(wrap_pyfunction!(round_trip_array))?; m.add_wrapped(wrap_pyfunction!(round_trip_record_batch))?; + 
m.add_wrapped(wrap_pyfunction!(round_trip_record_batch_reader))?; Ok(()) } diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index 324956c9c6a6..a17ba6d06135 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -303,3 +303,19 @@ def test_dictionary_python(): assert a == b del a del b + +def test_record_batch_reader(): + """ + Python -> Rust -> Python + """ + schema = pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) + batches = [ + pa.record_batch([[[1], [2, 42]]], schema), + pa.record_batch([[None, [], [5, 6]]], schema), + ] + a = pa.RecordBatchReader.from_batches(schema, batches) + b = rust.round_trip_record_batch_reader(a) + + assert b.schema == schema + got_batches = list(b) + assert got_batches == batches diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 8878d4a607fd..6579c002380d 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "arrow" -version = "15.0.0" +version = "16.0.0" description = "Rust implementation of Apache Arrow" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" @@ -38,33 +38,32 @@ path = "src/lib.rs" bench = false [dependencies] -byteorder = "1" -lz4 = "1.23" -zstd = "0.11.1" -serde = { version = "1.0" } -serde_derive = "1.0" -serde_json = { version = "1.0", features = ["preserve_order"] } -indexmap = { version = "1.6", features = ["std"] } -rand = { version = "0.8", optional = true } -num = "0.4" -half = "1.8" -csv_crate = { version = "1.1", optional = true, package="csv" } -regex = "1.3" -lazy_static = "1.4" -packed_simd = { version = "0.3", optional = true, package = "packed_simd_2" } +byteorder = { version = "1", default-features = false } +lz4 = { version = "1.23", default-features = false, optional = true } +zstd = { version = "0.11.1", optional = true, default-features = false } 
+serde = { version = "1.0", default-features = false } +serde_derive = { version = "1.0", default-features = false } +serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } +indexmap = { version = "1.6", default-features = false, features = ["std"] } +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } +num = { version = "0.4", default-features = false, features = ["std"] } +half = { version = "1.8", default-features = false } +csv_crate = { version = "1.1", default-features = false, optional = true, package="csv" } +regex = { version = "1.5.6", default-features = false, features = ["std", "unicode"] } +lazy_static = { version = "1.4", default-features = false } +packed_simd = { version = "0.3", default-features = false, optional = true, package = "packed_simd_2" } chrono = { version = "0.4", default-features = false, features = ["clock"] } -chrono-tz = {version = "0.6", optional = true} -flatbuffers = { version = "2.1.2", optional = true } -hex = "0.4" -comfy-table = { version = "5.0", optional = true, default-features = false } -pyo3 = { version = "0.16", optional = true } -lexical-core = "^0.8" -multiversion = "0.6.1" -bitflags = "1.2.1" +chrono-tz = {version = "0.6", default-features = false, optional = true} +flatbuffers = { version = "2.1.2", default-features = false, features = ["thiserror"], optional = true } +hex = { version = "0.4", default-features = false, features = ["std"] } +comfy-table = { version = "6.0", optional = true, default-features = false } +pyo3 = { version = "0.16", default-features = false, optional = true } +lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] } +multiversion = { version = "0.6.1", default-features = false } +bitflags = { version = "1.2.1", default-features = false } [features] default = ["csv", "ipc", "test_utils"] -avx512 = [] csv = ["csv_crate"] 
ipc = ["flatbuffers"] simd = ["packed_simd"] @@ -81,16 +80,17 @@ pyarrow = ["pyo3"] force_validate = [] [dev-dependencies] -rand = "0.8" -criterion = "0.3" -flate2 = "1" -tempfile = "3" +rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } +criterion = { version = "0.3", default-features = false } +flate2 = { version = "1", default-features = false, features = ["rust_backend"] } +tempfile = { version = "3", default-features = false } [build-dependencies] [[bench]] name = "aggregate_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "array_from_vec" @@ -99,6 +99,7 @@ harness = false [[bench]] name = "builder" harness = false +required-features = ["test_utils"] [[bench]] name = "buffer_bit_ops" @@ -107,6 +108,7 @@ harness = false [[bench]] name = "boolean_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "boolean_append_packed" @@ -115,22 +117,27 @@ harness = false [[bench]] name = "arithmetic_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "cast_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "comparison_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "filter_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "take_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "length_kernel" @@ -143,10 +150,12 @@ harness = false [[bench]] name = "sort_kernel" harness = false +required-features = ["test_utils"] [[bench]] name = "partition_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "csv_writer" @@ -159,6 +168,7 @@ harness = false [[bench]] name = "equal" harness = false +required-features = ["test_utils"] [[bench]] name = "array_slice" @@ -167,18 +177,22 @@ harness = false [[bench]] name = "concatenate_kernel" harness = false +required-features = ["test_utils"] [[bench]] name = "mutable_array" harness = false 
+required-features = ["test_utils"] [[bench]] name = "buffer_create" harness = false +required-features = ["test_utils"] [[bench]] -name = "string_kernels" +name = "substring_kernels" harness = false +required-features = ["test_utils"] [[bench]] name = "array_data_validate" diff --git a/arrow/README.md b/arrow/README.md index 33940d5beb85..28240e77dff3 100644 --- a/arrow/README.md +++ b/arrow/README.md @@ -32,7 +32,7 @@ This crate is tested with the latest stable version of Rust. We do not currently The arrow crate follows the [SemVer standard](https://doc.rust-lang.org/cargo/reference/semver.html) defined by Cargo and works well within the Rust crate ecosystem. -However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `15.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. +However, for historical reasons, this crate uses versions with major numbers greater than `0.x` (e.g. `16.0.0`), unlike many other crates in the Rust ecosystem which spend extended time releasing versions `0.x` to signal planned ongoing API changes. Minor arrow releases contain only compatible changes, while major releases may contain breaking API changes. ## Features @@ -49,13 +49,31 @@ The arrow crate provides the following features which may be enabled: ## Safety -TLDR: You should avoid using the `alloc` and `buffer` and `bitmap` modules if at all possible. These modules contain `unsafe` code, are easy to misuse, and are not needed for most users. +Arrow seeks to uphold the Rust Soundness Pledge as articulated eloquently [here](https://raphlinus.github.io/rust/2020/01/18/soundness-pledge.html). 
Specifically: -As with all open source code, you should carefully evaluate the suitability of `arrow` for your project, taking into consideration your needs and risk tolerance prior to doing so. +> The intent of this crate is to be free of soundness bugs. The developers will do their best to avoid them, and welcome help in analyzing and fixing them -_Background_: There are various parts of the `arrow` crate which use `unsafe` and `transmute` code internally. We are actively working as a community to minimize undefined behavior and remove `unsafe` usage to align more with Rust's core principles of safety. +Where soundness in turn is defined as: -As `arrow` exists today, it is fairly easy to misuse the code in modules named above, leading to undefined behavior. +> Code is unable to trigger undefined behaviour using safe APIs + +One way to ensure this would be to not use `unsafe`, however, as described in the opening chapter of the [Rustonomicon](https://doc.rust-lang.org/nomicon/meet-safe-and-unsafe.html) this is not a requirement, and flexibility in this regard is actually one of Rust's great strengths. + +In particular there are a number of scenarios where `unsafe` is largely unavoidable: + +* Invariants that cannot be statically verified by the compiler and unlock non-trivial performance wins, e.g. values in a StringArray are UTF-8, [TrustedLen](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html) iterators, etc... +* FFI +* SIMD + +Additionally, this crate exposes a number of `unsafe` APIs, allowing downstream crates to explicitly opt-out of potentially expensive invariant checking where appropriate. 
+ +We have a number of strategies to help reduce this risk: + +* Provide strongly-typed `Array` and `ArrayBuilder` APIs to safely and efficiently interact with arrays +* Extensive validation logic to safely construct `ArrayData` from untrusted sources +* All commits are verified using [MIRI](https://github.com/rust-lang/miri) to detect undefined behaviour +* We provide a `force_validate` feature that enables additional validation checks for use in test/debug builds +* There is ongoing work to reduce and better document the use of unsafe, and we welcome contributions in this space ## Building for WASM @@ -82,3 +100,17 @@ cargo run --example read_csv ``` [arrow]: https://arrow.apache.org/ + + +## Performance + +Most of the compute kernels benefit a lot from being optimized for a specific CPU target. +This is especially so on x86-64 since without specifying a target the compiler can only assume support for SSE2 vector instructions. +One of the following values as `-Ctarget-cpu=value` in `RUSTFLAGS` can therefore improve performance significantly: + + - `native`: Target the exact features of the CPU that the build is running on. + This should give the best performance when building and running locally, but should be used carefully for example when building in a CI pipeline or when shipping pre-compiled software. + - `x86-64-v3`: Includes AVX2 support and is close to the Intel `haswell` architecture released in 2013 and should be supported by any recent Intel or AMD CPU. + - `x86-64-v4`: Includes AVX512 support available on Intel `skylake` server and `icelake`/`tigerlake`/`rocketlake` laptop and desktop processors. + +These flags should be used in addition to the `simd` feature, since they will also affect the code generated by the simd library. 
\ No newline at end of file diff --git a/arrow/benches/buffer_bit_ops.rs b/arrow/benches/buffer_bit_ops.rs index 063f39c92729..6c6bb0463b28 100644 --- a/arrow/benches/buffer_bit_ops.rs +++ b/arrow/benches/buffer_bit_ops.rs @@ -17,11 +17,14 @@ #[macro_use] extern crate criterion; -use criterion::Criterion; + +use criterion::{Criterion, Throughput}; extern crate arrow; -use arrow::buffer::{Buffer, MutableBuffer}; +use arrow::buffer::{ + buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer, +}; /// Helper function to create arrays fn create_buffer(size: usize) -> Buffer { @@ -42,17 +45,59 @@ fn bench_buffer_or(left: &Buffer, right: &Buffer) { criterion::black_box((left | right).unwrap()); } +fn bench_buffer_not(buffer: &Buffer) { + criterion::black_box(!buffer); +} + +fn bench_buffer_and_with_offsets( + left: &Buffer, + left_offset: usize, + right: &Buffer, + right_offset: usize, + len: usize, +) { + criterion::black_box(buffer_bin_and(left, left_offset, right, right_offset, len)); +} + +fn bench_buffer_or_with_offsets( + left: &Buffer, + left_offset: usize, + right: &Buffer, + right_offset: usize, + len: usize, +) { + criterion::black_box(buffer_bin_or(left, left_offset, right, right_offset, len)); +} + +fn bench_buffer_not_with_offsets(buffer: &Buffer, offset: usize, len: usize) { + criterion::black_box(buffer_unary_not(buffer, offset, len)); +} + fn bit_ops_benchmark(c: &mut Criterion) { let left = create_buffer(512 * 10); let right = create_buffer(512 * 10); - c.bench_function("buffer_bit_ops and", |b| { - b.iter(|| bench_buffer_and(&left, &right)) - }); + c.benchmark_group("buffer_binary_ops") + .throughput(Throughput::Bytes(3 * left.len() as u64)) + .bench_function("and", |b| b.iter(|| bench_buffer_and(&left, &right))) + .bench_function("or", |b| b.iter(|| bench_buffer_or(&left, &right))) + .bench_function("and_with_offset", |b| { + b.iter(|| { + bench_buffer_and_with_offsets(&left, 1, &right, 2, left.len() * 8 - 5) + }) + }) + 
.bench_function("or_with_offset", |b| { + b.iter(|| { + bench_buffer_or_with_offsets(&left, 1, &right, 2, left.len() * 8 - 5) + }) + }); - c.bench_function("buffer_bit_ops or", |b| { - b.iter(|| bench_buffer_or(&left, &right)) - }); + c.benchmark_group("buffer_unary_ops") + .throughput(Throughput::Bytes(2 * left.len() as u64)) + .bench_function("not", |b| b.iter(|| bench_buffer_not(&left))) + .bench_function("not_with_offset", |b| { + b.iter(|| bench_buffer_not_with_offsets(&left, 1, left.len() * 8 - 5)) + }); } criterion_group!(benches, bit_ops_benchmark); diff --git a/arrow/benches/comparison_kernels.rs b/arrow/benches/comparison_kernels.rs index 4dced67ad87f..21d83e07eec3 100644 --- a/arrow/benches/comparison_kernels.rs +++ b/arrow/benches/comparison_kernels.rs @@ -124,6 +124,11 @@ fn bench_ilike_utf8_scalar(arr_a: &StringArray, value_b: &str) { .unwrap(); } +fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) { + nilike_utf8_scalar(criterion::black_box(arr_a), criterion::black_box(value_b)) + .unwrap(); +} + fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) { regexp_is_match_utf8_scalar( criterion::black_box(arr_a), @@ -254,6 +259,26 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_ilike_utf8_scalar(&arr_string, "%xx_xX%xXX")) }); + c.bench_function("nilike_utf8 scalar equals", |b| { + b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xxXX")) + }); + + c.bench_function("nilike_utf8 scalar contains", |b| { + b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xxXX%")) + }); + + c.bench_function("nilike_utf8 scalar ends with", |b| { + b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xXXx%")) + }); + + c.bench_function("nilike_utf8 scalar starts with", |b| { + b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%XXXx")) + }); + + c.bench_function("nilike_utf8 scalar complex", |b| { + b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xx_xX%xXX")) + }); + c.bench_function("egexp_matches_utf8 scalar starts with", |b| { b.iter(|| 
bench_regexp_is_match_utf8_scalar(&arr_string, "^xx")) }); diff --git a/arrow/benches/string_kernels.rs b/arrow/benches/substring_kernels.rs similarity index 82% rename from arrow/benches/string_kernels.rs rename to arrow/benches/substring_kernels.rs index 7df52a6bbd4c..6bbfc9c09839 100644 --- a/arrow/benches/string_kernels.rs +++ b/arrow/benches/substring_kernels.rs @@ -22,13 +22,21 @@ use criterion::Criterion; extern crate arrow; use arrow::array::*; -use arrow::compute::kernels::substring::substring; +use arrow::compute::kernels::substring::*; use arrow::util::bench_util::*; fn bench_substring(arr: &dyn Array, start: i64, length: Option) { substring(criterion::black_box(arr), start, length).unwrap(); } +fn bench_substring_by_char( + arr: &GenericStringArray, + start: i64, + length: Option, +) { + substring_by_char(criterion::black_box(arr), start, length).unwrap(); +} + fn add_benchmark(c: &mut Criterion) { let size = 65536; let val_len = 1000; @@ -44,6 +52,10 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_substring(&arr_string, 1, Some((val_len - 1) as u64))) }); + c.bench_function("substring utf8 by char", |b| { + b.iter(|| bench_substring_by_char(&arr_string, 1, Some((val_len - 1) as u64))) + }); + c.bench_function("substring fixed size binary array", |b| { b.iter(|| bench_substring(&arr_fsb, 1, Some((val_len - 1) as u64))) }); diff --git a/arrow/examples/dynamic_types.rs b/arrow/examples/dynamic_types.rs index 58e41560e238..f98596f2e777 100644 --- a/arrow/examples/dynamic_types.rs +++ b/arrow/examples/dynamic_types.rs @@ -25,6 +25,9 @@ use arrow::datatypes::*; use arrow::error::Result; use arrow::record_batch::*; +#[cfg(feature = "prettyprint")] +use arrow::util::pretty::print_batches; + fn main() -> Result<()> { // define schema let schema = Schema::new(vec![ @@ -62,6 +65,11 @@ fn main() -> Result<()> { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id), Arc::new(nested)])?; + #[cfg(feature = "prettyprint")] + { + 
print_batches(&[batch.clone()]).unwrap(); + } + process(&batch); Ok(()) } @@ -91,11 +99,17 @@ fn process(batch: &RecordBatch) { Field::new("sum", DataType::Float64, false), ]); - let _ = RecordBatch::try_new( + let projection = RecordBatch::try_new( Arc::new(projected_schema), vec![ id.clone(), // NOTE: this is cloning the Arc not the array data Arc::new(Float64Array::from(nested_c.data().clone())), ], - ); + ) + .unwrap(); + + #[cfg(feature = "prettyprint")] + { + print_batches(&[projection]).unwrap(); + } } diff --git a/arrow/examples/read_csv.rs b/arrow/examples/read_csv.rs index 243d8d0f7ee3..5ccf0c58a797 100644 --- a/arrow/examples/read_csv.rs +++ b/arrow/examples/read_csv.rs @@ -35,7 +35,8 @@ fn main() { Field::new("lng", DataType::Float64, false), ]); - let file = File::open("test/data/uk_cities.csv").unwrap(); + let path = format!("{}/test/data/uk_cities.csv", env!("CARGO_MANIFEST_DIR")); + let file = File::open(path).unwrap(); let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None, None); diff --git a/arrow/examples/read_csv_infer_schema.rs b/arrow/examples/read_csv_infer_schema.rs index 11f8cfb7f7d2..e9f5ff650706 100644 --- a/arrow/examples/read_csv_infer_schema.rs +++ b/arrow/examples/read_csv_infer_schema.rs @@ -26,7 +26,11 @@ use std::fs::File; fn main() { #[cfg(feature = "csv")] { - let file = File::open("test/data/uk_cities_with_headers.csv").unwrap(); + let path = format!( + "{}/test/data/uk_cities_with_headers.csv", + env!("CARGO_MANIFEST_DIR") + ); + let file = File::open(path).unwrap(); let builder = csv::ReaderBuilder::new() .has_header(true) .infer_schema(Some(100)); diff --git a/arrow/src/arch/avx512.rs b/arrow/src/arch/avx512.rs deleted file mode 100644 index 264532f3594c..000000000000 --- a/arrow/src/arch/avx512.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -pub(crate) const AVX512_U8X64_LANES: usize = 64; - -#[target_feature(enable = "avx512f")] -pub(crate) unsafe fn avx512_bin_and(left: &[u8], right: &[u8], res: &mut [u8]) { - use core::arch::x86_64::{__m512i, _mm512_and_si512, _mm512_loadu_epi64}; - - let l: __m512i = _mm512_loadu_epi64(left.as_ptr() as *const _); - let r: __m512i = _mm512_loadu_epi64(right.as_ptr() as *const _); - let f = _mm512_and_si512(l, r); - let s = &f as *const __m512i as *const u8; - let d = res.get_unchecked_mut(0) as *mut _ as *mut u8; - std::ptr::copy_nonoverlapping(s, d, std::mem::size_of::<__m512i>()); -} - -#[target_feature(enable = "avx512f")] -pub(crate) unsafe fn avx512_bin_or(left: &[u8], right: &[u8], res: &mut [u8]) { - use core::arch::x86_64::{__m512i, _mm512_loadu_epi64, _mm512_or_si512}; - - let l: __m512i = _mm512_loadu_epi64(left.as_ptr() as *const _); - let r: __m512i = _mm512_loadu_epi64(right.as_ptr() as *const _); - let f = _mm512_or_si512(l, r); - let s = &f as *const __m512i as *const u8; - let d = res.get_unchecked_mut(0) as *mut _ as *mut u8; - std::ptr::copy_nonoverlapping(s, d, std::mem::size_of::<__m512i>()); -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_bitwise_and_avx512() { - let buf1 = [0b00110011u8; 64]; - let buf2 = [0b11110000u8; 64]; - 
let mut buf3 = [0b00000000; 64]; - unsafe { - avx512_bin_and(&buf1, &buf2, &mut buf3); - }; - for i in buf3.iter() { - assert_eq!(&0b00110000u8, i); - } - } - - #[test] - fn test_bitwise_or_avx512() { - let buf1 = [0b00010011u8; 64]; - let buf2 = [0b11100000u8; 64]; - let mut buf3 = [0b00000000; 64]; - unsafe { - avx512_bin_or(&buf1, &buf2, &mut buf3); - }; - for i in buf3.iter() { - assert_eq!(&0b11110011u8, i); - } - } -} diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index f28aba59d73e..c566ff99f12e 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -873,7 +873,9 @@ mod tests { #[test] fn test_memory_size_primitive_nullable() { - let arr: PrimitiveArray = (0..128).map(Some).collect(); + let arr: PrimitiveArray = (0..128) + .map(|i| if i % 20 == 0 { Some(i) } else { None }) + .collect(); let empty_with_bitmap = PrimitiveArray::::from( ArrayData::builder(arr.data_type().clone()) .add_buffer(MutableBuffer::new(0).into()) diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index a3ab4aeaa115..481ea92d66c3 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -33,6 +33,7 @@ use crate::datatypes::{ }; use crate::error::{ArrowError, Result}; use crate::util::bit_util; +use crate::util::decimal::Decimal128; use crate::{buffer::MutableBuffer, datatypes::DataType}; /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing @@ -700,6 +701,18 @@ impl From for FixedSizeBinaryArray { } } +impl From>> for FixedSizeBinaryArray { + fn from(v: Vec>) -> Self { + Self::try_from_sparse_iter(v.into_iter()).unwrap() + } +} + +impl From> for FixedSizeBinaryArray { + fn from(v: Vec<&[u8]>) -> Self { + Self::try_from_iter(v.into_iter()).unwrap() + } +} + impl fmt::Debug for FixedSizeBinaryArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "FixedSizeBinaryArray<{}>\n[\n", self.value_length())?; @@ -744,7 +757,7 @@ impl Array for FixedSizeBinaryArray { /// .unwrap(); 
/// /// assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type()); -/// assert_eq!(8_887_000_000, decimal_array.value(0)); +/// assert_eq!(8_887_000_000_i128, decimal_array.value(0).as_i128()); /// assert_eq!("8887.000000", decimal_array.value_as_string(0)); /// assert_eq!(3, decimal_array.len()); /// assert_eq!(1, decimal_array.null_count()); @@ -763,8 +776,8 @@ pub struct DecimalArray { } impl DecimalArray { - /// Returns the element at index `i` as i128. - pub fn value(&self, i: usize) -> i128 { + /// Returns the element at index `i`. + pub fn value(&self, i: usize) -> Decimal128 { assert!(i < self.data.len(), "DecimalArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); let raw_val = unsafe { @@ -775,10 +788,11 @@ impl DecimalArray { ) }; let as_array = raw_val.try_into(); - match as_array { + let integer = match as_array { Ok(v) if raw_val.len() == 16 => i128::from_le_bytes(v), _ => panic!("DecimalArray elements are not 128bit integers."), - } + }; + Decimal128::new_from_i128(self.precision, self.scale, integer) } /// Returns the offset for the element at index `i`. 
@@ -809,23 +823,7 @@ impl DecimalArray { #[inline] pub fn value_as_string(&self, row: usize) -> String { - let value = self.value(row); - let value_str = value.to_string(); - - if self.scale == 0 { - value_str - } else { - let (sign, rest) = value_str.split_at(if value >= 0 { 0 } else { 1 }); - - if rest.len() > self.scale { - // Decimal separator is in the middle of the string - let (whole, decimal) = value_str.split_at(value_str.len() - self.scale); - format!("{}.{}", whole, decimal) - } else { - // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = self.scale) - } - } + self.value(row).as_string() } pub fn from_fixed_size_list_array( @@ -1480,18 +1478,19 @@ mod tests { 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]; - let array_data = ArrayData::builder(DataType::Decimal(23, 6)) + let array_data = ArrayData::builder(DataType::Decimal(38, 6)) .len(2) .add_buffer(Buffer::from(&values[..])) .build() .unwrap(); let decimal_array = DecimalArray::from(array_data); - assert_eq!(8_887_000_000, decimal_array.value(0)); - assert_eq!(-8_887_000_000, decimal_array.value(1)); + assert_eq!(8_887_000_000_i128, decimal_array.value(0).into()); + assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into()); assert_eq!(16, decimal_array.value_length()); } #[test] + #[cfg(not(feature = "force_validate"))] fn test_decimal_append_error_value() { let mut decimal_builder = DecimalBuilder::new(10, 5, 3); let mut result = decimal_builder.append_value(123456); @@ -1500,9 +1499,15 @@ mod tests { "Invalid argument error: 123456 is too large to store in a Decimal of precision 5. 
Max is 99999", error.to_string() ); + + unsafe { + decimal_builder.disable_value_validation(); + } + result = decimal_builder.append_value(123456); + assert!(result.is_ok()); decimal_builder.append_value(12345).unwrap(); let arr = decimal_builder.finish(); - assert_eq!("12.345", arr.value_as_string(0)); + assert_eq!("12.345", arr.value_as_string(1)); decimal_builder = DecimalBuilder::new(10, 2, 1); result = decimal_builder.append_value(100); @@ -1511,28 +1516,31 @@ mod tests { "Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99", error.to_string() ); + + unsafe { + decimal_builder.disable_value_validation(); + } + result = decimal_builder.append_value(100); + assert!(result.is_ok()); decimal_builder.append_value(99).unwrap(); result = decimal_builder.append_value(-100); - error = result.unwrap_err(); - assert_eq!( - "Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99", - error.to_string() - ); + assert!(result.is_ok()); decimal_builder.append_value(-99).unwrap(); let arr = decimal_builder.finish(); - assert_eq!("9.9", arr.value_as_string(0)); - assert_eq!("-9.9", arr.value_as_string(1)); + assert_eq!("9.9", arr.value_as_string(1)); + assert_eq!("-9.9", arr.value_as_string(3)); } + #[test] fn test_decimal_from_iter_values() { let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter()); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal(38, 10)); - assert_eq!(-100, array.value(0)); + assert_eq!(-100_i128, array.value(0).into()); assert!(!array.is_null(0)); - assert_eq!(0, array.value(1)); + assert_eq!(0_i128, array.value(1).into()); assert!(!array.is_null(1)); - assert_eq!(101, array.value(2)); + assert_eq!(101_i128, array.value(2).into()); assert!(!array.is_null(2)); } @@ -1541,10 +1549,10 @@ mod tests { let array: DecimalArray = vec![Some(-100), None, Some(101)].into_iter().collect(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), 
&DataType::Decimal(38, 10)); - assert_eq!(-100, array.value(0)); + assert_eq!(-100_i128, array.value(0).into()); assert!(!array.is_null(0)); assert!(array.is_null(1)); - assert_eq!(101, array.value(2)); + assert_eq!(101_i128, array.value(2).into()); assert!(!array.is_null(2)); } @@ -1703,6 +1711,64 @@ mod tests { assert_eq!(5, arr.len()) } + #[test] + fn test_fixed_size_binary_array_from_vec() { + let values = vec!["one".as_bytes(), b"two", b"six", b"ten"]; + let array = FixedSizeBinaryArray::from(values); + assert_eq!(array.len(), 4); + assert_eq!(array.null_count(), 0); + assert_eq!(array.value(0), b"one"); + assert_eq!(array.value(1), b"two"); + assert_eq!(array.value(2), b"six"); + assert_eq!(array.value(3), b"ten"); + assert!(!array.is_null(0)); + assert!(!array.is_null(1)); + assert!(!array.is_null(2)); + assert!(!array.is_null(3)); + } + + #[test] + #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")] + fn test_fixed_size_binary_array_from_vec_incorrect_length() { + let values = vec!["one".as_bytes(), b"two", b"three", b"four"]; + let _ = FixedSizeBinaryArray::from(values); + } + + #[test] + fn test_fixed_size_binary_array_from_opt_vec() { + let values = vec![ + Some("one".as_bytes()), + Some(b"two"), + None, + Some(b"six"), + Some(b"ten"), + ]; + let array = FixedSizeBinaryArray::from(values); + assert_eq!(array.len(), 5); + assert_eq!(array.value(0), b"one"); + assert_eq!(array.value(1), b"two"); + assert_eq!(array.value(3), b"six"); + assert_eq!(array.value(4), b"ten"); + assert!(!array.is_null(0)); + assert!(!array.is_null(1)); + assert!(array.is_null(2)); + assert!(!array.is_null(3)); + assert!(!array.is_null(4)); + } + + #[test] + #[should_panic(expected = "Nested array size mismatch: one is 3, and the other is 5")] + fn test_fixed_size_binary_array_from_opt_vec_incorrect_length() { + let values = vec![ + Some("one".as_bytes()), + Some(b"two"), + None, + Some(b"three"), + Some(b"four"), + ]; + let _ = 
FixedSizeBinaryArray::from(values); + } + #[test] fn test_binary_array_all_null() { let data = vec![None]; diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index b967b3abb49f..0fbd5a34eb60 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -114,11 +114,11 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray { } // Safety: `validate` ensures key type is correct, and - // `validate_dictionary_offset` ensures all offsets are within range + // `validate_values` ensures all offsets are within range let array = unsafe { data.build_unchecked() }; array.validate()?; - array.validate_dictionary_offset()?; + array.validate_values()?; Ok(array.into()) } diff --git a/arrow/src/array/array_primitive.rs b/arrow/src/array/array_primitive.rs index 8893703aa853..6f496562f896 100644 --- a/arrow/src/array/array_primitive.rs +++ b/arrow/src/array/array_primitive.rs @@ -397,29 +397,35 @@ impl<'a, T: ArrowPrimitiveType, Ptr: Into>> FromIterator let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); - let mut null_buf = BooleanBufferBuilder::new(lower); + let mut null_builder = BooleanBufferBuilder::new(lower); let buffer: Buffer = iter .map(|item| { if let Some(a) = item.into().native { - null_buf.append(true); + null_builder.append(true); a } else { - null_buf.append(false); + null_builder.append(false); // this ensures that null items on the buffer are not arbitrary. - // This is important because falible operations can use null values (e.g. a vectorized "add") + // This is important because fallible operations can use null values (e.g. a vectorized "add") // which may panic (e.g. overflow if the number on the slots happen to be very large). 
T::Native::default() } }) .collect(); + let len = null_builder.len(); + let null_buf: Buffer = null_builder.into(); + let valid_count = null_buf.count_set_bits(); + let null_count = len - valid_count; + let opt_null_buf = (null_count != 0).then(|| null_buf); + let data = unsafe { ArrayData::new_unchecked( T::DATA_TYPE, - null_buf.len(), - None, - Some(null_buf.into()), + len, + Some(null_count), + opt_null_buf, 0, vec![buffer], vec![], @@ -1025,6 +1031,16 @@ mod tests { assert_eq!(primitive_array.len(), 10); } + #[test] + fn test_primitive_array_from_non_null_iter() { + let iter = (0..10_i32).map(Some); + let primitive_array = PrimitiveArray::::from_iter(iter); + assert_eq!(primitive_array.len(), 10); + assert_eq!(primitive_array.null_count(), 0); + assert_eq!(primitive_array.data().null_buffer(), None); + assert_eq!(primitive_array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + } + #[test] #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \ (values buffer)")] diff --git a/arrow/src/array/array_union.rs b/arrow/src/array/array_union.rs index 5cfab0bbf858..4ff0a31c6529 100644 --- a/arrow/src/array/array_union.rs +++ b/arrow/src/array/array_union.rs @@ -185,7 +185,7 @@ impl UnionArray { } // Check the type_ids - let type_id_slice: &[i8] = unsafe { type_ids.typed_data() }; + let type_id_slice: &[i8] = type_ids.typed_data(); let invalid_type_ids = type_id_slice .iter() .filter(|i| *i < &0) @@ -201,7 +201,7 @@ impl UnionArray { // Check the value offsets if provided if let Some(offset_buffer) = &value_offsets { let max_len = type_ids.len() as i32; - let offsets_slice: &[i32] = unsafe { offset_buffer.typed_data() }; + let offsets_slice: &[i32] = offset_buffer.typed_data(); let invalid_offsets = offsets_slice .iter() .filter(|i| *i < &0 || *i > &max_len) @@ -255,9 +255,7 @@ impl UnionArray { pub fn value_offset(&self, index: usize) -> i32 { assert!(index - self.offset() < self.len()); if self.is_dense() { - // safety: reinterpreting is safe 
since the offset buffer contains `i32` values and is - // properly aligned. - unsafe { self.data().buffers()[1].typed_data::()[index] } + self.data().buffers()[1].typed_data::()[index] } else { index as i32 } @@ -436,6 +434,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn test_dense_i32_large() { let mut builder = UnionBuilder::new_dense(1024); diff --git a/arrow/src/array/builder.rs b/arrow/src/array/builder.rs deleted file mode 100644 index e22a6f81ed8f..000000000000 --- a/arrow/src/array/builder.rs +++ /dev/null @@ -1,3844 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable -//! of creating a [`Buffer`](crate::buffer::Buffer) which can be used -//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData) -//! object. - -use std::any::Any; -use std::collections::HashMap; -use std::fmt; -use std::marker::PhantomData; -use std::mem; -use std::ops::Range; -use std::sync::Arc; - -use crate::array::*; -use crate::buffer::{Buffer, MutableBuffer}; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; -use crate::util::bit_util; - -/// Converts a `MutableBuffer` to a `BufferBuilder`. 
-/// -/// `slots` is the number of array slots currently represented in the `MutableBuffer`. -pub(crate) fn mutable_buffer_to_builder( - mutable_buffer: MutableBuffer, - slots: usize, -) -> BufferBuilder { - BufferBuilder:: { - buffer: mutable_buffer, - len: slots, - _marker: PhantomData, - } -} - -/// Converts a `BufferBuilder` into its underlying `MutableBuffer`. -/// -/// `From` is not implemented because associated type bounds are unstable. -pub(crate) fn builder_to_mutable_buffer( - builder: BufferBuilder, -) -> MutableBuffer { - builder.buffer -} - -/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. -/// -/// A [`Buffer`](crate::buffer::Buffer) is the underlying data -/// structure of Arrow's [`Arrays`](crate::array::Array). -/// -/// For all supported types, there are type definitions for the -/// generic version of `BufferBuilder`, e.g. `UInt8BufferBuilder`. -/// -/// # Example: -/// -/// ``` -/// use arrow::array::UInt8BufferBuilder; -/// -/// # fn main() -> arrow::error::Result<()> { -/// let mut builder = UInt8BufferBuilder::new(100); -/// builder.append_slice(&[42, 43, 44]); -/// builder.append(45); -/// let buffer = builder.finish(); -/// -/// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 43, 44, 45]); -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug)] -pub struct BufferBuilder { - buffer: MutableBuffer, - len: usize, - _marker: PhantomData, -} - -impl BufferBuilder { - /// Creates a new builder with initial capacity for _at least_ `capacity` - /// elements of type `T`. - /// - /// The capacity can later be manually adjusted with the - /// [`reserve()`](BufferBuilder::reserve) method. - /// Also the - /// [`append()`](BufferBuilder::append), - /// [`append_slice()`](BufferBuilder::append_slice) and - /// [`advance()`](BufferBuilder::advance) - /// methods automatically increase the capacity if needed. 
- /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// - /// assert!(builder.capacity() >= 10); - /// ``` - #[inline] - pub fn new(capacity: usize) -> Self { - let buffer = MutableBuffer::new(capacity * mem::size_of::()); - - Self { - buffer, - len: 0, - _marker: PhantomData, - } - } - - /// Returns the current number of array elements in the internal buffer. - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append(42); - /// - /// assert_eq!(builder.len(), 1); - /// ``` - pub fn len(&self) -> usize { - self.len - } - - /// Returns whether the internal buffer is empty. - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append(42); - /// - /// assert_eq!(builder.is_empty(), false); - /// ``` - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the actual capacity (number of elements) of the internal buffer. - /// - /// Note: the internal capacity returned by this method might be larger than - /// what you'd expect after setting the capacity in the `new()` or `reserve()` - /// functions. - pub fn capacity(&self) -> usize { - let byte_capacity = self.buffer.capacity(); - byte_capacity / std::mem::size_of::() - } - - /// Increases the number of elements in the internal buffer by `n` - /// and resizes the buffer as needed. - /// - /// The values of the newly added elements are 0. - /// This method is usually used when appending `NULL` values to the buffer - /// as they still require physical memory space. 
- /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.advance(2); - /// - /// assert_eq!(builder.len(), 2); - /// ``` - #[inline] - pub fn advance(&mut self, i: usize) { - let new_buffer_len = (self.len + i) * mem::size_of::(); - self.buffer.resize(new_buffer_len, 0); - self.len += i; - } - - /// Reserves memory for _at least_ `n` more elements of type `T`. - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.reserve(10); - /// - /// assert!(builder.capacity() >= 20); - /// ``` - #[inline] - pub fn reserve(&mut self, n: usize) { - self.buffer.reserve(n * mem::size_of::()); - } - - /// Appends a value of type `T` into the builder, - /// growing the internal buffer as needed. - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append(42); - /// - /// assert_eq!(builder.len(), 1); - /// ``` - #[inline] - pub fn append(&mut self, v: T) { - self.reserve(1); - self.buffer.push(v); - self.len += 1; - } - - /// Appends a value of type `T` into the builder N times, - /// growing the internal buffer as needed. - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append_n(10, 42); - /// - /// assert_eq!(builder.len(), 10); - /// ``` - #[inline] - pub fn append_n(&mut self, n: usize, v: T) { - self.reserve(n); - for _ in 0..n { - self.buffer.push(v); - } - self.len += n; - } - - /// Appends a slice of type `T`, growing the internal buffer as needed. 
- /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append_slice(&[42, 44, 46]); - /// - /// assert_eq!(builder.len(), 3); - /// ``` - #[inline] - pub fn append_slice(&mut self, slice: &[T]) { - self.buffer.extend_from_slice(slice); - self.len += slice.len(); - } - - /// # Safety - /// This requires the iterator be a trusted length. This could instead require - /// the iterator implement `TrustedLen` once that is stabilized. - #[inline] - pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator) { - let iter = iter.into_iter(); - let len = iter - .size_hint() - .1 - .expect("append_trusted_len_iter expects upper bound"); - self.reserve(len); - for v in iter { - self.buffer.push(v) - } - self.len += len; - } - - /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer). - /// - /// # Example: - /// - /// ``` - /// use arrow::array::UInt8BufferBuilder; - /// - /// let mut builder = UInt8BufferBuilder::new(10); - /// builder.append_slice(&[42, 44, 46]); - /// - /// let buffer = builder.finish(); - /// - /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 44, 46]); - /// ``` - #[inline] - pub fn finish(&mut self) -> Buffer { - let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); - self.len = 0; - buf.into() - } -} - -#[derive(Debug)] -pub struct BooleanBufferBuilder { - buffer: MutableBuffer, - len: usize, -} - -impl BooleanBufferBuilder { - #[inline] - pub fn new(capacity: usize) -> Self { - let byte_capacity = bit_util::ceil(capacity, 8); - let buffer = MutableBuffer::new(byte_capacity); - Self { buffer, len: 0 } - } - - #[inline] - pub fn len(&self) -> usize { - self.len - } - - #[inline] - pub fn set_bit(&mut self, index: usize, v: bool) { - if v { - bit_util::set_bit(self.buffer.as_mut(), index); - } else { - bit_util::unset_bit(self.buffer.as_mut(), index); - } - } - - #[inline] - pub fn 
get_bit(&self, index: usize) -> bool { - bit_util::get_bit(self.buffer.as_slice(), index) - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline] - pub fn capacity(&self) -> usize { - self.buffer.capacity() * 8 - } - - #[inline] - pub fn advance(&mut self, additional: usize) { - let new_len = self.len + additional; - let new_len_bytes = bit_util::ceil(new_len, 8); - if new_len_bytes > self.buffer.len() { - self.buffer.resize(new_len_bytes, 0); - } - self.len = new_len; - } - - /// Reserve space to at least `additional` new bits. - /// Capacity will be `>= self.len() + additional`. - /// New bytes are uninitialized and reading them is undefined behavior. - #[inline] - pub fn reserve(&mut self, additional: usize) { - let capacity = self.len + additional; - if capacity > self.capacity() { - // convert differential to bytes - let additional = bit_util::ceil(capacity, 8) - self.buffer.len(); - self.buffer.reserve(additional); - } - } - - /// Resizes the buffer, either truncating its contents (with no change in capacity), or - /// growing it (potentially reallocating it) and writing `false` in the newly available bits. 
- #[inline] - pub fn resize(&mut self, len: usize) { - let len_bytes = bit_util::ceil(len, 8); - self.buffer.resize(len_bytes, 0); - self.len = len; - } - - #[inline] - pub fn append(&mut self, v: bool) { - self.advance(1); - if v { - unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) }; - } - } - - #[inline] - pub fn append_n(&mut self, additional: usize, v: bool) { - self.advance(additional); - if additional > 0 && v { - let offset = self.len() - additional; - (0..additional).for_each(|i| unsafe { - bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) - }) - } - } - - #[inline] - pub fn append_slice(&mut self, slice: &[bool]) { - let additional = slice.len(); - self.advance(additional); - - let offset = self.len() - additional; - for (i, v) in slice.iter().enumerate() { - if *v { - unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) } - } - } - } - - /// Append `range` bits from `to_set` - /// - /// `to_set` is a slice of bits packed LSB-first into `[u8]` - /// - /// # Panics - /// - /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes - pub fn append_packed_range(&mut self, range: Range, to_set: &[u8]) { - let offset_write = self.len; - let len = range.end - range.start; - self.advance(len); - crate::util::bit_mask::set_bits( - self.buffer.as_slice_mut(), - to_set, - offset_write, - range.start, - len, - ); - } - - /// Returns the packed bits - pub fn as_slice(&self) -> &[u8] { - self.buffer.as_slice() - } - - #[inline] - pub fn finish(&mut self) -> Buffer { - let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); - self.len = 0; - buf.into() - } -} - -impl From for Buffer { - #[inline] - fn from(builder: BooleanBufferBuilder) -> Self { - builder.buffer.into() - } -} - -/// Trait for dealing with different array builders at runtime -/// -/// # Example -/// -/// ``` -/// # use arrow::{ -/// # array::{ArrayBuilder, ArrayRef, Float64Builder, Int64Builder, StringArray, StringBuilder}, -/// # 
error::ArrowError, -/// # }; -/// # fn main() -> std::result::Result<(), ArrowError> { -/// // Create -/// let mut data_builders: Vec> = vec![ -/// Box::new(Float64Builder::new(1024)), -/// Box::new(Int64Builder::new(1024)), -/// Box::new(StringBuilder::new(1024)), -/// ]; -/// -/// // Fill -/// data_builders[0] -/// .as_any_mut() -/// .downcast_mut::() -/// .unwrap() -/// .append_value(3.14)?; -/// data_builders[1] -/// .as_any_mut() -/// .downcast_mut::() -/// .unwrap() -/// .append_value(-1)?; -/// data_builders[2] -/// .as_any_mut() -/// .downcast_mut::() -/// .unwrap() -/// .append_value("🍎")?; -/// -/// // Finish -/// let array_refs: Vec = data_builders -/// .iter_mut() -/// .map(|builder| builder.finish()) -/// .collect(); -/// assert_eq!(array_refs[0].len(), 1); -/// assert_eq!(array_refs[1].is_null(0), false); -/// assert_eq!( -/// array_refs[2] -/// .as_any() -/// .downcast_ref::() -/// .unwrap() -/// .value(0), -/// "🍎" -/// ); -/// # Ok(()) -/// # } -/// ``` -pub trait ArrayBuilder: Any + Send { - /// Returns the number of array slots in the builder - fn len(&self) -> usize; - - /// Returns whether number of array slots is zero - fn is_empty(&self) -> bool; - - /// Builds the array - fn finish(&mut self) -> ArrayRef; - - /// Returns the builder as a non-mutable `Any` reference. - /// - /// This is most useful when one wants to call non-mutable APIs on a specific builder - /// type. In this case, one can first cast this into a `Any`, and then use - /// `downcast_ref` to get a reference on the specific builder. - fn as_any(&self) -> &dyn Any; - - /// Returns the builder as a mutable `Any` reference. - /// - /// This is most useful when one wants to call mutable APIs on a specific builder - /// type. In this case, one can first cast this into a `Any`, and then use - /// `downcast_mut` to get a reference on the specific builder. - fn as_any_mut(&mut self) -> &mut dyn Any; - - /// Returns the boxed builder as a box of `Any`. 
- fn into_box_any(self: Box) -> Box; -} - -/// Array builder for fixed-width primitive types -/// -/// # Example -/// -/// Create a `BooleanArray` from a `BooleanBuilder` -/// -/// ``` -/// use arrow::array::{Array, BooleanArray, BooleanBuilder}; -/// -/// let mut b = BooleanBuilder::new(4); -/// b.append_value(true); -/// b.append_null(); -/// b.append_value(false); -/// b.append_value(true); -/// let arr = b.finish(); -/// -/// assert_eq!(4, arr.len()); -/// assert_eq!(1, arr.null_count()); -/// assert_eq!(true, arr.value(0)); -/// assert!(arr.is_valid(0)); -/// assert!(!arr.is_null(0)); -/// assert!(!arr.is_valid(1)); -/// assert!(arr.is_null(1)); -/// assert_eq!(false, arr.value(2)); -/// assert!(arr.is_valid(2)); -/// assert!(!arr.is_null(2)); -/// assert_eq!(true, arr.value(3)); -/// assert!(arr.is_valid(3)); -/// assert!(!arr.is_null(3)); -/// ``` -#[derive(Debug)] -pub struct BooleanBuilder { - values_builder: BooleanBufferBuilder, - bitmap_builder: BooleanBufferBuilder, -} - -impl BooleanBuilder { - /// Creates a new primitive array builder - pub fn new(capacity: usize) -> Self { - Self { - values_builder: BooleanBufferBuilder::new(capacity), - bitmap_builder: BooleanBufferBuilder::new(capacity), - } - } - - /// Returns the capacity of this builder measured in slots of type `T` - pub fn capacity(&self) -> usize { - self.values_builder.capacity() - } - - /// Appends a value of type `T` into the builder - #[inline] - pub fn append_value(&mut self, v: bool) -> Result<()> { - self.bitmap_builder.append(true); - self.values_builder.append(v); - Ok(()) - } - - /// Appends a null slot into the builder - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.bitmap_builder.append(false); - self.values_builder.advance(1); - Ok(()) - } - - /// Appends an `Option` into the builder - #[inline] - pub fn append_option(&mut self, v: Option) -> Result<()> { - match v { - None => self.append_null()?, - Some(v) => self.append_value(v)?, - }; - Ok(()) - } - - /// 
Appends a slice of type `T` into the builder - #[inline] - pub fn append_slice(&mut self, v: &[bool]) -> Result<()> { - self.bitmap_builder.append_n(v.len(), true); - self.values_builder.append_slice(v); - Ok(()) - } - - /// Appends values from a slice of type `T` and a validity boolean slice - #[inline] - pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> { - if values.len() != is_valid.len() { - return Err(ArrowError::InvalidArgumentError( - "Value and validity lengths must be equal".to_string(), - )); - } - self.bitmap_builder.append_slice(is_valid); - self.values_builder.append_slice(values); - Ok(()) - } - - /// Builds the [BooleanArray] and reset this builder. - pub fn finish(&mut self) -> BooleanArray { - let len = self.len(); - let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = len - null_bit_buffer.count_set_bits(); - let builder = ArrayData::builder(DataType::Boolean) - .len(len) - .add_buffer(self.values_builder.finish()) - .null_bit_buffer((null_count > 0).then(|| null_bit_buffer)); - - let array_data = unsafe { builder.build_unchecked() }; - BooleanArray::from(array_data) - } -} - -impl ArrayBuilder for BooleanBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.values_builder.len - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.values_builder.is_empty() - } - - /// Builds the array and reset this builder. 
- fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -/// Array builder for fixed-width primitive types -#[derive(Debug)] -pub struct PrimitiveBuilder { - values_builder: BufferBuilder, - /// We only materialize the builder when we add `false`. - /// This optimization is **very** important for performance of `StringBuilder`. - bitmap_builder: Option, -} - -impl ArrayBuilder for PrimitiveBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.values_builder.len - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.values_builder.is_empty() - } - - /// Builds the array and reset this builder. 
- fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl PrimitiveBuilder { - /// Creates a new primitive array builder - pub fn new(capacity: usize) -> Self { - Self { - values_builder: BufferBuilder::::new(capacity), - bitmap_builder: None, - } - } - - /// Returns the capacity of this builder measured in slots of type `T` - pub fn capacity(&self) -> usize { - self.values_builder.capacity() - } - - /// Appends a value of type `T` into the builder - #[inline] - pub fn append_value(&mut self, v: T::Native) -> Result<()> { - if let Some(b) = self.bitmap_builder.as_mut() { - b.append(true); - } - self.values_builder.append(v); - Ok(()) - } - - /// Appends a null slot into the builder - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.materialize_bitmap_builder(); - self.bitmap_builder.as_mut().unwrap().append(false); - self.values_builder.advance(1); - Ok(()) - } - - #[inline] - pub fn append_nulls(&mut self, n: usize) -> Result<()> { - self.materialize_bitmap_builder(); - self.bitmap_builder.as_mut().unwrap().append_n(n, false); - self.values_builder.advance(n); - Ok(()) - } - - /// Appends an `Option` into the builder - #[inline] - pub fn append_option(&mut self, v: Option) -> Result<()> { - match v { - None => self.append_null()?, - Some(v) => self.append_value(v)?, - }; - Ok(()) - } - - /// Appends a slice of type `T` into the builder - #[inline] - pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> { - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_n(v.len(), true); - } - self.values_builder.append_slice(v); - Ok(()) - } - - /// Appends values from a slice of type `T` and a validity boolean slice - #[inline] - pub fn append_values( - &mut self, - values: &[T::Native], - is_valid: &[bool], - ) -> Result<()> { - if values.len() != is_valid.len() { - return Err(ArrowError::InvalidArgumentError( - "Value and validity lengths must be equal".to_string(), - )); - } - if is_valid.iter().any(|v| !*v) { - 
self.materialize_bitmap_builder(); - } - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_slice(is_valid); - } - self.values_builder.append_slice(values); - Ok(()) - } - - /// Appends values from a trusted length iterator. - /// - /// # Safety - /// This requires the iterator be a trusted length. This could instead require - /// the iterator implement `TrustedLen` once that is stabilized. - #[inline] - pub unsafe fn append_trusted_len_iter( - &mut self, - iter: impl IntoIterator, - ) -> Result<()> { - let iter = iter.into_iter(); - let len = iter - .size_hint() - .1 - .expect("append_trusted_len_iter requires an upper bound"); - - if let Some(b) = self.bitmap_builder.as_mut() { - b.append_n(len, true); - } - self.values_builder.append_trusted_len_iter(iter); - Ok(()) - } - - /// Builds the `PrimitiveArray` and reset this builder. - pub fn finish(&mut self) -> PrimitiveArray { - let len = self.len(); - let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); - let null_count = len - - null_bit_buffer - .as_ref() - .map(|b| b.count_set_bits()) - .unwrap_or(len); - let builder = ArrayData::builder(T::DATA_TYPE) - .len(len) - .add_buffer(self.values_builder.finish()) - .null_bit_buffer(if null_count > 0 { - null_bit_buffer - } else { - None - }); - - let array_data = unsafe { builder.build_unchecked() }; - PrimitiveArray::::from(array_data) - } - - /// Builds the `DictionaryArray` and reset this builder. 
- pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray { - let len = self.len(); - let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); - let null_count = len - - null_bit_buffer - .as_ref() - .map(|b| b.count_set_bits()) - .unwrap_or(len); - let data_type = DataType::Dictionary( - Box::new(T::DATA_TYPE), - Box::new(values.data_type().clone()), - ); - let mut builder = ArrayData::builder(data_type) - .len(len) - .add_buffer(self.values_builder.finish()); - if null_count > 0 { - builder = builder.null_bit_buffer(null_bit_buffer); - } - builder = builder.add_child_data(values.data().clone()); - let array_data = unsafe { builder.build_unchecked() }; - DictionaryArray::::from(array_data) - } - - fn materialize_bitmap_builder(&mut self) { - if self.bitmap_builder.is_some() { - return; - } - let mut b = BooleanBufferBuilder::new(0); - b.reserve(self.values_builder.capacity()); - b.append_n(self.values_builder.len, true); - self.bitmap_builder = Some(b); - } -} - -/// Array builder for `ListArray` -#[derive(Debug)] -pub struct GenericListBuilder { - offsets_builder: BufferBuilder, - bitmap_builder: BooleanBufferBuilder, - values_builder: T, - len: OffsetSize, -} - -impl GenericListBuilder { - /// Creates a new `ListArrayBuilder` from a given values array builder - pub fn new(values_builder: T) -> Self { - let capacity = values_builder.len(); - Self::with_capacity(values_builder, capacity) - } - - /// Creates a new `ListArrayBuilder` from a given values array builder - /// `capacity` is the number of items to pre-allocate space for in this builder - pub fn with_capacity(values_builder: T, capacity: usize) -> Self { - let mut offsets_builder = BufferBuilder::::new(capacity + 1); - let len = OffsetSize::zero(); - offsets_builder.append(len); - Self { - offsets_builder, - bitmap_builder: BooleanBufferBuilder::new(capacity), - values_builder, - len, - } - } -} - -impl ArrayBuilder - for GenericListBuilder -where - T: 'static, -{ - /// Returns 
the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.len.to_usize().unwrap() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.len == OffsetSize::zero() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl GenericListBuilder -where - T: 'static, -{ - /// Returns the child array builder as a mutable reference. - /// - /// This mutable reference can be used to append values into the child array builder, - /// but you must call `append` to delimit each distinct list value. - pub fn values(&mut self) -> &mut T { - &mut self.values_builder - } - - /// Finish the current variable-length list array slot - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.offsets_builder - .append(OffsetSize::from_usize(self.values_builder.len()).unwrap()); - self.bitmap_builder.append(is_valid); - self.len += OffsetSize::one(); - Ok(()) - } - - /// Builds the `ListArray` and reset this builder. 
- pub fn finish(&mut self) -> GenericListArray { - let len = self.len(); - self.len = OffsetSize::zero(); - let values_arr = self - .values_builder - .as_any_mut() - .downcast_mut::() - .unwrap() - .finish(); - let values_data = values_arr.data(); - - let offset_buffer = self.offsets_builder.finish(); - let null_bit_buffer = self.bitmap_builder.finish(); - self.offsets_builder.append(self.len); - let field = Box::new(Field::new( - "item", - values_data.data_type().clone(), - true, // TODO: find a consistent way of getting this - )); - let data_type = if OffsetSize::IS_LARGE { - DataType::LargeList(field) - } else { - DataType::List(field) - }; - let array_data = ArrayData::builder(data_type) - .len(len) - .add_buffer(offset_buffer) - .add_child_data(values_data.clone()) - .null_bit_buffer(Some(null_bit_buffer)); - - let array_data = unsafe { array_data.build_unchecked() }; - - GenericListArray::::from(array_data) - } -} - -pub type ListBuilder = GenericListBuilder; -pub type LargeListBuilder = GenericListBuilder; - -/// Array builder for `ListArray` -#[derive(Debug)] -pub struct FixedSizeListBuilder { - bitmap_builder: BooleanBufferBuilder, - values_builder: T, - len: usize, - list_len: i32, -} - -impl FixedSizeListBuilder { - /// Creates a new `FixedSizeListBuilder` from a given values array builder - /// `length` is the number of values within each array - pub fn new(values_builder: T, length: i32) -> Self { - let capacity = values_builder.len(); - Self::with_capacity(values_builder, length, capacity) - } - - /// Creates a new `FixedSizeListBuilder` from a given values array builder - /// `length` is the number of values within each array - /// `capacity` is the number of items to pre-allocate space for in this builder - pub fn with_capacity(values_builder: T, length: i32, capacity: usize) -> Self { - let mut offsets_builder = Int32BufferBuilder::new(capacity + 1); - offsets_builder.append(0); - Self { - bitmap_builder: BooleanBufferBuilder::new(capacity), - 
values_builder, - len: 0, - list_len: length, - } - } -} - -impl ArrayBuilder for FixedSizeListBuilder -where - T: 'static, -{ - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.len - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl FixedSizeListBuilder -where - T: 'static, -{ - /// Returns the child array builder as a mutable reference. - /// - /// This mutable reference can be used to append values into the child array builder, - /// but you must call `append` to delimit each distinct list value. - pub fn values(&mut self) -> &mut T { - &mut self.values_builder - } - - pub fn value_length(&self) -> i32 { - self.list_len - } - - /// Finish the current variable-length list array slot - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.bitmap_builder.append(is_valid); - self.len += 1; - Ok(()) - } - - /// Builds the `FixedSizeListBuilder` and reset this builder. 
- pub fn finish(&mut self) -> FixedSizeListArray { - let len = self.len(); - self.len = 0; - let values_arr = self - .values_builder - .as_any_mut() - .downcast_mut::() - .unwrap() - .finish(); - let values_data = values_arr.data(); - - // check that values_data length is multiple of len if we have data - if len != 0 { - assert!( - values_data.len() / len == self.list_len as usize, - "Values of FixedSizeList must have equal lengths, values have length {} and list has {}", - values_data.len() / len, - self.list_len - ); - } - - let null_bit_buffer = self.bitmap_builder.finish(); - let array_data = ArrayData::builder(DataType::FixedSizeList( - Box::new(Field::new("item", values_data.data_type().clone(), true)), - self.list_len, - )) - .len(len) - .add_child_data(values_data.clone()) - .null_bit_buffer(Some(null_bit_buffer)); - - let array_data = unsafe { array_data.build_unchecked() }; - - FixedSizeListArray::from(array_data) - } -} - -/// Array builder for `BinaryArray` -#[derive(Debug)] -pub struct GenericBinaryBuilder { - builder: GenericListBuilder, -} - -pub type BinaryBuilder = GenericBinaryBuilder; -pub type LargeBinaryBuilder = GenericBinaryBuilder; - -#[derive(Debug)] -pub struct GenericStringBuilder { - builder: GenericListBuilder, -} - -pub type StringBuilder = GenericStringBuilder; -pub type LargeStringBuilder = GenericStringBuilder; - -#[derive(Debug)] -pub struct FixedSizeBinaryBuilder { - builder: FixedSizeListBuilder, -} - -/// -/// Array Builder for [`DecimalArray`] -/// -/// See [`DecimalArray`] for example. -/// -#[derive(Debug)] -pub struct DecimalBuilder { - builder: FixedSizeListBuilder, - precision: usize, - scale: usize, -} - -impl ArrayBuilder for GenericBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. 
- fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for GenericStringBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - let a = GenericStringBuilder::::finish(self); - Arc::new(a) - } -} - -impl ArrayBuilder for FixedSizeBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for DecimalBuilder { - /// Returns the builder as a non-mutable `Any` reference. 
- fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl GenericBinaryBuilder { - /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Appends a single byte value into the builder's values array. - /// - /// Note, when appending individual byte values you must call `append` to delimit each - /// distinct list value. - #[inline] - pub fn append_byte(&mut self, value: u8) -> Result<()> { - self.builder.values().append_value(value)?; - Ok(()) - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Builds the `BinaryArray` and reset this builder. 
- pub fn finish(&mut self) -> GenericBinaryArray { - GenericBinaryArray::::from(self.builder.finish()) - } -} - -impl GenericStringBuilder { - /// Creates a new `StringBuilder`, - /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Creates a new `StringBuilder`, - /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder - /// `item_capacity` is the number of items to pre-allocate space for in this builder - pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let values_builder = UInt8Builder::new(data_capacity); - Self { - builder: GenericListBuilder::with_capacity(values_builder, item_capacity), - } - } - - /// Appends a string into the builder. - /// - /// Automatically calls the `append` method to delimit the string appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { - self.builder - .values() - .append_slice(value.as_ref().as_bytes())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Append an `Option` value to the array. - #[inline] - pub fn append_option(&mut self, value: Option>) -> Result<()> { - match value { - None => self.append_null()?, - Some(v) => self.append_value(v)?, - }; - Ok(()) - } - - /// Builds the `StringArray` and reset this builder. 
- pub fn finish(&mut self) -> GenericStringArray { - GenericStringArray::::from(self.builder.finish()) - } -} - -impl FixedSizeBinaryBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, byte_width: i32) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: FixedSizeListBuilder::new(values_builder, byte_width), - } - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - if self.builder.value_length() != value.as_ref().len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() - )); - } - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - let length: usize = self.builder.value_length() as usize; - self.builder.values().append_slice(&vec![0u8; length][..])?; - self.builder.append(false) - } - - /// Builds the `FixedSizeBinaryArray` and reset this builder. - pub fn finish(&mut self) -> FixedSizeBinaryArray { - FixedSizeBinaryArray::from(self.builder.finish()) - } -} - -impl DecimalBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - let byte_width = 16; - Self { - builder: FixedSizeListBuilder::new(values_builder, byte_width), - precision, - scale, - } - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. 
- #[inline] - pub fn append_value(&mut self, value: i128) -> Result<()> { - let value = validate_decimal_precision(value, self.precision)?; - let value_as_bytes = Self::from_i128_to_fixed_size_bytes( - value, - self.builder.value_length() as usize, - )?; - if self.builder.value_length() != value_as_bytes.len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as DecimalBuilder value lengths".to_string() - )); - } - self.builder - .values() - .append_slice(value_as_bytes.as_slice())?; - self.builder.append(true) - } - - fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result> { - if size > 16 { - return Err(ArrowError::InvalidArgumentError( - "DecimalBuilder only supports values up to 16 bytes.".to_string(), - )); - } - let res = v.to_le_bytes(); - let start_byte = 16 - size; - Ok(res[start_byte..16].to_vec()) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - let length: usize = self.builder.value_length() as usize; - self.builder.values().append_slice(&vec![0u8; length][..])?; - self.builder.append(false) - } - - /// Builds the `DecimalArray` and reset this builder. - pub fn finish(&mut self) -> DecimalArray { - DecimalArray::from_fixed_size_list_array( - self.builder.finish(), - self.precision, - self.scale, - ) - } -} - -/// Array builder for Struct types. -/// -/// Note that callers should make sure that methods of all the child field builders are -/// properly called to maintain the consistency of the data structure. 
-pub struct StructBuilder { - fields: Vec, - field_builders: Vec>, - bitmap_builder: BooleanBufferBuilder, - len: usize, -} - -impl fmt::Debug for StructBuilder { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("StructBuilder") - .field("fields", &self.fields) - .field("bitmap_builder", &self.bitmap_builder) - .field("len", &self.len) - .finish() - } -} - -impl ArrayBuilder for StructBuilder { - /// Returns the number of array slots in the builder. - /// - /// Note that this always return the first child field builder's length, and it is - /// the caller's responsibility to maintain the consistency that all the child field - /// builder should have the equal number of elements. - fn len(&self) -> usize { - self.len - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Builds the array. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } - - /// Returns the builder as a non-mutable `Any` reference. - /// - /// This is most useful when one wants to call non-mutable APIs on a specific builder - /// type. In this case, one can first cast this into a `Any`, and then use - /// `downcast_ref` to get a reference on the specific builder. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - /// - /// This is most useful when one wants to call mutable APIs on a specific builder - /// type. In this case, one can first cast this into a `Any`, and then use - /// `downcast_mut` to get a reference on the specific builder. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } -} - -/// Returns a builder with capacity `capacity` that corresponds to the datatype `DataType` -/// This function is useful to construct arrays from an arbitrary vectors with known/expected -/// schema. 
-pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { - match datatype { - DataType::Null => unimplemented!(), - DataType::Boolean => Box::new(BooleanBuilder::new(capacity)), - DataType::Int8 => Box::new(Int8Builder::new(capacity)), - DataType::Int16 => Box::new(Int16Builder::new(capacity)), - DataType::Int32 => Box::new(Int32Builder::new(capacity)), - DataType::Int64 => Box::new(Int64Builder::new(capacity)), - DataType::UInt8 => Box::new(UInt8Builder::new(capacity)), - DataType::UInt16 => Box::new(UInt16Builder::new(capacity)), - DataType::UInt32 => Box::new(UInt32Builder::new(capacity)), - DataType::UInt64 => Box::new(UInt64Builder::new(capacity)), - DataType::Float32 => Box::new(Float32Builder::new(capacity)), - DataType::Float64 => Box::new(Float64Builder::new(capacity)), - DataType::Binary => Box::new(BinaryBuilder::new(capacity)), - DataType::FixedSizeBinary(len) => { - Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) - } - DataType::Decimal(precision, scale) => { - Box::new(DecimalBuilder::new(capacity, *precision, *scale)) - } - DataType::Utf8 => Box::new(StringBuilder::new(capacity)), - DataType::Date32 => Box::new(Date32Builder::new(capacity)), - DataType::Date64 => Box::new(Date64Builder::new(capacity)), - DataType::Time32(TimeUnit::Second) => { - Box::new(Time32SecondBuilder::new(capacity)) - } - DataType::Time32(TimeUnit::Millisecond) => { - Box::new(Time32MillisecondBuilder::new(capacity)) - } - DataType::Time64(TimeUnit::Microsecond) => { - Box::new(Time64MicrosecondBuilder::new(capacity)) - } - DataType::Time64(TimeUnit::Nanosecond) => { - Box::new(Time64NanosecondBuilder::new(capacity)) - } - DataType::Timestamp(TimeUnit::Second, _) => { - Box::new(TimestampSecondBuilder::new(capacity)) - } - DataType::Timestamp(TimeUnit::Millisecond, _) => { - Box::new(TimestampMillisecondBuilder::new(capacity)) - } - DataType::Timestamp(TimeUnit::Microsecond, _) => { - Box::new(TimestampMicrosecondBuilder::new(capacity)) - } - 
DataType::Timestamp(TimeUnit::Nanosecond, _) => { - Box::new(TimestampNanosecondBuilder::new(capacity)) - } - DataType::Interval(IntervalUnit::YearMonth) => { - Box::new(IntervalYearMonthBuilder::new(capacity)) - } - DataType::Interval(IntervalUnit::DayTime) => { - Box::new(IntervalDayTimeBuilder::new(capacity)) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - Box::new(IntervalMonthDayNanoBuilder::new(capacity)) - } - DataType::Duration(TimeUnit::Second) => { - Box::new(DurationSecondBuilder::new(capacity)) - } - DataType::Duration(TimeUnit::Millisecond) => { - Box::new(DurationMillisecondBuilder::new(capacity)) - } - DataType::Duration(TimeUnit::Microsecond) => { - Box::new(DurationMicrosecondBuilder::new(capacity)) - } - DataType::Duration(TimeUnit::Nanosecond) => { - Box::new(DurationNanosecondBuilder::new(capacity)) - } - DataType::Struct(fields) => { - Box::new(StructBuilder::from_fields(fields.clone(), capacity)) - } - t => panic!("Data type {:?} is not currently supported", t), - } -} - -impl StructBuilder { - pub fn new(fields: Vec, field_builders: Vec>) -> Self { - Self { - fields, - field_builders, - bitmap_builder: BooleanBufferBuilder::new(0), - len: 0, - } - } - - pub fn from_fields(fields: Vec, capacity: usize) -> Self { - let mut builders = Vec::with_capacity(fields.len()); - for field in &fields { - builders.push(make_builder(field.data_type(), capacity)); - } - Self::new(fields, builders) - } - - /// Returns a mutable reference to the child field builder at index `i`. - /// Result will be `None` if the input type `T` provided doesn't match the actual - /// field builder's type. - pub fn field_builder(&mut self, i: usize) -> Option<&mut T> { - self.field_builders[i].as_any_mut().downcast_mut::() - } - - /// Returns the number of fields for the struct this builder is building. - pub fn num_fields(&self) -> usize { - self.field_builders.len() - } - - /// Appends an element (either null or non-null) to the struct. 
The actual elements - /// should be appended for each child sub-array in a consistent way. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.bitmap_builder.append(is_valid); - self.len += 1; - Ok(()) - } - - /// Appends a null element to the struct. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Builds the `StructArray` and reset this builder. - pub fn finish(&mut self) -> StructArray { - let mut child_data = Vec::with_capacity(self.field_builders.len()); - for f in &mut self.field_builders { - let arr = f.finish(); - child_data.push(arr.data().clone()); - } - - let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = self.len - null_bit_buffer.count_set_bits(); - let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) - .len(self.len) - .child_data(child_data); - if null_count > 0 { - builder = builder.null_bit_buffer(Some(null_bit_buffer)); - } - - self.len = 0; - - let array_data = unsafe { builder.build_unchecked() }; - StructArray::from(array_data) - } -} - -#[derive(Debug)] -pub struct MapBuilder { - offsets_builder: BufferBuilder, - bitmap_builder: BooleanBufferBuilder, - field_names: MapFieldNames, - key_builder: K, - value_builder: V, - len: i32, -} - -#[derive(Debug, Clone)] -pub struct MapFieldNames { - pub entry: String, - pub key: String, - pub value: String, -} - -impl Default for MapFieldNames { - fn default() -> Self { - Self { - entry: "entries".to_string(), - key: "keys".to_string(), - value: "values".to_string(), - } - } -} - -#[allow(dead_code)] -impl MapBuilder { - pub fn new( - field_names: Option, - key_builder: K, - value_builder: V, - ) -> Self { - let capacity = key_builder.len(); - Self::with_capacity(field_names, key_builder, value_builder, capacity) - } - - pub fn with_capacity( - field_names: Option, - key_builder: K, - value_builder: V, - capacity: usize, - ) -> Self { - let mut offsets_builder = BufferBuilder::::new(capacity + 
1); - let len = 0; - offsets_builder.append(len); - Self { - offsets_builder, - bitmap_builder: BooleanBufferBuilder::new(capacity), - field_names: field_names.unwrap_or_default(), - key_builder, - value_builder, - len, - } - } - - pub fn keys(&mut self) -> &mut K { - &mut self.key_builder - } - - pub fn values(&mut self) -> &mut V { - &mut self.value_builder - } - - /// Finish the current map array slot - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - if self.key_builder.len() != self.value_builder.len() { - return Err(ArrowError::InvalidArgumentError(format!( - "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}", - self.key_builder.len(), - self.value_builder.len() - ))); - } - self.offsets_builder.append(self.key_builder.len() as i32); - self.bitmap_builder.append(is_valid); - self.len += 1; - Ok(()) - } - - pub fn finish(&mut self) -> MapArray { - let len = self.len(); - self.len = 0; - - // Build the keys - let keys_arr = self - .key_builder - .as_any_mut() - .downcast_mut::() - .unwrap() - .finish(); - let values_arr = self - .value_builder - .as_any_mut() - .downcast_mut::() - .unwrap() - .finish(); - - let keys_field = Field::new( - self.field_names.key.as_str(), - keys_arr.data_type().clone(), - false, // always nullable - ); - let values_field = Field::new( - self.field_names.value.as_str(), - values_arr.data_type().clone(), - true, - ); - - let struct_array = - StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]); - - let offset_buffer = self.offsets_builder.finish(); - let null_bit_buffer = self.bitmap_builder.finish(); - self.offsets_builder.append(self.len); - let map_field = Box::new(Field::new( - self.field_names.entry.as_str(), - struct_array.data_type().clone(), - false, // always non-nullable - )); - let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys - .len(len) - .add_buffer(offset_buffer) - 
.add_child_data(struct_array.data().clone()) - .null_bit_buffer(Some(null_bit_buffer)); - - let array_data = unsafe { array_data.build_unchecked() }; - - MapArray::from(array_data) - } -} - -impl ArrayBuilder for MapBuilder { - fn len(&self) -> usize { - self.len as usize - } - - fn is_empty(&self) -> bool { - self.len == 0 - } - - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - fn into_box_any(self: Box) -> Box { - self - } -} - -/// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`. -#[derive(Debug)] -struct FieldData { - /// The type id for this field - type_id: i8, - /// The Arrow data type represented in the `values_buffer`, which is untyped - data_type: DataType, - /// A buffer containing the values for this field in raw bytes - values_buffer: Option, - /// The number of array slots represented by the buffer - slots: usize, - /// A builder for the null bitmap - bitmap_builder: BooleanBufferBuilder, -} - -impl FieldData { - /// Creates a new `FieldData`. - fn new(type_id: i8, data_type: DataType) -> Self { - Self { - type_id, - data_type, - values_buffer: Some(MutableBuffer::new(1)), - slots: 0, - bitmap_builder: BooleanBufferBuilder::new(1), - } - } - - /// Appends a single value to this `FieldData`'s `values_buffer`. - #[allow(clippy::unnecessary_wraps)] - fn append_to_values_buffer( - &mut self, - v: T::Native, - ) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - builder.append(v); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); - - self.slots += 1; - self.bitmap_builder.append(true); - Ok(()) - } - - /// Appends a null to this `FieldData`. 
- #[allow(clippy::unnecessary_wraps)] - fn append_null(&mut self) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - - builder.advance(1); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); - self.slots += 1; - self.bitmap_builder.append(false); - Ok(()) - } - - /// Appends a null to this `FieldData` when the type is not known at compile time. - /// - /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null - /// slots to the fields that are not being appended to in the case of sparse unions. This - /// method solves this problem by appending dynamically based on `DataType`. - /// - /// Note, this method does **not** update the length of the `UnionArray` (this is done by the - /// main append operation) and assumes that it is called from a method that is generic over `T` - /// where `T` satisfies the bound `ArrowPrimitiveType`. - fn append_null_dynamic(&mut self) -> Result<()> { - match self.data_type { - DataType::Null => unimplemented!(), - DataType::Int8 => self.append_null::()?, - DataType::Int16 => self.append_null::()?, - DataType::Int32 - | DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { - self.append_null::()? 
- } - DataType::Int64 - | DataType::Timestamp(_, _) - | DataType::Date64 - | DataType::Time64(_) - | DataType::Interval(IntervalUnit::DayTime) - | DataType::Duration(_) => self.append_null::()?, - DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::()?, - DataType::UInt8 => self.append_null::()?, - DataType::UInt16 => self.append_null::()?, - DataType::UInt32 => self.append_null::()?, - DataType::UInt64 => self.append_null::()?, - DataType::Float32 => self.append_null::()?, - DataType::Float64 => self.append_null::()?, - _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."), - }; - Ok(()) - } -} - -/// Builder type for creating a new `UnionArray`. -/// -/// Example: **Dense Memory Layout** -/// -/// ``` -/// use arrow::array::UnionBuilder; -/// use arrow::datatypes::{Float64Type, Int32Type}; -/// -/// let mut builder = UnionBuilder::new_dense(3); -/// builder.append::("a", 1).unwrap(); -/// builder.append::("b", 3.0).unwrap(); -/// builder.append::("a", 4).unwrap(); -/// let union = builder.build().unwrap(); -/// -/// assert_eq!(union.type_id(0), 0_i8); -/// assert_eq!(union.type_id(1), 1_i8); -/// assert_eq!(union.type_id(2), 0_i8); -/// -/// assert_eq!(union.value_offset(0), 0_i32); -/// assert_eq!(union.value_offset(1), 0_i32); -/// assert_eq!(union.value_offset(2), 1_i32); -/// ``` -/// -/// Example: **Sparse Memory Layout** -/// ``` -/// use arrow::array::UnionBuilder; -/// use arrow::datatypes::{Float64Type, Int32Type}; -/// -/// let mut builder = UnionBuilder::new_sparse(3); -/// builder.append::("a", 1).unwrap(); -/// builder.append::("b", 3.0).unwrap(); -/// builder.append::("a", 4).unwrap(); -/// let union = builder.build().unwrap(); -/// -/// assert_eq!(union.type_id(0), 0_i8); -/// assert_eq!(union.type_id(1), 1_i8); -/// assert_eq!(union.type_id(2), 0_i8); -/// -/// assert_eq!(union.value_offset(0), 0_i32); -/// assert_eq!(union.value_offset(1), 1_i32); -/// assert_eq!(union.value_offset(2), 
2_i32); -/// ``` -#[derive(Debug)] -pub struct UnionBuilder { - /// The current number of slots in the array - len: usize, - /// Maps field names to `FieldData` instances which track the builders for that field - fields: HashMap, - /// Builder to keep track of type ids - type_id_builder: Int8BufferBuilder, - /// Builder to keep track of offsets (`None` for sparse unions) - value_offset_builder: Option, -} - -impl UnionBuilder { - /// Creates a new dense array builder. - pub fn new_dense(capacity: usize) -> Self { - Self { - len: 0, - fields: HashMap::default(), - type_id_builder: Int8BufferBuilder::new(capacity), - value_offset_builder: Some(Int32BufferBuilder::new(capacity)), - } - } - - /// Creates a new sparse array builder. - pub fn new_sparse(capacity: usize) -> Self { - Self { - len: 0, - fields: HashMap::default(), - type_id_builder: Int8BufferBuilder::new(capacity), - value_offset_builder: None, - } - } - - /// Appends a null to this builder, encoding the null in the array - /// of the `type_name` child / field. - /// - /// Since `UnionArray` encodes nulls as an entry in its children - /// (it doesn't have a validity bitmap itself), and where the null - /// is part of the final array, appending a NULL requires - /// specifying which field (child) to use. - #[inline] - pub fn append_null(&mut self, type_name: &str) -> Result<()> { - self.append_option::(type_name, None) - } - - /// Appends a value to this builder. 
- #[inline] - pub fn append( - &mut self, - type_name: &str, - v: T::Native, - ) -> Result<()> { - self.append_option::(type_name, Some(v)) - } - - fn append_option( - &mut self, - type_name: &str, - v: Option, - ) -> Result<()> { - let type_name = type_name.to_string(); - - let mut field_data = match self.fields.remove(&type_name) { - Some(data) => { - if data.data_type != T::DATA_TYPE { - return Err(ArrowError::InvalidArgumentError(format!("Attempt to write col \"{}\" with type {} doesn't match existing type {}", type_name, T::DATA_TYPE, data.data_type))); - } - data - } - None => match self.value_offset_builder { - Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE), - None => { - let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE); - for _ in 0..self.len { - fd.append_null::()?; - } - fd - } - }, - }; - self.type_id_builder.append(field_data.type_id); - - match &mut self.value_offset_builder { - // Dense Union - Some(offset_builder) => { - offset_builder.append(field_data.slots as i32); - } - // Sparse Union - None => { - for (_, fd) in self.fields.iter_mut() { - // Append to all bar the FieldData currently being appended to - fd.append_null_dynamic()?; - } - } - } - - match v { - Some(v) => field_data.append_to_values_buffer::(v)?, - None => field_data.append_null::()?, - } - - self.fields.insert(type_name, field_data); - self.len += 1; - Ok(()) - } - - /// Builds this builder creating a new `UnionArray`. 
- pub fn build(mut self) -> Result { - let type_id_buffer = self.type_id_builder.finish(); - let value_offsets_buffer = self.value_offset_builder.map(|mut b| b.finish()); - let mut children = Vec::new(); - for ( - name, - FieldData { - type_id, - data_type, - values_buffer, - slots, - mut bitmap_builder, - }, - ) in self.fields.into_iter() - { - let buffer = values_buffer - .expect("The `values_buffer` should only ever be None inside the `append` method.") - .into(); - let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) - .add_buffer(buffer) - .len(slots) - .null_bit_buffer(Some(bitmap_builder.finish())); - - let arr_data_ref = unsafe { arr_data_builder.build_unchecked() }; - let array_ref = make_array(arr_data_ref); - children.push((type_id, (Field::new(&name, data_type, false), array_ref))) - } - - children.sort_by(|a, b| { - a.0.partial_cmp(&b.0) - .expect("This will never be None as type ids are always i8 values.") - }); - let children: Vec<_> = children.into_iter().map(|(_, b)| b).collect(); - - let type_ids: Vec = (0_i8..children.len() as i8).collect(); - - UnionArray::try_new(&type_ids, type_id_buffer, value_offsets_buffer, children) - } -} - -/// Array builder for `DictionaryArray`. For example to map a set of byte indices -/// to f32 values. Note that the use of a `HashMap` here will not scale to very large -/// arrays or result in an ordered dictionary. 
-/// -/// # Example: -/// -/// ``` -/// use arrow::array::{ -/// Array, PrimitiveBuilder, PrimitiveDictionaryBuilder, -/// UInt8Array, UInt32Array, -/// }; -/// use arrow::datatypes::{UInt8Type, UInt32Type}; -/// -/// let key_builder = PrimitiveBuilder::::new(3); -/// let value_builder = PrimitiveBuilder::::new(2); -/// let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); -/// builder.append(12345678).unwrap(); -/// builder.append_null().unwrap(); -/// builder.append(22345678).unwrap(); -/// let array = builder.finish(); -/// -/// assert_eq!( -/// array.keys(), -/// &UInt8Array::from(vec![Some(0), None, Some(1)]) -/// ); -/// -/// // Values are polymorphic and so require a downcast. -/// let av = array.values(); -/// let ava: &UInt32Array = av.as_any().downcast_ref::().unwrap(); -/// let avs: &[u32] = ava.values(); -/// -/// assert!(!array.is_null(0)); -/// assert!(array.is_null(1)); -/// assert!(!array.is_null(2)); -/// -/// assert_eq!(avs, &[12345678, 22345678]); -/// ``` -#[derive(Debug)] -pub struct PrimitiveDictionaryBuilder -where - K: ArrowPrimitiveType, - V: ArrowPrimitiveType, -{ - keys_builder: PrimitiveBuilder, - values_builder: PrimitiveBuilder, - map: HashMap, K::Native>, -} - -impl PrimitiveDictionaryBuilder -where - K: ArrowPrimitiveType, - V: ArrowPrimitiveType, -{ - /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder. - pub fn new( - keys_builder: PrimitiveBuilder, - values_builder: PrimitiveBuilder, - ) -> Self { - Self { - keys_builder, - values_builder, - map: HashMap::new(), - } - } -} - -impl ArrayBuilder for PrimitiveDictionaryBuilder -where - K: ArrowPrimitiveType, - V: ArrowPrimitiveType, -{ - /// Returns the builder as an non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as an mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. 
- fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.keys_builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.keys_builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl PrimitiveDictionaryBuilder -where - K: ArrowPrimitiveType, - V: ArrowPrimitiveType, -{ - /// Append a primitive value to the array. Return an existing index - /// if already present in the values array or a new index if the - /// value is appended to the values array. - #[inline] - pub fn append(&mut self, value: V::Native) -> Result { - if let Some(&key) = self.map.get(value.to_byte_slice()) { - // Append existing value. - self.keys_builder.append_value(key)?; - Ok(key) - } else { - // Append new value. - let key = K::Native::from_usize(self.values_builder.len()) - .ok_or(ArrowError::DictionaryKeyOverflowError)?; - self.values_builder.append_value(value)?; - self.keys_builder.append_value(key as K::Native)?; - self.map.insert(value.to_byte_slice().into(), key); - Ok(key) - } - } - - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.keys_builder.append_null() - } - - /// Builds the `DictionaryArray` and reset this builder. - pub fn finish(&mut self) -> DictionaryArray { - self.map.clear(); - let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); - self.keys_builder.finish_dict(value_ref) - } -} - -/// Array builder for `DictionaryArray` that stores Strings. For example to map a set of byte indices -/// to String values. Note that the use of a `HashMap` here will not scale to very large -/// arrays or result in an ordered dictionary. 
-/// -/// ``` -/// use arrow::{ -/// array::{ -/// Int8Array, StringArray, -/// PrimitiveBuilder, StringBuilder, StringDictionaryBuilder, -/// }, -/// datatypes::Int8Type, -/// }; -/// -/// // Create a dictionary array indexed by bytes whose values are Strings. -/// // It can thus hold up to 256 distinct string values. -/// -/// let key_builder = PrimitiveBuilder::::new(100); -/// let value_builder = StringBuilder::new(100); -/// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); -/// -/// // The builder builds the dictionary value by value -/// builder.append("abc").unwrap(); -/// builder.append_null().unwrap(); -/// builder.append("def").unwrap(); -/// builder.append("def").unwrap(); -/// builder.append("abc").unwrap(); -/// let array = builder.finish(); -/// -/// assert_eq!( -/// array.keys(), -/// &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) -/// ); -/// -/// // Values are polymorphic and so require a downcast. -/// let av = array.values(); -/// let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); -/// -/// assert_eq!(ava.value(0), "abc"); -/// assert_eq!(ava.value(1), "def"); -/// -/// ``` -#[derive(Debug)] -pub struct StringDictionaryBuilder -where - K: ArrowDictionaryKeyType, -{ - keys_builder: PrimitiveBuilder, - values_builder: StringBuilder, - map: HashMap, K::Native>, -} - -impl StringDictionaryBuilder -where - K: ArrowDictionaryKeyType, -{ - /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder. - pub fn new(keys_builder: PrimitiveBuilder, values_builder: StringBuilder) -> Self { - Self { - keys_builder, - values_builder, - map: HashMap::new(), - } - } - - /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary - /// which is initialized with the given values. - /// The indices of those dictionary values are used as keys. 
- /// - /// # Example - /// - /// ``` - /// use arrow::datatypes::Int16Type; - /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array}; - /// use std::convert::TryFrom; - /// - /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); - /// - /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::::new(3), &dictionary_values).unwrap(); - /// builder.append("def").unwrap(); - /// builder.append_null().unwrap(); - /// builder.append("abc").unwrap(); - /// - /// let dictionary_array = builder.finish(); - /// - /// let keys = dictionary_array.keys(); - /// - /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)])); - /// ``` - pub fn new_with_dictionary( - keys_builder: PrimitiveBuilder, - dictionary_values: &StringArray, - ) -> Result { - let dict_len = dictionary_values.len(); - let mut values_builder = - StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len()); - let mut map: HashMap, K::Native> = HashMap::with_capacity(dict_len); - for i in 0..dict_len { - if dictionary_values.is_valid(i) { - let value = dictionary_values.value(i); - map.insert( - value.as_bytes().into(), - K::Native::from_usize(i) - .ok_or(ArrowError::DictionaryKeyOverflowError)?, - ); - values_builder.append_value(value)?; - } else { - values_builder.append_null()?; - } - } - Ok(Self { - keys_builder, - values_builder, - map, - }) - } -} - -impl ArrayBuilder for StringDictionaryBuilder -where - K: ArrowDictionaryKeyType, -{ - /// Returns the builder as an non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as an mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. 
- fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.keys_builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.keys_builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl StringDictionaryBuilder -where - K: ArrowDictionaryKeyType, -{ - /// Append a primitive value to the array. Return an existing index - /// if already present in the values array or a new index if the - /// value is appended to the values array. - pub fn append(&mut self, value: impl AsRef) -> Result { - if let Some(&key) = self.map.get(value.as_ref().as_bytes()) { - // Append existing value. - self.keys_builder.append_value(key)?; - Ok(key) - } else { - // Append new value. - let key = K::Native::from_usize(self.values_builder.len()) - .ok_or(ArrowError::DictionaryKeyOverflowError)?; - self.values_builder.append_value(value.as_ref())?; - self.keys_builder.append_value(key as K::Native)?; - self.map.insert(value.as_ref().as_bytes().into(), key); - Ok(key) - } - } - - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.keys_builder.append_null() - } - - /// Builds the `DictionaryArray` and reset this builder. 
- pub fn finish(&mut self) -> DictionaryArray { - self.map.clear(); - let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); - self.keys_builder.finish_dict(value_ref) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::array::Array; - use crate::bitmap::Bitmap; - - #[test] - fn test_builder_i32_empty() { - let mut b = Int32BufferBuilder::new(5); - assert_eq!(0, b.len()); - assert_eq!(16, b.capacity()); - let a = b.finish(); - assert_eq!(0, a.len()); - } - - #[test] - fn test_builder_i32_alloc_zero_bytes() { - let mut b = Int32BufferBuilder::new(0); - b.append(123); - let a = b.finish(); - assert_eq!(4, a.len()); - } - - #[test] - fn test_builder_i32() { - let mut b = Int32BufferBuilder::new(5); - for i in 0..5 { - b.append(i); - } - assert_eq!(16, b.capacity()); - let a = b.finish(); - assert_eq!(20, a.len()); - } - - #[test] - fn test_builder_i32_grow_buffer() { - let mut b = Int32BufferBuilder::new(2); - assert_eq!(16, b.capacity()); - for i in 0..20 { - b.append(i); - } - assert_eq!(32, b.capacity()); - let a = b.finish(); - assert_eq!(80, a.len()); - } - - #[test] - fn test_builder_finish() { - let mut b = Int32BufferBuilder::new(5); - assert_eq!(16, b.capacity()); - for i in 0..10 { - b.append(i); - } - let mut a = b.finish(); - assert_eq!(40, a.len()); - assert_eq!(0, b.len()); - assert_eq!(0, b.capacity()); - - // Try build another buffer after cleaning up. 
- for i in 0..20 { - b.append(i) - } - assert_eq!(32, b.capacity()); - a = b.finish(); - assert_eq!(80, a.len()); - } - - #[test] - fn test_reserve() { - let mut b = UInt8BufferBuilder::new(2); - assert_eq!(64, b.capacity()); - b.reserve(64); - assert_eq!(64, b.capacity()); - b.reserve(65); - assert_eq!(128, b.capacity()); - - let mut b = Int32BufferBuilder::new(2); - assert_eq!(16, b.capacity()); - b.reserve(16); - assert_eq!(16, b.capacity()); - b.reserve(17); - assert_eq!(32, b.capacity()); - } - - #[test] - fn test_append_slice() { - let mut b = UInt8BufferBuilder::new(0); - b.append_slice(b"Hello, "); - b.append_slice(b"World!"); - let buffer = b.finish(); - assert_eq!(13, buffer.len()); - - let mut b = Int32BufferBuilder::new(0); - b.append_slice(&[32, 54]); - let buffer = b.finish(); - assert_eq!(8, buffer.len()); - } - - #[test] - fn test_append_values() -> Result<()> { - let mut a = Int8Builder::new(0); - a.append_value(1)?; - a.append_null()?; - a.append_value(-2)?; - assert_eq!(a.len(), 3); - - // append values - let values = &[1, 2, 3, 4]; - let is_valid = &[true, true, false, true]; - a.append_values(values, is_valid)?; - - assert_eq!(a.len(), 7); - let array = a.finish(); - assert_eq!(array.value(0), 1); - assert!(array.is_null(1)); - assert_eq!(array.value(2), -2); - assert_eq!(array.value(3), 1); - assert_eq!(array.value(4), 2); - assert!(array.is_null(5)); - assert_eq!(array.value(6), 4); - - Ok(()) - } - - #[test] - fn test_boolean_buffer_builder_write_bytes() { - let mut b = BooleanBufferBuilder::new(4); - b.append(false); - b.append(true); - b.append(false); - b.append(true); - assert_eq!(4, b.len()); - assert_eq!(512, b.capacity()); - let buffer = b.finish(); - assert_eq!(1, buffer.len()); - - // Overallocate capacity - let mut b = BooleanBufferBuilder::new(8); - b.append_slice(&[false, true, false, true]); - assert_eq!(4, b.len()); - assert_eq!(512, b.capacity()); - let buffer = b.finish(); - assert_eq!(1, buffer.len()); - } - - #[test] - fn 
test_boolean_buffer_builder_unset_first_bit() { - let mut buffer = BooleanBufferBuilder::new(4); - buffer.append(true); - buffer.append(true); - buffer.append(false); - buffer.append(true); - buffer.set_bit(0, false); - assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]); - } - - #[test] - fn test_boolean_buffer_builder_unset_last_bit() { - let mut buffer = BooleanBufferBuilder::new(4); - buffer.append(true); - buffer.append(true); - buffer.append(false); - buffer.append(true); - buffer.set_bit(3, false); - assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]); - } - - #[test] - fn test_boolean_buffer_builder_unset_an_inner_bit() { - let mut buffer = BooleanBufferBuilder::new(5); - buffer.append(true); - buffer.append(true); - buffer.append(false); - buffer.append(true); - buffer.set_bit(1, false); - assert_eq!(buffer.len(), 4); - assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]); - } - - #[test] - fn test_boolean_buffer_builder_unset_several_bits() { - let mut buffer = BooleanBufferBuilder::new(5); - buffer.append(true); - buffer.append(true); - buffer.append(true); - buffer.append(false); - buffer.append(true); - buffer.set_bit(1, false); - buffer.set_bit(2, false); - assert_eq!(buffer.len(), 5); - assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]); - } - - #[test] - fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(10, true); - buffer.set_bit(0, false); - buffer.set_bit(3, false); - buffer.set_bit(9, false); - assert_eq!(buffer.len(), 10); - assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]); - } - - #[test] - fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(5, true); - buffer.append_n(5, false); - buffer.append_n(5, true); - buffer.set_bit(0, false); - buffer.set_bit(3, false); - buffer.set_bit(9, 
false); - buffer.set_bit(6, true); - buffer.set_bit(14, true); - buffer.set_bit(13, false); - assert_eq!(buffer.len(), 15); - assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]); - } - - #[test] - fn test_bool_buffer_builder_get_first_bit() { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(8, true); - buffer.append_n(8, false); - assert!(buffer.get_bit(0)); - } - - #[test] - fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() { - let buffer = { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(8, true); - buffer - }; - - assert!(buffer.get_bit(0)); - } - - #[test] - fn test_bool_buffer_builder_get_last_bit() { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(8, true); - buffer.append_n(8, false); - assert!(!buffer.get_bit(15)); - } - - #[test] - fn test_bool_buffer_builder_get_an_inner_bit() { - let mut buffer = BooleanBufferBuilder::new(16); - buffer.append_n(4, false); - buffer.append_n(8, true); - buffer.append_n(4, false); - assert!(buffer.get_bit(11)); - } - - #[test] - fn test_bool_buffer_fuzz() { - use rand::prelude::*; - - let mut buffer = BooleanBufferBuilder::new(12); - let mut all_bools = vec![]; - let mut rng = rand::thread_rng(); - - let src_len = 32; - let (src, compacted_src) = { - let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) - .take(src_len) - .collect(); - - let mut compacted_src = BooleanBufferBuilder::new(src_len); - compacted_src.append_slice(&src); - (src, compacted_src.finish()) - }; - - for _ in 0..100 { - let a = rng.next_u32() as usize % src_len; - let b = rng.next_u32() as usize % src_len; - - let start = a.min(b); - let end = a.max(b); - - buffer.append_packed_range(start..end, compacted_src.as_slice()); - all_bools.extend_from_slice(&src[start..end]); - } - - let mut compacted = BooleanBufferBuilder::new(all_bools.len()); - compacted.append_slice(&all_bools); - - assert_eq!(buffer.finish(), compacted.finish()) - } - - 
#[test] - fn test_boolean_array_builder_append_slice() { - let arr1 = - BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); - - let mut builder = BooleanArray::builder(0); - builder.append_slice(&[true, false]).unwrap(); - builder.append_null().unwrap(); - builder.append_null().unwrap(); - builder.append_value(false).unwrap(); - let arr2 = builder.finish(); - - assert_eq!(arr1, arr2); - } - - #[test] - fn test_boolean_array_builder_append_slice_large() { - let arr1 = BooleanArray::from(vec![true; 513]); - - let mut builder = BooleanArray::builder(512); - builder.append_slice(&[true; 513]).unwrap(); - let arr2 = builder.finish(); - - assert_eq!(arr1, arr2); - } - - #[test] - fn test_boolean_array_builder_resize() { - let mut builder = BooleanBufferBuilder::new(20); - builder.append_n(4, true); - builder.append_n(7, false); - builder.append_n(2, true); - builder.resize(20); - - assert_eq!(builder.len, 20); - assert_eq!( - builder.buffer.as_slice(), - &[0b00001111, 0b00011000, 0b00000000] - ); - - builder.resize(5); - assert_eq!(builder.len, 5); - assert_eq!(builder.buffer.as_slice(), &[0b00001111]); - - builder.append_n(4, true); - assert_eq!(builder.len, 9); - assert_eq!(builder.buffer.as_slice(), &[0b11101111, 0b00000001]); - } - - #[test] - fn test_boolean_builder_increases_buffer_len() { - // 00000010 01001000 - let buf = Buffer::from([72_u8, 2_u8]); - let mut builder = BooleanBufferBuilder::new(8); - - for i in 0..16 { - if i == 3 || i == 6 || i == 9 { - builder.append(true); - } else { - builder.append(false); - } - } - let buf2 = builder.finish(); - - assert_eq!(buf.len(), buf2.len()); - assert_eq!(buf.as_slice(), buf2.as_slice()); - } - - #[test] - fn test_primitive_array_builder_i32() { - let mut builder = Int32Array::builder(5); - for i in 0..5 { - builder.append_value(i).unwrap(); - } - let arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..5 { - 
assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i as i32, arr.value(i)); - } - } - - #[test] - fn test_primitive_array_builder_i32_append_iter() { - let mut builder = Int32Array::builder(5); - unsafe { builder.append_trusted_len_iter(0..5) }.unwrap(); - let arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..5 { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i as i32, arr.value(i)); - } - } - - #[test] - fn test_primitive_array_builder_i32_append_nulls() { - let mut builder = Int32Array::builder(5); - builder.append_nulls(5).unwrap(); - let arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(5, arr.null_count()); - for i in 0..5 { - assert!(arr.is_null(i)); - assert!(!arr.is_valid(i)); - } - } - - #[test] - fn test_primitive_array_builder_date32() { - let mut builder = Date32Array::builder(5); - for i in 0..5 { - builder.append_value(i).unwrap(); - } - let arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..5 { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i as i32, arr.value(i)); - } - } - - #[test] - fn test_primitive_array_builder_timestamp_second() { - let mut builder = TimestampSecondArray::builder(5); - for i in 0..5 { - builder.append_value(i).unwrap(); - } - let arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..5 { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i as i64, arr.value(i)); - } - } - - #[test] - fn test_primitive_array_builder_bool() { - // 00000010 01001000 - let buf = Buffer::from([72_u8, 2_u8]); - let mut builder = BooleanArray::builder(10); - for i in 0..10 { - if i == 3 || i == 6 || i == 9 { - builder.append_value(true).unwrap(); - } else { - 
builder.append_value(false).unwrap(); - } - } - - let arr = builder.finish(); - assert_eq!(&buf, arr.values()); - assert_eq!(10, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..10 { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i) - } - } - - #[test] - fn test_primitive_array_builder_append_option() { - let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]); - - let mut builder = Int32Array::builder(5); - builder.append_option(Some(0)).unwrap(); - builder.append_option(None).unwrap(); - builder.append_option(Some(2)).unwrap(); - builder.append_option(None).unwrap(); - builder.append_option(Some(4)).unwrap(); - let arr2 = builder.finish(); - - assert_eq!(arr1.len(), arr2.len()); - assert_eq!(arr1.offset(), arr2.offset()); - assert_eq!(arr1.null_count(), arr2.null_count()); - for i in 0..5 { - assert_eq!(arr1.is_null(i), arr2.is_null(i)); - assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); - if arr1.is_valid(i) { - assert_eq!(arr1.value(i), arr2.value(i)); - } - } - } - - #[test] - fn test_primitive_array_builder_append_null() { - let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); - - let mut builder = Int32Array::builder(5); - builder.append_value(0).unwrap(); - builder.append_value(2).unwrap(); - builder.append_null().unwrap(); - builder.append_null().unwrap(); - builder.append_value(4).unwrap(); - let arr2 = builder.finish(); - - assert_eq!(arr1.len(), arr2.len()); - assert_eq!(arr1.offset(), arr2.offset()); - assert_eq!(arr1.null_count(), arr2.null_count()); - for i in 0..5 { - assert_eq!(arr1.is_null(i), arr2.is_null(i)); - assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); - if arr1.is_valid(i) { - assert_eq!(arr1.value(i), arr2.value(i)); - } - } - } - - #[test] - fn test_primitive_array_builder_append_slice() { - let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); - - let mut 
builder = Int32Array::builder(5); - builder.append_slice(&[0, 2]).unwrap(); - builder.append_null().unwrap(); - builder.append_null().unwrap(); - builder.append_value(4).unwrap(); - let arr2 = builder.finish(); - - assert_eq!(arr1.len(), arr2.len()); - assert_eq!(arr1.offset(), arr2.offset()); - assert_eq!(arr1.null_count(), arr2.null_count()); - for i in 0..5 { - assert_eq!(arr1.is_null(i), arr2.is_null(i)); - assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); - if arr1.is_valid(i) { - assert_eq!(arr1.value(i), arr2.value(i)); - } - } - } - - #[test] - fn test_primitive_array_builder_finish() { - let mut builder = Int32Builder::new(5); - builder.append_slice(&[2, 4, 6, 8]).unwrap(); - let mut arr = builder.finish(); - assert_eq!(4, arr.len()); - assert_eq!(0, builder.len()); - - builder.append_slice(&[1, 3, 5, 7, 9]).unwrap(); - arr = builder.finish(); - assert_eq!(5, arr.len()); - assert_eq!(0, builder.len()); - } - - #[test] - fn test_list_array_builder() { - let values_builder = Int32Builder::new(10); - let mut builder = ListBuilder::new(values_builder); - - // [[0, 1, 2], [3, 4, 5], [6, 7]] - builder.values().append_value(0).unwrap(); - builder.values().append_value(1).unwrap(); - builder.values().append_value(2).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(3).unwrap(); - builder.values().append_value(4).unwrap(); - builder.values().append_value(5).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(6).unwrap(); - builder.values().append_value(7).unwrap(); - builder.append(true).unwrap(); - let list_array = builder.finish(); - - let values = list_array.values().data().buffers()[0].clone(); - assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values); - assert_eq!( - Buffer::from_slice_ref(&[0, 3, 6, 8]), - list_array.data().buffers()[0].clone() - ); - assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(3, list_array.len()); - assert_eq!(0, list_array.null_count()); - assert_eq!(6, 
list_array.value_offsets()[2]); - assert_eq!(2, list_array.value_length(2)); - for i in 0..3 { - assert!(list_array.is_valid(i)); - assert!(!list_array.is_null(i)); - } - } - - #[test] - fn test_large_list_array_builder() { - let values_builder = Int32Builder::new(10); - let mut builder = LargeListBuilder::new(values_builder); - - // [[0, 1, 2], [3, 4, 5], [6, 7]] - builder.values().append_value(0).unwrap(); - builder.values().append_value(1).unwrap(); - builder.values().append_value(2).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(3).unwrap(); - builder.values().append_value(4).unwrap(); - builder.values().append_value(5).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(6).unwrap(); - builder.values().append_value(7).unwrap(); - builder.append(true).unwrap(); - let list_array = builder.finish(); - - let values = list_array.values().data().buffers()[0].clone(); - assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values); - assert_eq!( - Buffer::from_slice_ref(&[0i64, 3, 6, 8]), - list_array.data().buffers()[0].clone() - ); - assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(3, list_array.len()); - assert_eq!(0, list_array.null_count()); - assert_eq!(6, list_array.value_offsets()[2]); - assert_eq!(2, list_array.value_length(2)); - for i in 0..3 { - assert!(list_array.is_valid(i)); - assert!(!list_array.is_null(i)); - } - } - - #[test] - fn test_list_array_builder_nulls() { - let values_builder = Int32Builder::new(10); - let mut builder = ListBuilder::new(values_builder); - - // [[0, 1, 2], null, [3, null, 5], [6, 7]] - builder.values().append_value(0).unwrap(); - builder.values().append_value(1).unwrap(); - builder.values().append_value(2).unwrap(); - builder.append(true).unwrap(); - builder.append(false).unwrap(); - builder.values().append_value(3).unwrap(); - builder.values().append_null().unwrap(); - builder.values().append_value(5).unwrap(); - builder.append(true).unwrap(); - 
builder.values().append_value(6).unwrap(); - builder.values().append_value(7).unwrap(); - builder.append(true).unwrap(); - let list_array = builder.finish(); - - assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(4, list_array.len()); - assert_eq!(1, list_array.null_count()); - assert_eq!(3, list_array.value_offsets()[2]); - assert_eq!(3, list_array.value_length(2)); - } - - #[test] - fn test_large_list_array_builder_nulls() { - let values_builder = Int32Builder::new(10); - let mut builder = LargeListBuilder::new(values_builder); - - // [[0, 1, 2], null, [3, null, 5], [6, 7]] - builder.values().append_value(0).unwrap(); - builder.values().append_value(1).unwrap(); - builder.values().append_value(2).unwrap(); - builder.append(true).unwrap(); - builder.append(false).unwrap(); - builder.values().append_value(3).unwrap(); - builder.values().append_null().unwrap(); - builder.values().append_value(5).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(6).unwrap(); - builder.values().append_value(7).unwrap(); - builder.append(true).unwrap(); - let list_array = builder.finish(); - - assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(4, list_array.len()); - assert_eq!(1, list_array.null_count()); - assert_eq!(3, list_array.value_offsets()[2]); - assert_eq!(3, list_array.value_length(2)); - } - - #[test] - fn test_fixed_size_list_array_builder() { - let values_builder = Int32Builder::new(10); - let mut builder = FixedSizeListBuilder::new(values_builder, 3); - - // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] - builder.values().append_value(0).unwrap(); - builder.values().append_value(1).unwrap(); - builder.values().append_value(2).unwrap(); - builder.append(true).unwrap(); - builder.values().append_null().unwrap(); - builder.values().append_null().unwrap(); - builder.values().append_null().unwrap(); - builder.append(false).unwrap(); - builder.values().append_value(3).unwrap(); - builder.values().append_null().unwrap(); 
- builder.values().append_value(5).unwrap(); - builder.append(true).unwrap(); - builder.values().append_value(6).unwrap(); - builder.values().append_value(7).unwrap(); - builder.values().append_null().unwrap(); - builder.append(true).unwrap(); - let list_array = builder.finish(); - - assert_eq!(DataType::Int32, list_array.value_type()); - assert_eq!(4, list_array.len()); - assert_eq!(1, list_array.null_count()); - assert_eq!(6, list_array.value_offset(2)); - assert_eq!(3, list_array.value_length()); - } - - #[test] - fn test_list_array_builder_finish() { - let values_builder = Int32Array::builder(5); - let mut builder = ListBuilder::new(values_builder); - - builder.values().append_slice(&[1, 2, 3]).unwrap(); - builder.append(true).unwrap(); - builder.values().append_slice(&[4, 5, 6]).unwrap(); - builder.append(true).unwrap(); - - let mut arr = builder.finish(); - assert_eq!(2, arr.len()); - assert_eq!(0, builder.len()); - - builder.values().append_slice(&[7, 8, 9]).unwrap(); - builder.append(true).unwrap(); - arr = builder.finish(); - assert_eq!(1, arr.len()); - assert_eq!(0, builder.len()); - } - - #[test] - fn test_fixed_size_list_array_builder_empty() { - let values_builder = Int32Array::builder(5); - let mut builder = FixedSizeListBuilder::new(values_builder, 3); - - let arr = builder.finish(); - assert_eq!(0, arr.len()); - assert_eq!(0, builder.len()); - } - - #[test] - fn test_fixed_size_list_array_builder_finish() { - let values_builder = Int32Array::builder(5); - let mut builder = FixedSizeListBuilder::new(values_builder, 3); - - builder.values().append_slice(&[1, 2, 3]).unwrap(); - builder.append(true).unwrap(); - builder.values().append_slice(&[4, 5, 6]).unwrap(); - builder.append(true).unwrap(); - - let mut arr = builder.finish(); - assert_eq!(2, arr.len()); - assert_eq!(0, builder.len()); - - builder.values().append_slice(&[7, 8, 9]).unwrap(); - builder.append(true).unwrap(); - arr = builder.finish(); - assert_eq!(1, arr.len()); - assert_eq!(0, 
builder.len()); - } - - #[test] - fn test_list_list_array_builder() { - let primitive_builder = Int32Builder::new(10); - let values_builder = ListBuilder::new(primitive_builder); - let mut builder = ListBuilder::new(values_builder); - - // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] - builder.values().values().append_value(1).unwrap(); - builder.values().values().append_value(2).unwrap(); - builder.values().append(true).unwrap(); - builder.values().values().append_value(3).unwrap(); - builder.values().values().append_value(4).unwrap(); - builder.values().append(true).unwrap(); - builder.append(true).unwrap(); - - builder.values().values().append_value(5).unwrap(); - builder.values().values().append_value(6).unwrap(); - builder.values().values().append_value(7).unwrap(); - builder.values().append(true).unwrap(); - builder.values().append(false).unwrap(); - builder.values().values().append_value(8).unwrap(); - builder.values().append(true).unwrap(); - builder.append(true).unwrap(); - - builder.append(false).unwrap(); - - builder.values().values().append_value(9).unwrap(); - builder.values().values().append_value(10).unwrap(); - builder.values().append(true).unwrap(); - builder.append(true).unwrap(); - - let list_array = builder.finish(); - - assert_eq!(4, list_array.len()); - assert_eq!(1, list_array.null_count()); - assert_eq!( - Buffer::from_slice_ref(&[0, 2, 5, 5, 6]), - list_array.data().buffers()[0].clone() - ); - - assert_eq!(6, list_array.values().data().len()); - assert_eq!(1, list_array.values().data().null_count()); - assert_eq!( - Buffer::from_slice_ref(&[0, 2, 4, 7, 7, 8, 10]), - list_array.values().data().buffers()[0].clone() - ); - - assert_eq!(10, list_array.values().data().child_data()[0].len()); - assert_eq!(0, list_array.values().data().child_data()[0].null_count()); - assert_eq!( - Buffer::from_slice_ref(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - list_array.values().data().child_data()[0].buffers()[0].clone() - ); - } - - #[test] - fn 
test_binary_array_builder() { - let mut builder = BinaryBuilder::new(20); - - builder.append_byte(b'h').unwrap(); - builder.append_byte(b'e').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'o').unwrap(); - builder.append(true).unwrap(); - builder.append(true).unwrap(); - builder.append_byte(b'w').unwrap(); - builder.append_byte(b'o').unwrap(); - builder.append_byte(b'r').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'd').unwrap(); - builder.append(true).unwrap(); - - let binary_array = builder.finish(); - - assert_eq!(3, binary_array.len()); - assert_eq!(0, binary_array.null_count()); - assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); - assert_eq!([] as [u8; 0], binary_array.value(1)); - assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); - assert_eq!(5, binary_array.value_offsets()[2]); - assert_eq!(5, binary_array.value_length(2)); - } - - #[test] - fn test_large_binary_array_builder() { - let mut builder = LargeBinaryBuilder::new(20); - - builder.append_byte(b'h').unwrap(); - builder.append_byte(b'e').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'o').unwrap(); - builder.append(true).unwrap(); - builder.append(true).unwrap(); - builder.append_byte(b'w').unwrap(); - builder.append_byte(b'o').unwrap(); - builder.append_byte(b'r').unwrap(); - builder.append_byte(b'l').unwrap(); - builder.append_byte(b'd').unwrap(); - builder.append(true).unwrap(); - - let binary_array = builder.finish(); - - assert_eq!(3, binary_array.len()); - assert_eq!(0, binary_array.null_count()); - assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); - assert_eq!([] as [u8; 0], binary_array.value(1)); - assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); - assert_eq!(5, binary_array.value_offsets()[2]); - assert_eq!(5, binary_array.value_length(2)); - } - - #[test] - fn 
test_string_array_builder() { - let mut builder = StringBuilder::new(20); - - builder.append_value("hello").unwrap(); - builder.append(true).unwrap(); - builder.append_value("world").unwrap(); - - let string_array = builder.finish(); - - assert_eq!(3, string_array.len()); - assert_eq!(0, string_array.null_count()); - assert_eq!("hello", string_array.value(0)); - assert_eq!("", string_array.value(1)); - assert_eq!("world", string_array.value(2)); - assert_eq!(5, string_array.value_offsets()[2]); - assert_eq!(5, string_array.value_length(2)); - } - - #[test] - fn test_fixed_size_binary_builder() { - let mut builder = FixedSizeBinaryBuilder::new(15, 5); - - // [b"hello", null, "arrow"] - builder.append_value(b"hello").unwrap(); - builder.append_null().unwrap(); - builder.append_value(b"arrow").unwrap(); - let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish(); - - assert_eq!( - &DataType::FixedSizeBinary(5), - fixed_size_binary_array.data_type() - ); - assert_eq!(3, fixed_size_binary_array.len()); - assert_eq!(1, fixed_size_binary_array.null_count()); - assert_eq!(10, fixed_size_binary_array.value_offset(2)); - assert_eq!(5, fixed_size_binary_array.value_length()); - } - - #[test] - fn test_decimal_builder() { - let mut builder = DecimalBuilder::new(30, 23, 6); - - builder.append_value(8_887_000_000).unwrap(); - builder.append_null().unwrap(); - builder.append_value(-8_887_000_000).unwrap(); - let decimal_array: DecimalArray = builder.finish(); - - assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type()); - assert_eq!(3, decimal_array.len()); - assert_eq!(1, decimal_array.null_count()); - assert_eq!(32, decimal_array.value_offset(2)); - assert_eq!(16, decimal_array.value_length()); - } - - #[test] - fn test_string_array_builder_finish() { - let mut builder = StringBuilder::new(10); - - builder.append_value("hello").unwrap(); - builder.append_value("world").unwrap(); - - let mut arr = builder.finish(); - assert_eq!(2, arr.len()); - assert_eq!(0, 
builder.len()); - - builder.append_value("arrow").unwrap(); - arr = builder.finish(); - assert_eq!(1, arr.len()); - assert_eq!(0, builder.len()); - } - - #[test] - fn test_string_array_builder_append_string() { - let mut builder = StringBuilder::new(20); - - let var = "hello".to_owned(); - builder.append_value(&var).unwrap(); - builder.append(true).unwrap(); - builder.append_value("world").unwrap(); - - let string_array = builder.finish(); - - assert_eq!(3, string_array.len()); - assert_eq!(0, string_array.null_count()); - assert_eq!("hello", string_array.value(0)); - assert_eq!("", string_array.value(1)); - assert_eq!("world", string_array.value(2)); - assert_eq!(5, string_array.value_offsets()[2]); - assert_eq!(5, string_array.value_length(2)); - } - - #[test] - fn test_string_array_builder_append_option() { - let mut builder = StringBuilder::new(20); - builder.append_option(Some("hello")).unwrap(); - builder.append_option(None::<&str>).unwrap(); - builder.append_option(None::).unwrap(); - builder.append_option(Some("world")).unwrap(); - - let string_array = builder.finish(); - - assert_eq!(4, string_array.len()); - assert_eq!("hello", string_array.value(0)); - assert!(string_array.is_null(1)); - assert!(string_array.is_null(2)); - assert_eq!("world", string_array.value(3)); - } - - #[test] - fn test_struct_array_builder() { - let string_builder = StringBuilder::new(4); - let int_builder = Int32Builder::new(4); - - let mut fields = Vec::new(); - let mut field_builders = Vec::new(); - fields.push(Field::new("f1", DataType::Utf8, false)); - field_builders.push(Box::new(string_builder) as Box); - fields.push(Field::new("f2", DataType::Int32, false)); - field_builders.push(Box::new(int_builder) as Box); - - let mut builder = StructBuilder::new(fields, field_builders); - assert_eq!(2, builder.num_fields()); - - let string_builder = builder - .field_builder::(0) - .expect("builder at field 0 should be string builder"); - string_builder.append_value("joe").unwrap(); - 
string_builder.append_null().unwrap(); - string_builder.append_null().unwrap(); - string_builder.append_value("mark").unwrap(); - - let int_builder = builder - .field_builder::(1) - .expect("builder at field 1 should be int builder"); - int_builder.append_value(1).unwrap(); - int_builder.append_value(2).unwrap(); - int_builder.append_null().unwrap(); - int_builder.append_value(4).unwrap(); - - builder.append(true).unwrap(); - builder.append(true).unwrap(); - builder.append_null().unwrap(); - builder.append(true).unwrap(); - - let arr = builder.finish(); - - let struct_data = arr.data(); - assert_eq!(4, struct_data.len()); - assert_eq!(1, struct_data.null_count()); - assert_eq!( - Some(&Bitmap::from(Buffer::from(&[11_u8]))), - struct_data.null_bitmap() - ); - - let expected_string_data = ArrayData::builder(DataType::Utf8) - .len(4) - .null_bit_buffer(Some(Buffer::from(&[9_u8]))) - .add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7])) - .add_buffer(Buffer::from_slice_ref(b"joemark")) - .build() - .unwrap(); - - let expected_int_data = ArrayData::builder(DataType::Int32) - .len(4) - .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8]))) - .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4])) - .build() - .unwrap(); - - assert_eq!(expected_string_data, *arr.column(0).data()); - assert_eq!(expected_int_data, *arr.column(1).data()); - } - - #[test] - fn test_struct_array_builder_finish() { - let int_builder = Int32Builder::new(10); - let bool_builder = BooleanBuilder::new(10); - - let mut fields = Vec::new(); - let mut field_builders = Vec::new(); - fields.push(Field::new("f1", DataType::Int32, false)); - field_builders.push(Box::new(int_builder) as Box); - fields.push(Field::new("f2", DataType::Boolean, false)); - field_builders.push(Box::new(bool_builder) as Box); - - let mut builder = StructBuilder::new(fields, field_builders); - builder - .field_builder::(0) - .unwrap() - .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - .unwrap(); - builder - .field_builder::(1) - 
.unwrap() - .append_slice(&[ - false, true, false, true, false, true, false, true, false, true, - ]) - .unwrap(); - - // Append slot values - all are valid. - for _ in 0..10 { - assert!(builder.append(true).is_ok()) - } - - assert_eq!(10, builder.len()); - - let arr = builder.finish(); - - assert_eq!(10, arr.len()); - assert_eq!(0, builder.len()); - - builder - .field_builder::(0) - .unwrap() - .append_slice(&[1, 3, 5, 7, 9]) - .unwrap(); - builder - .field_builder::(1) - .unwrap() - .append_slice(&[false, true, false, true, false]) - .unwrap(); - - // Append slot values - all are valid. - for _ in 0..5 { - assert!(builder.append(true).is_ok()) - } - - assert_eq!(5, builder.len()); - - let arr = builder.finish(); - - assert_eq!(5, arr.len()); - assert_eq!(0, builder.len()); - } - - #[test] - fn test_map_array_builder() { - let string_builder = StringBuilder::new(4); - let int_builder = Int32Builder::new(4); - - let mut builder = MapBuilder::new(None, string_builder, int_builder); - - let string_builder = builder.keys(); - string_builder.append_value("joe").unwrap(); - string_builder.append_null().unwrap(); - string_builder.append_null().unwrap(); - string_builder.append_value("mark").unwrap(); - - let int_builder = builder.values(); - int_builder.append_value(1).unwrap(); - int_builder.append_value(2).unwrap(); - int_builder.append_null().unwrap(); - int_builder.append_value(4).unwrap(); - - builder.append(true).unwrap(); - builder.append(false).unwrap(); - builder.append(true).unwrap(); - - let arr = builder.finish(); - - let map_data = arr.data(); - assert_eq!(3, map_data.len()); - assert_eq!(1, map_data.null_count()); - assert_eq!( - Some(&Bitmap::from(Buffer::from(&[5_u8]))), - map_data.null_bitmap() - ); - - let expected_string_data = ArrayData::builder(DataType::Utf8) - .len(4) - .null_bit_buffer(Some(Buffer::from(&[9_u8]))) - .add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7])) - .add_buffer(Buffer::from_slice_ref(b"joemark")) - .build() - .unwrap(); - - 
let expected_int_data = ArrayData::builder(DataType::Int32) - .len(4) - .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8]))) - .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4])) - .build() - .unwrap(); - - assert_eq!(&expected_string_data, arr.keys().data()); - assert_eq!(&expected_int_data, arr.values().data()); - } - - // TODO: add a test that finishes building, after designing a spec-compliant - // way of inserting values to the map. - // A map's values shouldn't be repeated within a slot - - #[test] - fn test_struct_array_builder_from_schema() { - let mut fields = vec![ - Field::new("f1", DataType::Float32, false), - Field::new("f2", DataType::Utf8, false), - ]; - let sub_fields = vec![ - Field::new("g1", DataType::Int32, false), - Field::new("g2", DataType::Boolean, false), - ]; - let struct_type = DataType::Struct(sub_fields); - fields.push(Field::new("f3", struct_type, false)); - - let mut builder = StructBuilder::from_fields(fields, 5); - assert_eq!(3, builder.num_fields()); - assert!(builder.field_builder::(0).is_some()); - assert!(builder.field_builder::(1).is_some()); - assert!(builder.field_builder::(2).is_some()); - } - - #[test] - #[should_panic( - expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported" - )] - fn test_struct_array_builder_from_schema_unsupported_type() { - let mut fields = vec![Field::new("f1", DataType::Int16, false)]; - let list_type = - DataType::List(Box::new(Field::new("item", DataType::Int64, true))); - fields.push(Field::new("f2", list_type, false)); - - let _ = StructBuilder::from_fields(fields, 5); - } - - #[test] - fn test_struct_array_builder_field_builder_type_mismatch() { - let int_builder = Int32Builder::new(10); - - let mut fields = Vec::new(); - let mut field_builders = Vec::new(); - fields.push(Field::new("f1", DataType::Int32, false)); - field_builders.push(Box::new(int_builder) as Box); - - let mut 
builder = StructBuilder::new(fields, field_builders); - assert!(builder.field_builder::(0).is_none()); - } - - #[test] - fn test_primitive_dictionary_builder() { - let key_builder = PrimitiveBuilder::::new(3); - let value_builder = PrimitiveBuilder::::new(2); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); - builder.append(12345678).unwrap(); - builder.append_null().unwrap(); - builder.append(22345678).unwrap(); - let array = builder.finish(); - - assert_eq!( - array.keys(), - &UInt8Array::from(vec![Some(0), None, Some(1)]) - ); - - // Values are polymorphic and so require a downcast. - let av = array.values(); - let ava: &UInt32Array = av.as_any().downcast_ref::().unwrap(); - let avs: &[u32] = ava.values(); - - assert!(!array.is_null(0)); - assert!(array.is_null(1)); - assert!(!array.is_null(2)); - - assert_eq!(avs, &[12345678, 22345678]); - } - - #[test] - fn test_string_dictionary_builder() { - let key_builder = PrimitiveBuilder::::new(5); - let value_builder = StringBuilder::new(2); - let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); - builder.append("abc").unwrap(); - builder.append_null().unwrap(); - builder.append("def").unwrap(); - builder.append("def").unwrap(); - builder.append("abc").unwrap(); - let array = builder.finish(); - - assert_eq!( - array.keys(), - &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) - ); - - // Values are polymorphic and so require a downcast. 
- let av = array.values(); - let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); - - assert_eq!(ava.value(0), "abc"); - assert_eq!(ava.value(1), "def"); - } - - #[test] - fn test_string_dictionary_builder_with_existing_dictionary() { - let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); - - let key_builder = PrimitiveBuilder::::new(6); - let mut builder = - StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) - .unwrap(); - builder.append("abc").unwrap(); - builder.append_null().unwrap(); - builder.append("def").unwrap(); - builder.append("def").unwrap(); - builder.append("abc").unwrap(); - builder.append("ghi").unwrap(); - let array = builder.finish(); - - assert_eq!( - array.keys(), - &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]) - ); - - // Values are polymorphic and so require a downcast. - let av = array.values(); - let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); - - assert!(!ava.is_valid(0)); - assert_eq!(ava.value(1), "def"); - assert_eq!(ava.value(2), "abc"); - assert_eq!(ava.value(3), "ghi"); - } - - #[test] - fn test_string_dictionary_builder_with_reserved_null_value() { - let dictionary: Vec> = vec![None]; - let dictionary = StringArray::from(dictionary); - - let key_builder = PrimitiveBuilder::::new(4); - let mut builder = - StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) - .unwrap(); - builder.append("abc").unwrap(); - builder.append_null().unwrap(); - builder.append("def").unwrap(); - builder.append("abc").unwrap(); - let array = builder.finish(); - - assert!(array.is_null(1)); - assert!(!array.is_valid(1)); - - let keys = array.keys(); - - assert_eq!(keys.value(0), 1); - assert!(keys.is_null(1)); - // zero initialization is currently guaranteed by Buffer allocation and resizing - assert_eq!(keys.value(1), 0); - assert_eq!(keys.value(2), 2); - assert_eq!(keys.value(3), 1); - } - - #[test] - #[should_panic(expected = 
"DictionaryKeyOverflowError")] - fn test_primitive_dictionary_overflow() { - let key_builder = PrimitiveBuilder::::new(257); - let value_builder = PrimitiveBuilder::::new(257); - let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); - // 256 unique keys. - for i in 0..256 { - builder.append(i + 1000).unwrap(); - } - // Special error if the key overflows (256th entry) - builder.append(1257).unwrap(); - } -} diff --git a/arrow/src/array/builder/boolean_buffer_builder.rs b/arrow/src/array/builder/boolean_buffer_builder.rs new file mode 100644 index 000000000000..5b6d1ce48478 --- /dev/null +++ b/arrow/src/array/builder/boolean_buffer_builder.rs @@ -0,0 +1,383 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::buffer::{Buffer, MutableBuffer}; + +use super::Range; + +use crate::util::bit_util; + +#[derive(Debug)] +pub struct BooleanBufferBuilder { + buffer: MutableBuffer, + len: usize, +} + +impl BooleanBufferBuilder { + #[inline] + pub fn new(capacity: usize) -> Self { + let byte_capacity = bit_util::ceil(capacity, 8); + let buffer = MutableBuffer::new(byte_capacity); + Self { buffer, len: 0 } + } + + #[inline] + pub fn len(&self) -> usize { + self.len + } + + #[inline] + pub fn set_bit(&mut self, index: usize, v: bool) { + if v { + bit_util::set_bit(self.buffer.as_mut(), index); + } else { + bit_util::unset_bit(self.buffer.as_mut(), index); + } + } + + #[inline] + pub fn get_bit(&self, index: usize) -> bool { + bit_util::get_bit(self.buffer.as_slice(), index) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + #[inline] + pub fn capacity(&self) -> usize { + self.buffer.capacity() * 8 + } + + #[inline] + pub fn advance(&mut self, additional: usize) { + let new_len = self.len + additional; + let new_len_bytes = bit_util::ceil(new_len, 8); + if new_len_bytes > self.buffer.len() { + self.buffer.resize(new_len_bytes, 0); + } + self.len = new_len; + } + + /// Reserve space to at least `additional` new bits. + /// Capacity will be `>= self.len() + additional`. + /// New bytes are uninitialized and reading them is undefined behavior. + #[inline] + pub fn reserve(&mut self, additional: usize) { + let capacity = self.len + additional; + if capacity > self.capacity() { + // convert differential to bytes + let additional = bit_util::ceil(capacity, 8) - self.buffer.len(); + self.buffer.reserve(additional); + } + } + + /// Resizes the buffer, either truncating its contents (with no change in capacity), or + /// growing it (potentially reallocating it) and writing `false` in the newly available bits. 
+ #[inline] + pub fn resize(&mut self, len: usize) { + let len_bytes = bit_util::ceil(len, 8); + self.buffer.resize(len_bytes, 0); + self.len = len; + } + + #[inline] + pub fn append(&mut self, v: bool) { + self.advance(1); + if v { + unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) }; + } + } + + #[inline] + pub fn append_n(&mut self, additional: usize, v: bool) { + self.advance(additional); + if additional > 0 && v { + let offset = self.len() - additional; + (0..additional).for_each(|i| unsafe { + bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) + }) + } + } + + #[inline] + pub fn append_slice(&mut self, slice: &[bool]) { + let additional = slice.len(); + self.advance(additional); + + let offset = self.len() - additional; + for (i, v) in slice.iter().enumerate() { + if *v { + unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) } + } + } + } + + /// Append `range` bits from `to_set` + /// + /// `to_set` is a slice of bits packed LSB-first into `[u8]` + /// + /// # Panics + /// + /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes + pub fn append_packed_range(&mut self, range: Range, to_set: &[u8]) { + let offset_write = self.len; + let len = range.end - range.start; + self.advance(len); + crate::util::bit_mask::set_bits( + self.buffer.as_slice_mut(), + to_set, + offset_write, + range.start, + len, + ); + } + + /// Returns the packed bits + pub fn as_slice(&self) -> &[u8] { + self.buffer.as_slice() + } + + #[inline] + pub fn finish(&mut self) -> Buffer { + let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); + self.len = 0; + buf.into() + } +} + +impl From for Buffer { + #[inline] + fn from(builder: BooleanBufferBuilder) -> Self { + builder.buffer.into() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_boolean_buffer_builder_write_bytes() { + let mut b = BooleanBufferBuilder::new(4); + b.append(false); + b.append(true); + b.append(false); + b.append(true); + 
assert_eq!(4, b.len()); + assert_eq!(512, b.capacity()); + let buffer = b.finish(); + assert_eq!(1, buffer.len()); + + // Overallocate capacity + let mut b = BooleanBufferBuilder::new(8); + b.append_slice(&[false, true, false, true]); + assert_eq!(4, b.len()); + assert_eq!(512, b.capacity()); + let buffer = b.finish(); + assert_eq!(1, buffer.len()); + } + + #[test] + fn test_boolean_buffer_builder_unset_first_bit() { + let mut buffer = BooleanBufferBuilder::new(4); + buffer.append(true); + buffer.append(true); + buffer.append(false); + buffer.append(true); + buffer.set_bit(0, false); + assert_eq!(buffer.len(), 4); + assert_eq!(buffer.finish().as_slice(), &[0b1010_u8]); + } + + #[test] + fn test_boolean_buffer_builder_unset_last_bit() { + let mut buffer = BooleanBufferBuilder::new(4); + buffer.append(true); + buffer.append(true); + buffer.append(false); + buffer.append(true); + buffer.set_bit(3, false); + assert_eq!(buffer.len(), 4); + assert_eq!(buffer.finish().as_slice(), &[0b0011_u8]); + } + + #[test] + fn test_boolean_buffer_builder_unset_an_inner_bit() { + let mut buffer = BooleanBufferBuilder::new(5); + buffer.append(true); + buffer.append(true); + buffer.append(false); + buffer.append(true); + buffer.set_bit(1, false); + assert_eq!(buffer.len(), 4); + assert_eq!(buffer.finish().as_slice(), &[0b1001_u8]); + } + + #[test] + fn test_boolean_buffer_builder_unset_several_bits() { + let mut buffer = BooleanBufferBuilder::new(5); + buffer.append(true); + buffer.append(true); + buffer.append(true); + buffer.append(false); + buffer.append(true); + buffer.set_bit(1, false); + buffer.set_bit(2, false); + assert_eq!(buffer.len(), 5); + assert_eq!(buffer.finish().as_slice(), &[0b10001_u8]); + } + + #[test] + fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(10, true); + buffer.set_bit(0, false); + buffer.set_bit(3, false); + buffer.set_bit(9, false); + assert_eq!(buffer.len(), 
10); + assert_eq!(buffer.finish().as_slice(), &[0b11110110_u8, 0b01_u8]); + } + + #[test] + fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(5, true); + buffer.append_n(5, false); + buffer.append_n(5, true); + buffer.set_bit(0, false); + buffer.set_bit(3, false); + buffer.set_bit(9, false); + buffer.set_bit(6, true); + buffer.set_bit(14, true); + buffer.set_bit(13, false); + assert_eq!(buffer.len(), 15); + assert_eq!(buffer.finish().as_slice(), &[0b01010110_u8, 0b1011100_u8]); + } + + #[test] + fn test_bool_buffer_builder_get_first_bit() { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(8, true); + buffer.append_n(8, false); + assert!(buffer.get_bit(0)); + } + + #[test] + fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() { + let buffer = { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(8, true); + buffer + }; + + assert!(buffer.get_bit(0)); + } + + #[test] + fn test_bool_buffer_builder_get_last_bit() { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(8, true); + buffer.append_n(8, false); + assert!(!buffer.get_bit(15)); + } + + #[test] + fn test_bool_buffer_builder_get_an_inner_bit() { + let mut buffer = BooleanBufferBuilder::new(16); + buffer.append_n(4, false); + buffer.append_n(8, true); + buffer.append_n(4, false); + assert!(buffer.get_bit(11)); + } + + #[test] + fn test_bool_buffer_fuzz() { + use rand::prelude::*; + + let mut buffer = BooleanBufferBuilder::new(12); + let mut all_bools = vec![]; + let mut rng = rand::thread_rng(); + + let src_len = 32; + let (src, compacted_src) = { + let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) + .take(src_len) + .collect(); + + let mut compacted_src = BooleanBufferBuilder::new(src_len); + compacted_src.append_slice(&src); + (src, compacted_src.finish()) + }; + + for _ in 0..100 { + let a = rng.next_u32() as usize % src_len; + let b 
= rng.next_u32() as usize % src_len; + + let start = a.min(b); + let end = a.max(b); + + buffer.append_packed_range(start..end, compacted_src.as_slice()); + all_bools.extend_from_slice(&src[start..end]); + } + + let mut compacted = BooleanBufferBuilder::new(all_bools.len()); + compacted.append_slice(&all_bools); + + assert_eq!(buffer.finish(), compacted.finish()) + } + + #[test] + fn test_boolean_array_builder_resize() { + let mut builder = BooleanBufferBuilder::new(20); + builder.append_n(4, true); + builder.append_n(7, false); + builder.append_n(2, true); + builder.resize(20); + + assert_eq!(builder.len(), 20); + assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]); + + builder.resize(5); + assert_eq!(builder.len(), 5); + assert_eq!(builder.as_slice(), &[0b00001111]); + + builder.append_n(4, true); + assert_eq!(builder.len(), 9); + assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]); + } + + #[test] + fn test_boolean_builder_increases_buffer_len() { + // 00000010 01001000 + let buf = Buffer::from([72_u8, 2_u8]); + let mut builder = BooleanBufferBuilder::new(8); + + for i in 0..16 { + if i == 3 || i == 6 || i == 9 { + builder.append(true); + } else { + builder.append(false); + } + } + let buf2 = builder.finish(); + + assert_eq!(buf.len(), buf2.len()); + assert_eq!(buf.as_slice(), buf2.as_slice()); + } +} diff --git a/arrow/src/array/builder/boolean_builder.rs b/arrow/src/array/builder/boolean_builder.rs new file mode 100644 index 000000000000..98acb641b1a8 --- /dev/null +++ b/arrow/src/array/builder/boolean_builder.rs @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::array::ArrayBuilder; +use crate::array::ArrayData; +use crate::array::ArrayRef; +use crate::array::BooleanArray; +use crate::datatypes::DataType; +use crate::error::{ArrowError, Result}; + +use super::BooleanBufferBuilder; + +/// Array builder for fixed-width primitive types +/// +/// # Example +/// +/// Create a `BooleanArray` from a `BooleanBuilder` +/// +/// ``` +/// use arrow::array::{Array, BooleanArray, BooleanBuilder}; +/// +/// let mut b = BooleanBuilder::new(4); +/// b.append_value(true); +/// b.append_null(); +/// b.append_value(false); +/// b.append_value(true); +/// let arr = b.finish(); +/// +/// assert_eq!(4, arr.len()); +/// assert_eq!(1, arr.null_count()); +/// assert_eq!(true, arr.value(0)); +/// assert!(arr.is_valid(0)); +/// assert!(!arr.is_null(0)); +/// assert!(!arr.is_valid(1)); +/// assert!(arr.is_null(1)); +/// assert_eq!(false, arr.value(2)); +/// assert!(arr.is_valid(2)); +/// assert!(!arr.is_null(2)); +/// assert_eq!(true, arr.value(3)); +/// assert!(arr.is_valid(3)); +/// assert!(!arr.is_null(3)); +/// ``` +#[derive(Debug)] +pub struct BooleanBuilder { + values_builder: BooleanBufferBuilder, + bitmap_builder: BooleanBufferBuilder, +} + +impl BooleanBuilder { + /// Creates a new primitive array builder + pub fn new(capacity: usize) -> Self { + Self { + values_builder: BooleanBufferBuilder::new(capacity), + bitmap_builder: BooleanBufferBuilder::new(capacity), + } + } + + /// Returns the capacity of this builder measured in slots of type `T` + pub fn capacity(&self) 
-> usize { + self.values_builder.capacity() + } + + /// Appends a value of type `T` into the builder + #[inline] + pub fn append_value(&mut self, v: bool) -> Result<()> { + self.bitmap_builder.append(true); + self.values_builder.append(v); + Ok(()) + } + + /// Appends a null slot into the builder + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.bitmap_builder.append(false); + self.values_builder.advance(1); + Ok(()) + } + + /// Appends an `Option` into the builder + #[inline] + pub fn append_option(&mut self, v: Option) -> Result<()> { + match v { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Appends a slice of type `T` into the builder + #[inline] + pub fn append_slice(&mut self, v: &[bool]) -> Result<()> { + self.bitmap_builder.append_n(v.len(), true); + self.values_builder.append_slice(v); + Ok(()) + } + + /// Appends values from a slice of type `T` and a validity boolean slice + #[inline] + pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> { + if values.len() != is_valid.len() { + return Err(ArrowError::InvalidArgumentError( + "Value and validity lengths must be equal".to_string(), + )); + } + self.bitmap_builder.append_slice(is_valid); + self.values_builder.append_slice(values); + Ok(()) + } + + /// Builds the [BooleanArray] and reset this builder. + pub fn finish(&mut self) -> BooleanArray { + let len = self.len(); + let null_bit_buffer = self.bitmap_builder.finish(); + let null_count = len - null_bit_buffer.count_set_bits(); + let builder = ArrayData::builder(DataType::Boolean) + .len(len) + .add_buffer(self.values_builder.finish()) + .null_bit_buffer((null_count > 0).then(|| null_bit_buffer)); + + let array_data = unsafe { builder.build_unchecked() }; + BooleanArray::from(array_data) + } +} + +impl ArrayBuilder for BooleanBuilder { + /// Returns the builder as a non-mutable `Any` reference. 
+ fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.values_builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.values_builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_boolean_array_builder_append_slice() { + let arr1 = + BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); + + let mut builder = BooleanArray::builder(0); + builder.append_slice(&[true, false]).unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_value(false).unwrap(); + let arr2 = builder.finish(); + + assert_eq!(arr1, arr2); + } + + #[test] + fn test_boolean_array_builder_append_slice_large() { + let arr1 = BooleanArray::from(vec![true; 513]); + + let mut builder = BooleanArray::builder(512); + builder.append_slice(&[true; 513]).unwrap(); + let arr2 = builder.finish(); + + assert_eq!(arr1, arr2); + } +} diff --git a/arrow/src/array/builder/buffer_builder.rs b/arrow/src/array/builder/buffer_builder.rs new file mode 100644 index 000000000000..83b2afb44e7a --- /dev/null +++ b/arrow/src/array/builder/buffer_builder.rs @@ -0,0 +1,418 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::mem; + +use crate::buffer::{Buffer, MutableBuffer}; +use crate::datatypes::ArrowNativeType; + +use super::PhantomData; + +/// Converts a `MutableBuffer` to a `BufferBuilder`. +/// +/// `slots` is the number of array slots currently represented in the `MutableBuffer`. +pub(crate) fn mutable_buffer_to_builder( + mutable_buffer: MutableBuffer, + slots: usize, +) -> BufferBuilder { + BufferBuilder:: { + buffer: mutable_buffer, + len: slots, + _marker: PhantomData, + } +} + +/// Converts a `BufferBuilder` into its underlying `MutableBuffer`. +/// +/// `From` is not implemented because associated type bounds are unstable. +pub(crate) fn builder_to_mutable_buffer( + builder: BufferBuilder, +) -> MutableBuffer { + builder.buffer +} + +/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. +/// +/// A [`Buffer`](crate::buffer::Buffer) is the underlying data +/// structure of Arrow's [`Arrays`](crate::array::Array). +/// +/// For all supported types, there are type definitions for the +/// generic version of `BufferBuilder`, e.g. `UInt8BufferBuilder`. 
+/// +/// # Example: +/// +/// ``` +/// use arrow::array::UInt8BufferBuilder; +/// +/// # fn main() -> arrow::error::Result<()> { +/// let mut builder = UInt8BufferBuilder::new(100); +/// builder.append_slice(&[42, 43, 44]); +/// builder.append(45); +/// let buffer = builder.finish(); +/// +/// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 43, 44, 45]); +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +pub struct BufferBuilder { + buffer: MutableBuffer, + len: usize, + _marker: PhantomData, +} + +impl BufferBuilder { + /// Creates a new builder with initial capacity for _at least_ `capacity` + /// elements of type `T`. + /// + /// The capacity can later be manually adjusted with the + /// [`reserve()`](BufferBuilder::reserve) method. + /// Also the + /// [`append()`](BufferBuilder::append), + /// [`append_slice()`](BufferBuilder::append_slice) and + /// [`advance()`](BufferBuilder::advance) + /// methods automatically increase the capacity if needed. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// + /// assert!(builder.capacity() >= 10); + /// ``` + #[inline] + pub fn new(capacity: usize) -> Self { + let buffer = MutableBuffer::new(capacity * mem::size_of::()); + + Self { + buffer, + len: 0, + _marker: PhantomData, + } + } + + /// Returns the current number of array elements in the internal buffer. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append(42); + /// + /// assert_eq!(builder.len(), 1); + /// ``` + pub fn len(&self) -> usize { + self.len + } + + /// Returns whether the internal buffer is empty. 
+ /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append(42); + /// + /// assert_eq!(builder.is_empty(), false); + /// ``` + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the actual capacity (number of elements) of the internal buffer. + /// + /// Note: the internal capacity returned by this method might be larger than + /// what you'd expect after setting the capacity in the `new()` or `reserve()` + /// functions. + pub fn capacity(&self) -> usize { + let byte_capacity = self.buffer.capacity(); + byte_capacity / std::mem::size_of::() + } + + /// Increases the number of elements in the internal buffer by `n` + /// and resizes the buffer as needed. + /// + /// The values of the newly added elements are 0. + /// This method is usually used when appending `NULL` values to the buffer + /// as they still require physical memory space. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.advance(2); + /// + /// assert_eq!(builder.len(), 2); + /// ``` + #[inline] + pub fn advance(&mut self, i: usize) { + let new_buffer_len = (self.len + i) * mem::size_of::(); + self.buffer.resize(new_buffer_len, 0); + self.len += i; + } + + /// Reserves memory for _at least_ `n` more elements of type `T`. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.reserve(10); + /// + /// assert!(builder.capacity() >= 20); + /// ``` + #[inline] + pub fn reserve(&mut self, n: usize) { + self.buffer.reserve(n * mem::size_of::()); + } + + /// Appends a value of type `T` into the builder, + /// growing the internal buffer as needed. 
+ /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append(42); + /// + /// assert_eq!(builder.len(), 1); + /// ``` + #[inline] + pub fn append(&mut self, v: T) { + self.reserve(1); + self.buffer.push(v); + self.len += 1; + } + + /// Appends a value of type `T` into the builder N times, + /// growing the internal buffer as needed. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append_n(10, 42); + /// + /// assert_eq!(builder.len(), 10); + /// ``` + #[inline] + pub fn append_n(&mut self, n: usize, v: T) { + self.reserve(n); + for _ in 0..n { + self.buffer.push(v); + } + self.len += n; + } + + /// Appends a slice of type `T`, growing the internal buffer as needed. + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append_slice(&[42, 44, 46]); + /// + /// assert_eq!(builder.len(), 3); + /// ``` + #[inline] + pub fn append_slice(&mut self, slice: &[T]) { + self.buffer.extend_from_slice(slice); + self.len += slice.len(); + } + + /// # Safety + /// This requires the iterator be a trusted length. This could instead require + /// the iterator implement `TrustedLen` once that is stabilized. + #[inline] + pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator) { + let iter = iter.into_iter(); + let len = iter + .size_hint() + .1 + .expect("append_trusted_len_iter expects upper bound"); + self.reserve(len); + for v in iter { + self.buffer.push(v) + } + self.len += len; + } + + /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer). 
+ /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt8BufferBuilder; + /// + /// let mut builder = UInt8BufferBuilder::new(10); + /// builder.append_slice(&[42, 44, 46]); + /// + /// let buffer = builder.finish(); + /// + /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 44, 46]); + /// ``` + #[inline] + pub fn finish(&mut self) -> Buffer { + let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); + self.len = 0; + buf.into() + } +} + +#[cfg(test)] +mod tests { + use crate::array::array::Array; + use crate::array::builder::ArrayBuilder; + use crate::array::Int32BufferBuilder; + use crate::array::Int8Builder; + use crate::array::UInt8BufferBuilder; + use crate::error::Result; + + #[test] + fn test_builder_i32_empty() { + let mut b = Int32BufferBuilder::new(5); + assert_eq!(0, b.len()); + assert_eq!(16, b.capacity()); + let a = b.finish(); + assert_eq!(0, a.len()); + } + + #[test] + fn test_builder_i32_alloc_zero_bytes() { + let mut b = Int32BufferBuilder::new(0); + b.append(123); + let a = b.finish(); + assert_eq!(4, a.len()); + } + + #[test] + fn test_builder_i32() { + let mut b = Int32BufferBuilder::new(5); + for i in 0..5 { + b.append(i); + } + assert_eq!(16, b.capacity()); + let a = b.finish(); + assert_eq!(20, a.len()); + } + + #[test] + fn test_builder_i32_grow_buffer() { + let mut b = Int32BufferBuilder::new(2); + assert_eq!(16, b.capacity()); + for i in 0..20 { + b.append(i); + } + assert_eq!(32, b.capacity()); + let a = b.finish(); + assert_eq!(80, a.len()); + } + + #[test] + fn test_builder_finish() { + let mut b = Int32BufferBuilder::new(5); + assert_eq!(16, b.capacity()); + for i in 0..10 { + b.append(i); + } + let mut a = b.finish(); + assert_eq!(40, a.len()); + assert_eq!(0, b.len()); + assert_eq!(0, b.capacity()); + + // Try build another buffer after cleaning up. 
+ for i in 0..20 { + b.append(i) + } + assert_eq!(32, b.capacity()); + a = b.finish(); + assert_eq!(80, a.len()); + } + + #[test] + fn test_reserve() { + let mut b = UInt8BufferBuilder::new(2); + assert_eq!(64, b.capacity()); + b.reserve(64); + assert_eq!(64, b.capacity()); + b.reserve(65); + assert_eq!(128, b.capacity()); + + let mut b = Int32BufferBuilder::new(2); + assert_eq!(16, b.capacity()); + b.reserve(16); + assert_eq!(16, b.capacity()); + b.reserve(17); + assert_eq!(32, b.capacity()); + } + + #[test] + fn test_append_slice() { + let mut b = UInt8BufferBuilder::new(0); + b.append_slice(b"Hello, "); + b.append_slice(b"World!"); + let buffer = b.finish(); + assert_eq!(13, buffer.len()); + + let mut b = Int32BufferBuilder::new(0); + b.append_slice(&[32, 54]); + let buffer = b.finish(); + assert_eq!(8, buffer.len()); + } + + #[test] + fn test_append_values() -> Result<()> { + let mut a = Int8Builder::new(0); + a.append_value(1)?; + a.append_null()?; + a.append_value(-2)?; + assert_eq!(a.len(), 3); + + // append values + let values = &[1, 2, 3, 4]; + let is_valid = &[true, true, false, true]; + a.append_values(values, is_valid)?; + + assert_eq!(a.len(), 7); + let array = a.finish(); + assert_eq!(array.value(0), 1); + assert!(array.is_null(1)); + assert_eq!(array.value(2), -2); + assert_eq!(array.value(3), 1); + assert_eq!(array.value(4), 2); + assert!(array.is_null(5)); + assert_eq!(array.value(6), 4); + + Ok(()) + } +} diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs new file mode 100644 index 000000000000..a7925358b8f8 --- /dev/null +++ b/arrow/src/array/builder/decimal_builder.rs @@ -0,0 +1,452 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::array::ArrayBuilder; +use crate::array::ArrayRef; +use crate::array::DecimalArray; +use crate::array::FixedSizeBinaryArray; +use crate::array::OffsetSizeTrait; +use crate::array::UInt8Builder; +use crate::array::{GenericBinaryArray, GenericStringArray}; + +use crate::error::{ArrowError, Result}; + +use super::{FixedSizeBinaryBuilder, FixedSizeListBuilder}; +use super::{GenericBinaryBuilder, GenericListBuilder, GenericStringBuilder}; + +use crate::datatypes::validate_decimal_precision; + +/// Array Builder for [`DecimalArray`] +/// +/// See [`DecimalArray`] for example. +/// +#[derive(Debug)] +pub struct DecimalBuilder { + builder: FixedSizeListBuilder, + precision: usize, + scale: usize, + + /// Should i128 values be validated for compatibility with scale and precision? + /// defaults to true + value_validation: bool, +} + +impl ArrayBuilder for GenericBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. 
+ fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl ArrayBuilder for GenericStringBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + let a = GenericStringBuilder::::finish(self); + Arc::new(a) + } +} + +impl ArrayBuilder for FixedSizeBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl ArrayBuilder for DecimalBuilder { + /// Returns the builder as a non-mutable `Any` reference. 
+ fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl GenericBinaryBuilder { + /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Appends a single byte value into the builder's values array. + /// + /// Note, when appending individual byte values you must call `append` to delimit each + /// distinct list value. + #[inline] + pub fn append_byte(&mut self, value: u8) -> Result<()> { + self.builder.values().append_value(value)?; + Ok(()) + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `BinaryArray` and reset this builder. 
+ pub fn finish(&mut self) -> GenericBinaryArray { + GenericBinaryArray::::from(self.builder.finish()) + } +} + +impl GenericStringBuilder { + /// Creates a new `StringBuilder`, + /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Creates a new `StringBuilder`, + /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder + /// `item_capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { + let values_builder = UInt8Builder::new(data_capacity); + Self { + builder: GenericListBuilder::with_capacity(values_builder, item_capacity), + } + } + + /// Appends a string into the builder. + /// + /// Automatically calls the `append` method to delimit the string appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { + self.builder + .values() + .append_slice(value.as_ref().as_bytes())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Append an `Option` value to the array. + #[inline] + pub fn append_option(&mut self, value: Option>) -> Result<()> { + match value { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Builds the `StringArray` and reset this builder. 
+ pub fn finish(&mut self) -> GenericStringArray { + GenericStringArray::::from(self.builder.finish()) + } +} + +impl FixedSizeBinaryBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, byte_width: i32) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + } + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + if self.builder.value_length() != value.as_ref().len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() + )); + } + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `FixedSizeBinaryArray` and reset this builder. 
+ pub fn finish(&mut self) -> FixedSizeBinaryArray { + FixedSizeBinaryArray::from(self.builder.finish()) + } +} + +impl DecimalBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + let byte_width = 16; + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + precision, + scale, + value_validation: true, + } + } + + /// Disable validation + /// + /// # Safety + /// + /// After disabling validation, caller must ensure that appended values are compatible + /// for the specified precision and scale. + pub unsafe fn disable_value_validation(&mut self) { + self.value_validation = false; + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl Into) -> Result<()> { + let value = if self.value_validation { + validate_decimal_precision(value.into(), self.precision)? + } else { + value.into() + }; + + let value_as_bytes = Self::from_i128_to_fixed_size_bytes( + value, + self.builder.value_length() as usize, + )?; + if self.builder.value_length() != value_as_bytes.len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as DecimalBuilder value lengths".to_string() + )); + } + self.builder + .values() + .append_slice(value_as_bytes.as_slice())?; + self.builder.append(true) + } + + pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result> { + if size > 16 { + return Err(ArrowError::InvalidArgumentError( + "DecimalBuilder only supports values up to 16 bytes.".to_string(), + )); + } + let res = v.to_le_bytes(); + let start_byte = 16 - size; + Ok(res[start_byte..16].to_vec()) + } + + /// Append a null value to the array. 
+ #[inline] + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `DecimalArray` and reset this builder. + pub fn finish(&mut self) -> DecimalArray { + DecimalArray::from_fixed_size_list_array( + self.builder.finish(), + self.precision, + self.scale, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::datatypes::DataType; + use crate::util::decimal::Decimal128; + + #[test] + fn test_decimal_builder() { + let mut builder = DecimalBuilder::new(30, 38, 6); + + builder.append_value(8_887_000_000_i128).unwrap(); + builder.append_null().unwrap(); + builder.append_value(-8_887_000_000_i128).unwrap(); + let decimal_array: DecimalArray = builder.finish(); + + assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type()); + assert_eq!(3, decimal_array.len()); + assert_eq!(1, decimal_array.null_count()); + assert_eq!(32, decimal_array.value_offset(2)); + assert_eq!(16, decimal_array.value_length()); + } + + #[test] + fn test_decimal_builder_with_decimal128() { + let mut builder = DecimalBuilder::new(30, 38, 6); + + builder + .append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128)) + .unwrap(); + builder.append_null().unwrap(); + builder + .append_value(Decimal128::new_from_i128(30, 38, -8_887_000_000_i128)) + .unwrap(); + let decimal_array: DecimalArray = builder.finish(); + + assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type()); + assert_eq!(3, decimal_array.len()); + assert_eq!(1, decimal_array.null_count()); + assert_eq!(32, decimal_array.value_offset(2)); + assert_eq!(16, decimal_array.value_length()); + } +} diff --git a/arrow/src/array/builder/fixed_size_list_builder.rs b/arrow/src/array/builder/fixed_size_list_builder.rs new file mode 100644 index 000000000000..f0233e2638bd --- /dev/null +++ 
b/arrow/src/array/builder/fixed_size_list_builder.rs @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::array::ArrayData; +use crate::array::ArrayRef; +use crate::array::FixedSizeListArray; +use crate::array::Int32BufferBuilder; +use crate::datatypes::DataType; +use crate::datatypes::Field; +use crate::error::Result; + +use super::ArrayBuilder; +use super::BooleanBufferBuilder; + +/// Array builder for `ListArray` +#[derive(Debug)] +pub struct FixedSizeListBuilder { + bitmap_builder: BooleanBufferBuilder, + values_builder: T, + len: usize, + list_len: i32, +} + +impl FixedSizeListBuilder { + /// Creates a new `FixedSizeListBuilder` from a given values array builder + /// `length` is the number of values within each array + pub fn new(values_builder: T, length: i32) -> Self { + let capacity = values_builder.len(); + Self::with_capacity(values_builder, length, capacity) + } + + /// Creates a new `FixedSizeListBuilder` from a given values array builder + /// `length` is the number of values within each array + /// `capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(values_builder: T, length: i32, 
capacity: usize) -> Self { + let mut offsets_builder = Int32BufferBuilder::new(capacity + 1); + offsets_builder.append(0); + Self { + bitmap_builder: BooleanBufferBuilder::new(capacity), + values_builder, + len: 0, + list_len: length, + } + } +} + +impl ArrayBuilder for FixedSizeListBuilder +where + T: 'static, +{ + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.len + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl FixedSizeListBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to append values into the child array builder, + /// but you must call `append` to delimit each distinct list value. + pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + pub fn value_length(&self) -> i32 { + self.list_len + } + + /// Finish the current variable-length list array slot + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.bitmap_builder.append(is_valid); + self.len += 1; + Ok(()) + } + + /// Builds the `FixedSizeListBuilder` and reset this builder. 
+ pub fn finish(&mut self) -> FixedSizeListArray { + let len = self.len(); + self.len = 0; + let values_arr = self + .values_builder + .as_any_mut() + .downcast_mut::() + .unwrap() + .finish(); + let values_data = values_arr.data(); + + // check that values_data length is multiple of len if we have data + if len != 0 { + assert!( + values_data.len() / len == self.list_len as usize, + "Values of FixedSizeList must have equal lengths, values have length {} and list has {}", + values_data.len() / len, + self.list_len + ); + } + + let null_bit_buffer = self.bitmap_builder.finish(); + let array_data = ArrayData::builder(DataType::FixedSizeList( + Box::new(Field::new("item", values_data.data_type().clone(), true)), + self.list_len, + )) + .len(len) + .add_child_data(values_data.clone()) + .null_bit_buffer(Some(null_bit_buffer)); + + let array_data = unsafe { array_data.build_unchecked() }; + + FixedSizeListArray::from(array_data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::array::FixedSizeBinaryArray; + use crate::array::FixedSizeBinaryBuilder; + use crate::array::Int32Array; + use crate::array::Int32Builder; + + #[test] + fn test_fixed_size_list_array_builder() { + let values_builder = Int32Builder::new(10); + let mut builder = FixedSizeListBuilder::new(values_builder, 3); + + // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); + builder.append(true).unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_null().unwrap(); + builder.append(false).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_value(5).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); + 
builder.values().append_null().unwrap(); + builder.append(true).unwrap(); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + + #[test] + fn test_fixed_size_list_array_builder_empty() { + let values_builder = Int32Array::builder(5); + let mut builder = FixedSizeListBuilder::new(values_builder, 3); + + let arr = builder.finish(); + assert_eq!(0, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_fixed_size_list_array_builder_finish() { + let values_builder = Int32Array::builder(5); + let mut builder = FixedSizeListBuilder::new(values_builder, 3); + + builder.values().append_slice(&[1, 2, 3]).unwrap(); + builder.append(true).unwrap(); + builder.values().append_slice(&[4, 5, 6]).unwrap(); + builder.append(true).unwrap(); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert_eq!(0, builder.len()); + + builder.values().append_slice(&[7, 8, 9]).unwrap(); + builder.append(true).unwrap(); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_fixed_size_binary_builder() { + let mut builder = FixedSizeBinaryBuilder::new(15, 5); + + // [b"hello", null, "arrow"] + builder.append_value(b"hello").unwrap(); + builder.append_null().unwrap(); + builder.append_value(b"arrow").unwrap(); + let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish(); + + assert_eq!( + &DataType::FixedSizeBinary(5), + fixed_size_binary_array.data_type() + ); + assert_eq!(3, fixed_size_binary_array.len()); + assert_eq!(1, fixed_size_binary_array.null_count()); + assert_eq!(10, fixed_size_binary_array.value_offset(2)); + assert_eq!(5, fixed_size_binary_array.value_length()); + } +} diff --git a/arrow/src/array/builder/generic_list_builder.rs b/arrow/src/array/builder/generic_list_builder.rs 
new file mode 100644 index 000000000000..1449b5c09cc0 --- /dev/null +++ b/arrow/src/array/builder/generic_list_builder.rs @@ -0,0 +1,502 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::array::ArrayData; +use crate::array::ArrayRef; +use crate::array::GenericListArray; +use crate::array::OffsetSizeTrait; +use crate::datatypes::DataType; +use crate::datatypes::Field; +use crate::error::Result; + +use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; + +/// Array builder for `ListArray` +#[derive(Debug)] +pub struct GenericListBuilder { + offsets_builder: BufferBuilder, + bitmap_builder: BooleanBufferBuilder, + values_builder: T, + len: OffsetSize, +} + +impl GenericListBuilder { + /// Creates a new `ListArrayBuilder` from a given values array builder + pub fn new(values_builder: T) -> Self { + let capacity = values_builder.len(); + Self::with_capacity(values_builder, capacity) + } + + /// Creates a new `ListArrayBuilder` from a given values array builder + /// `capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(values_builder: T, capacity: usize) -> Self { + let mut offsets_builder = 
BufferBuilder::::new(capacity + 1); + let len = OffsetSize::zero(); + offsets_builder.append(len); + Self { + offsets_builder, + bitmap_builder: BooleanBufferBuilder::new(capacity), + values_builder, + len, + } + } +} + +impl ArrayBuilder + for GenericListBuilder +where + T: 'static, +{ + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.len.to_usize().unwrap() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.len == OffsetSize::zero() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl GenericListBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to append values into the child array builder, + /// but you must call `append` to delimit each distinct list value. + pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + /// Finish the current variable-length list array slot + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.offsets_builder + .append(OffsetSize::from_usize(self.values_builder.len()).unwrap()); + self.bitmap_builder.append(is_valid); + self.len += OffsetSize::one(); + Ok(()) + } + + /// Builds the `ListArray` and reset this builder. 
+ pub fn finish(&mut self) -> GenericListArray { + let len = self.len(); + self.len = OffsetSize::zero(); + let values_arr = self + .values_builder + .as_any_mut() + .downcast_mut::() + .unwrap() + .finish(); + let values_data = values_arr.data(); + + let offset_buffer = self.offsets_builder.finish(); + let null_bit_buffer = self.bitmap_builder.finish(); + self.offsets_builder.append(self.len); + let field = Box::new(Field::new( + "item", + values_data.data_type().clone(), + true, // TODO: find a consistent way of getting this + )); + let data_type = if OffsetSize::IS_LARGE { + DataType::LargeList(field) + } else { + DataType::List(field) + }; + let array_data = ArrayData::builder(data_type) + .len(len) + .add_buffer(offset_buffer) + .add_child_data(values_data.clone()) + .null_bit_buffer(Some(null_bit_buffer)); + + let array_data = unsafe { array_data.build_unchecked() }; + + GenericListArray::::from(array_data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::array::Int32Array; + use crate::array::Int32Builder; + use crate::buffer::Buffer; + + use crate::array::builder::{ + BinaryBuilder, LargeBinaryBuilder, LargeListBuilder, ListBuilder, StringBuilder, + }; + + #[test] + fn test_list_array_builder() { + let values_builder = Int32Builder::new(10); + let mut builder = ListBuilder::new(values_builder); + + // [[0, 1, 2], [3, 4, 5], [6, 7]] + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_value(4).unwrap(); + builder.values().append_value(5).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); + builder.append(true).unwrap(); + let list_array = builder.finish(); + + let values = list_array.values().data().buffers()[0].clone(); + 
assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values); + assert_eq!( + Buffer::from_slice_ref(&[0, 3, 6, 8]), + list_array.data().buffers()[0].clone() + ); + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(6, list_array.value_offsets()[2]); + assert_eq!(2, list_array.value_length(2)); + for i in 0..3 { + assert!(list_array.is_valid(i)); + assert!(!list_array.is_null(i)); + } + } + + #[test] + fn test_large_list_array_builder() { + let values_builder = Int32Builder::new(10); + let mut builder = LargeListBuilder::new(values_builder); + + // [[0, 1, 2], [3, 4, 5], [6, 7]] + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_value(4).unwrap(); + builder.values().append_value(5).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); + builder.append(true).unwrap(); + let list_array = builder.finish(); + + let values = list_array.values().data().buffers()[0].clone(); + assert_eq!(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]), values); + assert_eq!( + Buffer::from_slice_ref(&[0i64, 3, 6, 8]), + list_array.data().buffers()[0].clone() + ); + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(6, list_array.value_offsets()[2]); + assert_eq!(2, list_array.value_length(2)); + for i in 0..3 { + assert!(list_array.is_valid(i)); + assert!(!list_array.is_null(i)); + } + } + + #[test] + fn test_list_array_builder_nulls() { + let values_builder = Int32Builder::new(10); + let mut builder = ListBuilder::new(values_builder); + + // [[0, 1, 2], null, [3, null, 5], [6, 7]] + builder.values().append_value(0).unwrap(); + 
builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); + builder.append(true).unwrap(); + builder.append(false).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_value(5).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); + builder.append(true).unwrap(); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(3, list_array.value_offsets()[2]); + assert_eq!(3, list_array.value_length(2)); + } + + #[test] + fn test_large_list_array_builder_nulls() { + let values_builder = Int32Builder::new(10); + let mut builder = LargeListBuilder::new(values_builder); + + // [[0, 1, 2], null, [3, null, 5], [6, 7]] + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); + builder.append(true).unwrap(); + builder.append(false).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_value(5).unwrap(); + builder.append(true).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); + builder.append(true).unwrap(); + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(3, list_array.value_offsets()[2]); + assert_eq!(3, list_array.value_length(2)); + } + + #[test] + fn test_list_array_builder_finish() { + let values_builder = Int32Array::builder(5); + let mut builder = ListBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]).unwrap(); + builder.append(true).unwrap(); + builder.values().append_slice(&[4, 5, 6]).unwrap(); + 
builder.append(true).unwrap(); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert_eq!(0, builder.len()); + + builder.values().append_slice(&[7, 8, 9]).unwrap(); + builder.append(true).unwrap(); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_list_list_array_builder() { + let primitive_builder = Int32Builder::new(10); + let values_builder = ListBuilder::new(primitive_builder); + let mut builder = ListBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder.values().values().append_value(1).unwrap(); + builder.values().values().append_value(2).unwrap(); + builder.values().append(true).unwrap(); + builder.values().values().append_value(3).unwrap(); + builder.values().values().append_value(4).unwrap(); + builder.values().append(true).unwrap(); + builder.append(true).unwrap(); + + builder.values().values().append_value(5).unwrap(); + builder.values().values().append_value(6).unwrap(); + builder.values().values().append_value(7).unwrap(); + builder.values().append(true).unwrap(); + builder.values().append(false).unwrap(); + builder.values().values().append_value(8).unwrap(); + builder.values().append(true).unwrap(); + builder.append(true).unwrap(); + + builder.append(false).unwrap(); + + builder.values().values().append_value(9).unwrap(); + builder.values().values().append_value(10).unwrap(); + builder.values().append(true).unwrap(); + builder.append(true).unwrap(); + + let list_array = builder.finish(); + + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!( + Buffer::from_slice_ref(&[0, 2, 5, 5, 6]), + list_array.data().buffers()[0].clone() + ); + + assert_eq!(6, list_array.values().data().len()); + assert_eq!(1, list_array.values().data().null_count()); + assert_eq!( + Buffer::from_slice_ref(&[0, 2, 4, 7, 7, 8, 10]), + list_array.values().data().buffers()[0].clone() + ); + + assert_eq!(10, 
list_array.values().data().child_data()[0].len()); + assert_eq!(0, list_array.values().data().child_data()[0].null_count()); + assert_eq!( + Buffer::from_slice_ref(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + list_array.values().data().child_data()[0].buffers()[0].clone() + ); + } + + #[test] + fn test_binary_array_builder() { + let mut builder = BinaryBuilder::new(20); + + builder.append_byte(b'h').unwrap(); + builder.append_byte(b'e').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'o').unwrap(); + builder.append(true).unwrap(); + builder.append(true).unwrap(); + builder.append_byte(b'w').unwrap(); + builder.append_byte(b'o').unwrap(); + builder.append_byte(b'r').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'd').unwrap(); + builder.append(true).unwrap(); + + let binary_array = builder.finish(); + + assert_eq!(3, binary_array.len()); + assert_eq!(0, binary_array.null_count()); + assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); + assert_eq!([] as [u8; 0], binary_array.value(1)); + assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); + assert_eq!(5, binary_array.value_offsets()[2]); + assert_eq!(5, binary_array.value_length(2)); + } + + #[test] + fn test_large_binary_array_builder() { + let mut builder = LargeBinaryBuilder::new(20); + + builder.append_byte(b'h').unwrap(); + builder.append_byte(b'e').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'o').unwrap(); + builder.append(true).unwrap(); + builder.append(true).unwrap(); + builder.append_byte(b'w').unwrap(); + builder.append_byte(b'o').unwrap(); + builder.append_byte(b'r').unwrap(); + builder.append_byte(b'l').unwrap(); + builder.append_byte(b'd').unwrap(); + builder.append(true).unwrap(); + + let binary_array = builder.finish(); + + assert_eq!(3, binary_array.len()); + assert_eq!(0, binary_array.null_count()); + assert_eq!([b'h', b'e', b'l', 
b'l', b'o'], binary_array.value(0)); + assert_eq!([] as [u8; 0], binary_array.value(1)); + assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); + assert_eq!(5, binary_array.value_offsets()[2]); + assert_eq!(5, binary_array.value_length(2)); + } + + #[test] + fn test_string_array_builder() { + let mut builder = StringBuilder::new(20); + + builder.append_value("hello").unwrap(); + builder.append(true).unwrap(); + builder.append_value("world").unwrap(); + + let string_array = builder.finish(); + + assert_eq!(3, string_array.len()); + assert_eq!(0, string_array.null_count()); + assert_eq!("hello", string_array.value(0)); + assert_eq!("", string_array.value(1)); + assert_eq!("world", string_array.value(2)); + assert_eq!(5, string_array.value_offsets()[2]); + assert_eq!(5, string_array.value_length(2)); + } + + #[test] + fn test_string_array_builder_finish() { + let mut builder = StringBuilder::new(10); + + builder.append_value("hello").unwrap(); + builder.append_value("world").unwrap(); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert_eq!(0, builder.len()); + + builder.append_value("arrow").unwrap(); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_string_array_builder_append_string() { + let mut builder = StringBuilder::new(20); + + let var = "hello".to_owned(); + builder.append_value(&var).unwrap(); + builder.append(true).unwrap(); + builder.append_value("world").unwrap(); + + let string_array = builder.finish(); + + assert_eq!(3, string_array.len()); + assert_eq!(0, string_array.null_count()); + assert_eq!("hello", string_array.value(0)); + assert_eq!("", string_array.value(1)); + assert_eq!("world", string_array.value(2)); + assert_eq!(5, string_array.value_offsets()[2]); + assert_eq!(5, string_array.value_length(2)); + } + + #[test] + fn test_string_array_builder_append_option() { + let mut builder = StringBuilder::new(20); + 
builder.append_option(Some("hello")).unwrap(); + builder.append_option(None::<&str>).unwrap(); + builder.append_option(None::).unwrap(); + builder.append_option(Some("world")).unwrap(); + + let string_array = builder.finish(); + + assert_eq!(4, string_array.len()); + assert_eq!("hello", string_array.value(0)); + assert!(string_array.is_null(1)); + assert!(string_array.is_null(2)); + assert_eq!("world", string_array.value(3)); + } +} diff --git a/arrow/src/array/builder/map_builder.rs b/arrow/src/array/builder/map_builder.rs new file mode 100644 index 000000000000..30ea9ad1b2ae --- /dev/null +++ b/arrow/src/array/builder/map_builder.rs @@ -0,0 +1,255 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::sync::Arc; + +use crate::array::array::Array; +use crate::array::ArrayData; +use crate::array::ArrayRef; +use crate::array::MapArray; +use crate::array::StructArray; +use crate::datatypes::DataType; +use crate::datatypes::Field; +use crate::error::{ArrowError, Result}; + +use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; + +#[derive(Debug)] +pub struct MapBuilder { + offsets_builder: BufferBuilder, + bitmap_builder: BooleanBufferBuilder, + field_names: MapFieldNames, + key_builder: K, + value_builder: V, + len: i32, +} + +#[derive(Debug, Clone)] +pub struct MapFieldNames { + pub entry: String, + pub key: String, + pub value: String, +} + +impl Default for MapFieldNames { + fn default() -> Self { + Self { + entry: "entries".to_string(), + key: "keys".to_string(), + value: "values".to_string(), + } + } +} + +#[allow(dead_code)] +impl MapBuilder { + pub fn new( + field_names: Option, + key_builder: K, + value_builder: V, + ) -> Self { + let capacity = key_builder.len(); + Self::with_capacity(field_names, key_builder, value_builder, capacity) + } + + pub fn with_capacity( + field_names: Option, + key_builder: K, + value_builder: V, + capacity: usize, + ) -> Self { + let mut offsets_builder = BufferBuilder::::new(capacity + 1); + let len = 0; + offsets_builder.append(len); + Self { + offsets_builder, + bitmap_builder: BooleanBufferBuilder::new(capacity), + field_names: field_names.unwrap_or_default(), + key_builder, + value_builder, + len, + } + } + + pub fn keys(&mut self) -> &mut K { + &mut self.key_builder + } + + pub fn values(&mut self) -> &mut V { + &mut self.value_builder + } + + /// Finish the current map array slot + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + if self.key_builder.len() != self.value_builder.len() { + return Err(ArrowError::InvalidArgumentError(format!( + "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}", + self.key_builder.len(), + 
self.value_builder.len() + ))); + } + self.offsets_builder.append(self.key_builder.len() as i32); + self.bitmap_builder.append(is_valid); + self.len += 1; + Ok(()) + } + + pub fn finish(&mut self) -> MapArray { + let len = self.len(); + self.len = 0; + + // Build the keys + let keys_arr = self + .key_builder + .as_any_mut() + .downcast_mut::() + .unwrap() + .finish(); + let values_arr = self + .value_builder + .as_any_mut() + .downcast_mut::() + .unwrap() + .finish(); + + let keys_field = Field::new( + self.field_names.key.as_str(), + keys_arr.data_type().clone(), + false, // always nullable + ); + let values_field = Field::new( + self.field_names.value.as_str(), + values_arr.data_type().clone(), + true, + ); + + let struct_array = + StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]); + + let offset_buffer = self.offsets_builder.finish(); + let null_bit_buffer = self.bitmap_builder.finish(); + self.offsets_builder.append(self.len); + let map_field = Box::new(Field::new( + self.field_names.entry.as_str(), + struct_array.data_type().clone(), + false, // always non-nullable + )); + let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys + .len(len) + .add_buffer(offset_buffer) + .add_child_data(struct_array.data().clone()) + .null_bit_buffer(Some(null_bit_buffer)); + + let array_data = unsafe { array_data.build_unchecked() }; + + MapArray::from(array_data) + } +} + +impl ArrayBuilder for MapBuilder { + fn len(&self) -> usize { + self.len as usize + } + + fn is_empty(&self) -> bool { + self.len == 0 + } + + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn into_box_any(self: Box) -> Box { + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::builder::StringBuilder; + use crate::array::Int32Builder; + use crate::bitmap::Bitmap; + use 
crate::buffer::Buffer; + + // TODO: add a test that finishes building, after designing a spec-compliant + // way of inserting values to the map. + // A map's values shouldn't be repeated within a slot + + #[test] + fn test_map_array_builder() { + let string_builder = StringBuilder::new(4); + let int_builder = Int32Builder::new(4); + + let mut builder = MapBuilder::new(None, string_builder, int_builder); + + let string_builder = builder.keys(); + string_builder.append_value("joe").unwrap(); + string_builder.append_null().unwrap(); + string_builder.append_null().unwrap(); + string_builder.append_value("mark").unwrap(); + + let int_builder = builder.values(); + int_builder.append_value(1).unwrap(); + int_builder.append_value(2).unwrap(); + int_builder.append_null().unwrap(); + int_builder.append_value(4).unwrap(); + + builder.append(true).unwrap(); + builder.append(false).unwrap(); + builder.append(true).unwrap(); + + let arr = builder.finish(); + + let map_data = arr.data(); + assert_eq!(3, map_data.len()); + assert_eq!(1, map_data.null_count()); + assert_eq!( + Some(&Bitmap::from(Buffer::from(&[5_u8]))), + map_data.null_bitmap() + ); + + let expected_string_data = ArrayData::builder(DataType::Utf8) + .len(4) + .null_bit_buffer(Some(Buffer::from(&[9_u8]))) + .add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7])) + .add_buffer(Buffer::from_slice_ref(b"joemark")) + .build() + .unwrap(); + + let expected_int_data = ArrayData::builder(DataType::Int32) + .len(4) + .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8]))) + .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4])) + .build() + .unwrap(); + + assert_eq!(&expected_string_data, arr.keys().data()); + assert_eq!(&expected_int_data, arr.values().data()); + } +} diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs new file mode 100644 index 000000000000..4cd82d9bfe3e --- /dev/null +++ b/arrow/src/array/builder/mod.rs @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one 
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable +//! of creating a [`Buffer`](crate::buffer::Buffer) which can be used +//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData) +//! object. + +mod boolean_buffer_builder; +mod boolean_builder; +mod buffer_builder; +mod decimal_builder; +mod fixed_size_list_builder; +mod generic_list_builder; +mod map_builder; +mod primitive_builder; +mod primitive_dictionary_builder; +mod string_dictionary_builder; +mod struct_builder; +mod union_builder; + +use std::any::Any; +use std::marker::PhantomData; +use std::ops::Range; + +use super::ArrayRef; +use super::OffsetSizeTrait; +use super::UInt8Builder; + +pub use boolean_buffer_builder::BooleanBufferBuilder; +pub use boolean_builder::BooleanBuilder; +pub use buffer_builder::BufferBuilder; +pub use decimal_builder::DecimalBuilder; +pub use fixed_size_list_builder::FixedSizeListBuilder; +pub use generic_list_builder::GenericListBuilder; +pub use map_builder::MapBuilder; +pub use primitive_builder::PrimitiveBuilder; +pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder; +pub use string_dictionary_builder::StringDictionaryBuilder; +pub use struct_builder::StructBuilder; +pub use 
union_builder::UnionBuilder; + +pub use struct_builder::make_builder; + +/// Trait for dealing with different array builders at runtime +/// +/// # Example +/// +/// ``` +/// # use arrow::{ +/// # array::{ArrayBuilder, ArrayRef, Float64Builder, Int64Builder, StringArray, StringBuilder}, +/// # error::ArrowError, +/// # }; +/// # fn main() -> std::result::Result<(), ArrowError> { +/// // Create +/// let mut data_builders: Vec> = vec![ +/// Box::new(Float64Builder::new(1024)), +/// Box::new(Int64Builder::new(1024)), +/// Box::new(StringBuilder::new(1024)), +/// ]; +/// +/// // Fill +/// data_builders[0] +/// .as_any_mut() +/// .downcast_mut::() +/// .unwrap() +/// .append_value(3.14)?; +/// data_builders[1] +/// .as_any_mut() +/// .downcast_mut::() +/// .unwrap() +/// .append_value(-1)?; +/// data_builders[2] +/// .as_any_mut() +/// .downcast_mut::() +/// .unwrap() +/// .append_value("🍎")?; +/// +/// // Finish +/// let array_refs: Vec = data_builders +/// .iter_mut() +/// .map(|builder| builder.finish()) +/// .collect(); +/// assert_eq!(array_refs[0].len(), 1); +/// assert_eq!(array_refs[1].is_null(0), false); +/// assert_eq!( +/// array_refs[2] +/// .as_any() +/// .downcast_ref::() +/// .unwrap() +/// .value(0), +/// "🍎" +/// ); +/// # Ok(()) +/// # } +/// ``` +pub trait ArrayBuilder: Any + Send { + /// Returns the number of array slots in the builder + fn len(&self) -> usize; + + /// Returns whether number of array slots is zero + fn is_empty(&self) -> bool; + + /// Builds the array + fn finish(&mut self) -> ArrayRef; + + /// Returns the builder as a non-mutable `Any` reference. + /// + /// This is most useful when one wants to call non-mutable APIs on a specific builder + /// type. In this case, one can first cast this into a `Any`, and then use + /// `downcast_ref` to get a reference on the specific builder. + fn as_any(&self) -> &dyn Any; + + /// Returns the builder as a mutable `Any` reference. 
+ /// + /// This is most useful when one wants to call mutable APIs on a specific builder + /// type. In this case, one can first cast this into a `Any`, and then use + /// `downcast_mut` to get a reference on the specific builder. + fn as_any_mut(&mut self) -> &mut dyn Any; + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box; +} + +pub type ListBuilder = GenericListBuilder; +pub type LargeListBuilder = GenericListBuilder; + +/// Array builder for `BinaryArray` +#[derive(Debug)] +pub struct GenericBinaryBuilder { + builder: GenericListBuilder, +} + +pub type BinaryBuilder = GenericBinaryBuilder; +pub type LargeBinaryBuilder = GenericBinaryBuilder; + +#[derive(Debug)] +pub struct GenericStringBuilder { + builder: GenericListBuilder, +} + +pub type StringBuilder = GenericStringBuilder; +pub type LargeStringBuilder = GenericStringBuilder; + +#[derive(Debug)] +pub struct FixedSizeBinaryBuilder { + builder: FixedSizeListBuilder, +} + +#[cfg(test)] +mod tests {} diff --git a/arrow/src/array/builder/primitive_builder.rs b/arrow/src/array/builder/primitive_builder.rs new file mode 100644 index 000000000000..83c62509cfb0 --- /dev/null +++ b/arrow/src/array/builder/primitive_builder.rs @@ -0,0 +1,436 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use crate::array::ArrayData; +use crate::array::ArrayRef; +use crate::array::DictionaryArray; +use crate::array::PrimitiveArray; +use crate::datatypes::ArrowPrimitiveType; +use crate::datatypes::DataType; +use crate::error::{ArrowError, Result}; + +use super::{ArrayBuilder, BooleanBufferBuilder, BufferBuilder}; + +/// Array builder for fixed-width primitive types +#[derive(Debug)] +pub struct PrimitiveBuilder { + values_builder: BufferBuilder, + /// We only materialize the builder when we add `false`. + /// This optimization is **very** important for performance of `StringBuilder`. + bitmap_builder: Option, +} + +impl ArrayBuilder for PrimitiveBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.values_builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.values_builder.is_empty() + } + + /// Builds the array and reset this builder. 
+ fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl PrimitiveBuilder { + /// Creates a new primitive array builder + pub fn new(capacity: usize) -> Self { + Self { + values_builder: BufferBuilder::::new(capacity), + bitmap_builder: None, + } + } + + /// Returns the capacity of this builder measured in slots of type `T` + pub fn capacity(&self) -> usize { + self.values_builder.capacity() + } + + /// Appends a value of type `T` into the builder + #[inline] + pub fn append_value(&mut self, v: T::Native) -> Result<()> { + if let Some(b) = self.bitmap_builder.as_mut() { + b.append(true); + } + self.values_builder.append(v); + Ok(()) + } + + /// Appends a null slot into the builder + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.materialize_bitmap_builder(); + self.bitmap_builder.as_mut().unwrap().append(false); + self.values_builder.advance(1); + Ok(()) + } + + #[inline] + pub fn append_nulls(&mut self, n: usize) -> Result<()> { + self.materialize_bitmap_builder(); + self.bitmap_builder.as_mut().unwrap().append_n(n, false); + self.values_builder.advance(n); + Ok(()) + } + + /// Appends an `Option` into the builder + #[inline] + pub fn append_option(&mut self, v: Option) -> Result<()> { + match v { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Appends a slice of type `T` into the builder + #[inline] + pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> { + if let Some(b) = self.bitmap_builder.as_mut() { + b.append_n(v.len(), true); + } + self.values_builder.append_slice(v); + Ok(()) + } + + /// Appends values from a slice of type `T` and a validity boolean slice + #[inline] + pub fn append_values( + &mut self, + values: &[T::Native], + is_valid: &[bool], + ) -> Result<()> { + if values.len() != is_valid.len() { + return Err(ArrowError::InvalidArgumentError( + "Value and validity lengths must be equal".to_string(), + )); + } + if is_valid.iter().any(|v| !*v) { + 
self.materialize_bitmap_builder(); + } + if let Some(b) = self.bitmap_builder.as_mut() { + b.append_slice(is_valid); + } + self.values_builder.append_slice(values); + Ok(()) + } + + /// Appends values from a trusted length iterator. + /// + /// # Safety + /// This requires the iterator be a trusted length. This could instead require + /// the iterator implement `TrustedLen` once that is stabilized. + #[inline] + pub unsafe fn append_trusted_len_iter( + &mut self, + iter: impl IntoIterator, + ) -> Result<()> { + let iter = iter.into_iter(); + let len = iter + .size_hint() + .1 + .expect("append_trusted_len_iter requires an upper bound"); + + if let Some(b) = self.bitmap_builder.as_mut() { + b.append_n(len, true); + } + self.values_builder.append_trusted_len_iter(iter); + Ok(()) + } + + /// Builds the `PrimitiveArray` and reset this builder. + pub fn finish(&mut self) -> PrimitiveArray { + let len = self.len(); + let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); + let null_count = len + - null_bit_buffer + .as_ref() + .map(|b| b.count_set_bits()) + .unwrap_or(len); + let builder = ArrayData::builder(T::DATA_TYPE) + .len(len) + .add_buffer(self.values_builder.finish()) + .null_bit_buffer(if null_count > 0 { + null_bit_buffer + } else { + None + }); + + let array_data = unsafe { builder.build_unchecked() }; + PrimitiveArray::::from(array_data) + } + + /// Builds the `DictionaryArray` and reset this builder. 
+ pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray { + let len = self.len(); + let null_bit_buffer = self.bitmap_builder.as_mut().map(|b| b.finish()); + let null_count = len + - null_bit_buffer + .as_ref() + .map(|b| b.count_set_bits()) + .unwrap_or(len); + let data_type = DataType::Dictionary( + Box::new(T::DATA_TYPE), + Box::new(values.data_type().clone()), + ); + let mut builder = ArrayData::builder(data_type) + .len(len) + .add_buffer(self.values_builder.finish()); + if null_count > 0 { + builder = builder.null_bit_buffer(null_bit_buffer); + } + builder = builder.add_child_data(values.data().clone()); + let array_data = unsafe { builder.build_unchecked() }; + DictionaryArray::::from(array_data) + } + + fn materialize_bitmap_builder(&mut self) { + if self.bitmap_builder.is_some() { + return; + } + let mut b = BooleanBufferBuilder::new(0); + b.reserve(self.values_builder.capacity()); + b.append_n(self.values_builder.len(), true); + self.bitmap_builder = Some(b); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::array::BooleanArray; + use crate::array::Date32Array; + use crate::array::Int32Array; + use crate::array::Int32Builder; + use crate::array::TimestampSecondArray; + use crate::buffer::Buffer; + + #[test] + fn test_primitive_array_builder_i32() { + let mut builder = Int32Array::builder(5); + for i in 0..5 { + builder.append_value(i).unwrap(); + } + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..5 { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i as i32, arr.value(i)); + } + } + + #[test] + fn test_primitive_array_builder_i32_append_iter() { + let mut builder = Int32Array::builder(5); + unsafe { builder.append_trusted_len_iter(0..5) }.unwrap(); + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..5 { + 
assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i as i32, arr.value(i)); + } + } + + #[test] + fn test_primitive_array_builder_i32_append_nulls() { + let mut builder = Int32Array::builder(5); + builder.append_nulls(5).unwrap(); + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(5, arr.null_count()); + for i in 0..5 { + assert!(arr.is_null(i)); + assert!(!arr.is_valid(i)); + } + } + + #[test] + fn test_primitive_array_builder_date32() { + let mut builder = Date32Array::builder(5); + for i in 0..5 { + builder.append_value(i).unwrap(); + } + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..5 { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i as i32, arr.value(i)); + } + } + + #[test] + fn test_primitive_array_builder_timestamp_second() { + let mut builder = TimestampSecondArray::builder(5); + for i in 0..5 { + builder.append_value(i).unwrap(); + } + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..5 { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i as i64, arr.value(i)); + } + } + + #[test] + fn test_primitive_array_builder_bool() { + // 00000010 01001000 + let buf = Buffer::from([72_u8, 2_u8]); + let mut builder = BooleanArray::builder(10); + for i in 0..10 { + if i == 3 || i == 6 || i == 9 { + builder.append_value(true).unwrap(); + } else { + builder.append_value(false).unwrap(); + } + } + + let arr = builder.finish(); + assert_eq!(&buf, arr.values()); + assert_eq!(10, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..10 { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i) + } + } + + #[test] + fn test_primitive_array_builder_append_option() { + let arr1 = 
Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]); + + let mut builder = Int32Array::builder(5); + builder.append_option(Some(0)).unwrap(); + builder.append_option(None).unwrap(); + builder.append_option(Some(2)).unwrap(); + builder.append_option(None).unwrap(); + builder.append_option(Some(4)).unwrap(); + let arr2 = builder.finish(); + + assert_eq!(arr1.len(), arr2.len()); + assert_eq!(arr1.offset(), arr2.offset()); + assert_eq!(arr1.null_count(), arr2.null_count()); + for i in 0..5 { + assert_eq!(arr1.is_null(i), arr2.is_null(i)); + assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); + if arr1.is_valid(i) { + assert_eq!(arr1.value(i), arr2.value(i)); + } + } + } + + #[test] + fn test_primitive_array_builder_append_null() { + let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); + + let mut builder = Int32Array::builder(5); + builder.append_value(0).unwrap(); + builder.append_value(2).unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_value(4).unwrap(); + let arr2 = builder.finish(); + + assert_eq!(arr1.len(), arr2.len()); + assert_eq!(arr1.offset(), arr2.offset()); + assert_eq!(arr1.null_count(), arr2.null_count()); + for i in 0..5 { + assert_eq!(arr1.is_null(i), arr2.is_null(i)); + assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); + if arr1.is_valid(i) { + assert_eq!(arr1.value(i), arr2.value(i)); + } + } + } + + #[test] + fn test_primitive_array_builder_append_slice() { + let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); + + let mut builder = Int32Array::builder(5); + builder.append_slice(&[0, 2]).unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_value(4).unwrap(); + let arr2 = builder.finish(); + + assert_eq!(arr1.len(), arr2.len()); + assert_eq!(arr1.offset(), arr2.offset()); + assert_eq!(arr1.null_count(), arr2.null_count()); + for i in 0..5 { + assert_eq!(arr1.is_null(i), arr2.is_null(i)); + assert_eq!(arr1.is_valid(i), 
arr2.is_valid(i)); + if arr1.is_valid(i) { + assert_eq!(arr1.value(i), arr2.value(i)); + } + } + } + + #[test] + fn test_primitive_array_builder_finish() { + let mut builder = Int32Builder::new(5); + builder.append_slice(&[2, 4, 6, 8]).unwrap(); + let mut arr = builder.finish(); + assert_eq!(4, arr.len()); + assert_eq!(0, builder.len()); + + builder.append_slice(&[1, 3, 5, 7, 9]).unwrap(); + arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, builder.len()); + } +} diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs b/arrow/src/array/builder/primitive_dictionary_builder.rs new file mode 100644 index 000000000000..93695e0b730a --- /dev/null +++ b/arrow/src/array/builder/primitive_dictionary_builder.rs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +use crate::array::ArrayRef; +use crate::array::ArrowPrimitiveType; +use crate::array::DictionaryArray; +use crate::datatypes::ArrowNativeType; +use crate::datatypes::ToByteSlice; +use crate::error::{ArrowError, Result}; + +use super::ArrayBuilder; +use super::PrimitiveBuilder; + +/// Array builder for `DictionaryArray`. 
For example to map a set of byte indices +/// to f32 values. Note that the use of a `HashMap` here will not scale to very large +/// arrays or result in an ordered dictionary. +/// +/// # Example: +/// +/// ``` +/// use arrow::array::{ +/// Array, PrimitiveBuilder, PrimitiveDictionaryBuilder, +/// UInt8Array, UInt32Array, +/// }; +/// use arrow::datatypes::{UInt8Type, UInt32Type}; +/// +/// let key_builder = PrimitiveBuilder::::new(3); +/// let value_builder = PrimitiveBuilder::::new(2); +/// let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); +/// builder.append(12345678).unwrap(); +/// builder.append_null().unwrap(); +/// builder.append(22345678).unwrap(); +/// let array = builder.finish(); +/// +/// assert_eq!( +/// array.keys(), +/// &UInt8Array::from(vec![Some(0), None, Some(1)]) +/// ); +/// +/// // Values are polymorphic and so require a downcast. +/// let av = array.values(); +/// let ava: &UInt32Array = av.as_any().downcast_ref::().unwrap(); +/// let avs: &[u32] = ava.values(); +/// +/// assert!(!array.is_null(0)); +/// assert!(array.is_null(1)); +/// assert!(!array.is_null(2)); +/// +/// assert_eq!(avs, &[12345678, 22345678]); +/// ``` +#[derive(Debug)] +pub struct PrimitiveDictionaryBuilder +where + K: ArrowPrimitiveType, + V: ArrowPrimitiveType, +{ + keys_builder: PrimitiveBuilder, + values_builder: PrimitiveBuilder, + map: HashMap, K::Native>, +} + +impl PrimitiveDictionaryBuilder +where + K: ArrowPrimitiveType, + V: ArrowPrimitiveType, +{ + /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder. + pub fn new( + keys_builder: PrimitiveBuilder, + values_builder: PrimitiveBuilder, + ) -> Self { + Self { + keys_builder, + values_builder, + map: HashMap::new(), + } + } +} + +impl ArrayBuilder for PrimitiveDictionaryBuilder +where + K: ArrowPrimitiveType, + V: ArrowPrimitiveType, +{ + /// Returns the builder as an non-mutable `Any` reference. 
+ fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as an mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.keys_builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.keys_builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl PrimitiveDictionaryBuilder +where + K: ArrowPrimitiveType, + V: ArrowPrimitiveType, +{ + /// Append a primitive value to the array. Return an existing index + /// if already present in the values array or a new index if the + /// value is appended to the values array. + #[inline] + pub fn append(&mut self, value: V::Native) -> Result { + if let Some(&key) = self.map.get(value.to_byte_slice()) { + // Append existing value. + self.keys_builder.append_value(key)?; + Ok(key) + } else { + // Append new value. + let key = K::Native::from_usize(self.values_builder.len()) + .ok_or(ArrowError::DictionaryKeyOverflowError)?; + self.values_builder.append_value(value)?; + self.keys_builder.append_value(key as K::Native)?; + self.map.insert(value.to_byte_slice().into(), key); + Ok(key) + } + } + + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.keys_builder.append_null() + } + + /// Builds the `DictionaryArray` and reset this builder. 
+ pub fn finish(&mut self) -> DictionaryArray { + self.map.clear(); + let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); + self.keys_builder.finish_dict(value_ref) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::array::UInt32Array; + use crate::array::UInt8Array; + use crate::datatypes::UInt32Type; + use crate::datatypes::UInt8Type; + + #[test] + fn test_primitive_dictionary_builder() { + let key_builder = PrimitiveBuilder::::new(3); + let value_builder = PrimitiveBuilder::::new(2); + let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + builder.append(12345678).unwrap(); + builder.append_null().unwrap(); + builder.append(22345678).unwrap(); + let array = builder.finish(); + + assert_eq!( + array.keys(), + &UInt8Array::from(vec![Some(0), None, Some(1)]) + ); + + // Values are polymorphic and so require a downcast. + let av = array.values(); + let ava: &UInt32Array = av.as_any().downcast_ref::().unwrap(); + let avs: &[u32] = ava.values(); + + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert!(!array.is_null(2)); + + assert_eq!(avs, &[12345678, 22345678]); + } + + #[test] + #[should_panic(expected = "DictionaryKeyOverflowError")] + fn test_primitive_dictionary_overflow() { + let key_builder = PrimitiveBuilder::::new(257); + let value_builder = PrimitiveBuilder::::new(257); + let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); + // 256 unique keys. 
+ for i in 0..256 { + builder.append(i + 1000).unwrap(); + } + // Special error if the key overflows (256th entry) + builder.append(1257).unwrap(); + } +} diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs new file mode 100644 index 000000000000..d1b872fd3134 --- /dev/null +++ b/arrow/src/array/builder/string_dictionary_builder.rs @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +use crate::array::array::Array; +use crate::array::ArrayBuilder; +use crate::array::ArrayRef; +use crate::array::ArrowDictionaryKeyType; +use crate::array::DictionaryArray; +use crate::array::PrimitiveBuilder; +use crate::array::StringArray; +use crate::array::StringBuilder; +use crate::datatypes::ArrowNativeType; +use crate::error::{ArrowError, Result}; + +/// Array builder for `DictionaryArray` that stores Strings. For example to map a set of byte indices +/// to String values. Note that the use of a `HashMap` here will not scale to very large +/// arrays or result in an ordered dictionary. 
+/// +/// ``` +/// use arrow::{ +/// array::{ +/// Int8Array, StringArray, +/// PrimitiveBuilder, StringBuilder, StringDictionaryBuilder, +/// }, +/// datatypes::Int8Type, +/// }; +/// +/// // Create a dictionary array indexed by bytes whose values are Strings. +/// // It can thus hold up to 256 distinct string values. +/// +/// let key_builder = PrimitiveBuilder::::new(100); +/// let value_builder = StringBuilder::new(100); +/// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); +/// +/// // The builder builds the dictionary value by value +/// builder.append("abc").unwrap(); +/// builder.append_null().unwrap(); +/// builder.append("def").unwrap(); +/// builder.append("def").unwrap(); +/// builder.append("abc").unwrap(); +/// let array = builder.finish(); +/// +/// assert_eq!( +/// array.keys(), +/// &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) +/// ); +/// +/// // Values are polymorphic and so require a downcast. +/// let av = array.values(); +/// let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); +/// +/// assert_eq!(ava.value(0), "abc"); +/// assert_eq!(ava.value(1), "def"); +/// +/// ``` +#[derive(Debug)] +pub struct StringDictionaryBuilder +where + K: ArrowDictionaryKeyType, +{ + keys_builder: PrimitiveBuilder, + values_builder: StringBuilder, + map: HashMap, K::Native>, +} + +impl StringDictionaryBuilder +where + K: ArrowDictionaryKeyType, +{ + /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder. + pub fn new(keys_builder: PrimitiveBuilder, values_builder: StringBuilder) -> Self { + Self { + keys_builder, + values_builder, + map: HashMap::new(), + } + } + + /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary + /// which is initialized with the given values. + /// The indices of those dictionary values are used as keys. 
+ /// + /// # Example + /// + /// ``` + /// use arrow::datatypes::Int16Type; + /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array}; + /// use std::convert::TryFrom; + /// + /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); + /// + /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::::new(3), &dictionary_values).unwrap(); + /// builder.append("def").unwrap(); + /// builder.append_null().unwrap(); + /// builder.append("abc").unwrap(); + /// + /// let dictionary_array = builder.finish(); + /// + /// let keys = dictionary_array.keys(); + /// + /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)])); + /// ``` + pub fn new_with_dictionary( + keys_builder: PrimitiveBuilder, + dictionary_values: &StringArray, + ) -> Result { + let dict_len = dictionary_values.len(); + let mut values_builder = + StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len()); + let mut map: HashMap, K::Native> = HashMap::with_capacity(dict_len); + for i in 0..dict_len { + if dictionary_values.is_valid(i) { + let value = dictionary_values.value(i); + map.insert( + value.as_bytes().into(), + K::Native::from_usize(i) + .ok_or(ArrowError::DictionaryKeyOverflowError)?, + ); + values_builder.append_value(value)?; + } else { + values_builder.append_null()?; + } + } + Ok(Self { + keys_builder, + values_builder, + map, + }) + } +} + +impl ArrayBuilder for StringDictionaryBuilder +where + K: ArrowDictionaryKeyType, +{ + /// Returns the builder as an non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as an mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. 
+ fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.keys_builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.keys_builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl StringDictionaryBuilder +where + K: ArrowDictionaryKeyType, +{ + /// Append a primitive value to the array. Return an existing index + /// if already present in the values array or a new index if the + /// value is appended to the values array. + pub fn append(&mut self, value: impl AsRef) -> Result { + if let Some(&key) = self.map.get(value.as_ref().as_bytes()) { + // Append existing value. + self.keys_builder.append_value(key)?; + Ok(key) + } else { + // Append new value. + let key = K::Native::from_usize(self.values_builder.len()) + .ok_or(ArrowError::DictionaryKeyOverflowError)?; + self.values_builder.append_value(value.as_ref())?; + self.keys_builder.append_value(key as K::Native)?; + self.map.insert(value.as_ref().as_bytes().into(), key); + Ok(key) + } + } + + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.keys_builder.append_null() + } + + /// Builds the `DictionaryArray` and reset this builder. 
+ pub fn finish(&mut self) -> DictionaryArray { + self.map.clear(); + let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); + self.keys_builder.finish_dict(value_ref) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::array::Int8Array; + use crate::datatypes::Int16Type; + use crate::datatypes::Int8Type; + + #[test] + fn test_string_dictionary_builder() { + let key_builder = PrimitiveBuilder::::new(5); + let value_builder = StringBuilder::new(2); + let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); + builder.append("abc").unwrap(); + builder.append_null().unwrap(); + builder.append("def").unwrap(); + builder.append("def").unwrap(); + builder.append("abc").unwrap(); + let array = builder.finish(); + + assert_eq!( + array.keys(), + &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) + ); + + // Values are polymorphic and so require a downcast. + let av = array.values(); + let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); + + assert_eq!(ava.value(0), "abc"); + assert_eq!(ava.value(1), "def"); + } + + #[test] + fn test_string_dictionary_builder_with_existing_dictionary() { + let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); + + let key_builder = PrimitiveBuilder::::new(6); + let mut builder = + StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) + .unwrap(); + builder.append("abc").unwrap(); + builder.append_null().unwrap(); + builder.append("def").unwrap(); + builder.append("def").unwrap(); + builder.append("abc").unwrap(); + builder.append("ghi").unwrap(); + let array = builder.finish(); + + assert_eq!( + array.keys(), + &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]) + ); + + // Values are polymorphic and so require a downcast. 
+ let av = array.values(); + let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); + + assert!(!ava.is_valid(0)); + assert_eq!(ava.value(1), "def"); + assert_eq!(ava.value(2), "abc"); + assert_eq!(ava.value(3), "ghi"); + } + + #[test] + fn test_string_dictionary_builder_with_reserved_null_value() { + let dictionary: Vec> = vec![None]; + let dictionary = StringArray::from(dictionary); + + let key_builder = PrimitiveBuilder::::new(4); + let mut builder = + StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) + .unwrap(); + builder.append("abc").unwrap(); + builder.append_null().unwrap(); + builder.append("def").unwrap(); + builder.append("abc").unwrap(); + let array = builder.finish(); + + assert!(array.is_null(1)); + assert!(!array.is_valid(1)); + + let keys = array.keys(); + + assert_eq!(keys.value(0), 1); + assert!(keys.is_null(1)); + // zero initialization is currently guaranteed by Buffer allocation and resizing + assert_eq!(keys.value(1), 0); + assert_eq!(keys.value(2), 2); + assert_eq!(keys.value(3), 1); + } +} diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs new file mode 100644 index 000000000000..e69844b71739 --- /dev/null +++ b/arrow/src/array/builder/struct_builder.rs @@ -0,0 +1,420 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +use crate::array::*; +use crate::datatypes::DataType; +use crate::datatypes::Field; +use crate::error::Result; + +/// Array builder for Struct types. +/// +/// Note that callers should make sure that methods of all the child field builders are +/// properly called to maintain the consistency of the data structure. +pub struct StructBuilder { + fields: Vec, + field_builders: Vec>, + bitmap_builder: BooleanBufferBuilder, + len: usize, +} + +impl fmt::Debug for StructBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("StructBuilder") + .field("fields", &self.fields) + .field("bitmap_builder", &self.bitmap_builder) + .field("len", &self.len) + .finish() + } +} + +impl ArrayBuilder for StructBuilder { + /// Returns the number of array slots in the builder. + /// + /// Note that this always return the first child field builder's length, and it is + /// the caller's responsibility to maintain the consistency that all the child field + /// builder should have the equal number of elements. + fn len(&self) -> usize { + self.len + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Builds the array. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + /// Returns the builder as a non-mutable `Any` reference. + /// + /// This is most useful when one wants to call non-mutable APIs on a specific builder + /// type. In this case, one can first cast this into a `Any`, and then use + /// `downcast_ref` to get a reference on the specific builder. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + /// + /// This is most useful when one wants to call mutable APIs on a specific builder + /// type. 
In this case, one can first cast this into a `Any`, and then use + /// `downcast_mut` to get a reference on the specific builder. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } +} + +/// Returns a builder with capacity `capacity` that corresponds to the datatype `DataType` +/// This function is useful to construct arrays from an arbitrary vectors with known/expected +/// schema. +pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { + match datatype { + DataType::Null => unimplemented!(), + DataType::Boolean => Box::new(BooleanBuilder::new(capacity)), + DataType::Int8 => Box::new(Int8Builder::new(capacity)), + DataType::Int16 => Box::new(Int16Builder::new(capacity)), + DataType::Int32 => Box::new(Int32Builder::new(capacity)), + DataType::Int64 => Box::new(Int64Builder::new(capacity)), + DataType::UInt8 => Box::new(UInt8Builder::new(capacity)), + DataType::UInt16 => Box::new(UInt16Builder::new(capacity)), + DataType::UInt32 => Box::new(UInt32Builder::new(capacity)), + DataType::UInt64 => Box::new(UInt64Builder::new(capacity)), + DataType::Float32 => Box::new(Float32Builder::new(capacity)), + DataType::Float64 => Box::new(Float64Builder::new(capacity)), + DataType::Binary => Box::new(BinaryBuilder::new(capacity)), + DataType::FixedSizeBinary(len) => { + Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) + } + DataType::Decimal(precision, scale) => { + Box::new(DecimalBuilder::new(capacity, *precision, *scale)) + } + DataType::Utf8 => Box::new(StringBuilder::new(capacity)), + DataType::Date32 => Box::new(Date32Builder::new(capacity)), + DataType::Date64 => Box::new(Date64Builder::new(capacity)), + DataType::Time32(TimeUnit::Second) => { + Box::new(Time32SecondBuilder::new(capacity)) + } + DataType::Time32(TimeUnit::Millisecond) => { + Box::new(Time32MillisecondBuilder::new(capacity)) + } + DataType::Time64(TimeUnit::Microsecond) => { + 
Box::new(Time64MicrosecondBuilder::new(capacity)) + } + DataType::Time64(TimeUnit::Nanosecond) => { + Box::new(Time64NanosecondBuilder::new(capacity)) + } + DataType::Timestamp(TimeUnit::Second, _) => { + Box::new(TimestampSecondBuilder::new(capacity)) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + Box::new(TimestampMillisecondBuilder::new(capacity)) + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + Box::new(TimestampMicrosecondBuilder::new(capacity)) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + Box::new(TimestampNanosecondBuilder::new(capacity)) + } + DataType::Interval(IntervalUnit::YearMonth) => { + Box::new(IntervalYearMonthBuilder::new(capacity)) + } + DataType::Interval(IntervalUnit::DayTime) => { + Box::new(IntervalDayTimeBuilder::new(capacity)) + } + DataType::Interval(IntervalUnit::MonthDayNano) => { + Box::new(IntervalMonthDayNanoBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Second) => { + Box::new(DurationSecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Millisecond) => { + Box::new(DurationMillisecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Microsecond) => { + Box::new(DurationMicrosecondBuilder::new(capacity)) + } + DataType::Duration(TimeUnit::Nanosecond) => { + Box::new(DurationNanosecondBuilder::new(capacity)) + } + DataType::Struct(fields) => { + Box::new(StructBuilder::from_fields(fields.clone(), capacity)) + } + t => panic!("Data type {:?} is not currently supported", t), + } +} + +impl StructBuilder { + pub fn new(fields: Vec, field_builders: Vec>) -> Self { + Self { + fields, + field_builders, + bitmap_builder: BooleanBufferBuilder::new(0), + len: 0, + } + } + + pub fn from_fields(fields: Vec, capacity: usize) -> Self { + let mut builders = Vec::with_capacity(fields.len()); + for field in &fields { + builders.push(make_builder(field.data_type(), capacity)); + } + Self::new(fields, builders) + } + + /// Returns a mutable reference to the child field builder at index 
`i`. + /// Result will be `None` if the input type `T` provided doesn't match the actual + /// field builder's type. + pub fn field_builder(&mut self, i: usize) -> Option<&mut T> { + self.field_builders[i].as_any_mut().downcast_mut::() + } + + /// Returns the number of fields for the struct this builder is building. + pub fn num_fields(&self) -> usize { + self.field_builders.len() + } + + /// Appends an element (either null or non-null) to the struct. The actual elements + /// should be appended for each child sub-array in a consistent way. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.bitmap_builder.append(is_valid); + self.len += 1; + Ok(()) + } + + /// Appends a null element to the struct. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `StructArray` and reset this builder. + pub fn finish(&mut self) -> StructArray { + let mut child_data = Vec::with_capacity(self.field_builders.len()); + for f in &mut self.field_builders { + let arr = f.finish(); + child_data.push(arr.data().clone()); + } + + let null_bit_buffer = self.bitmap_builder.finish(); + let null_count = self.len - null_bit_buffer.count_set_bits(); + let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) + .len(self.len) + .child_data(child_data); + if null_count > 0 { + builder = builder.null_bit_buffer(Some(null_bit_buffer)); + } + + self.len = 0; + + let array_data = unsafe { builder.build_unchecked() }; + StructArray::from(array_data) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::array::Array; + use crate::bitmap::Bitmap; + use crate::buffer::Buffer; + + #[test] + fn test_struct_array_builder() { + let string_builder = StringBuilder::new(4); + let int_builder = Int32Builder::new(4); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Utf8, false)); + field_builders.push(Box::new(string_builder) as Box); + 
fields.push(Field::new("f2", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + + let mut builder = StructBuilder::new(fields, field_builders); + assert_eq!(2, builder.num_fields()); + + let string_builder = builder + .field_builder::(0) + .expect("builder at field 0 should be string builder"); + string_builder.append_value("joe").unwrap(); + string_builder.append_null().unwrap(); + string_builder.append_null().unwrap(); + string_builder.append_value("mark").unwrap(); + + let int_builder = builder + .field_builder::(1) + .expect("builder at field 1 should be int builder"); + int_builder.append_value(1).unwrap(); + int_builder.append_value(2).unwrap(); + int_builder.append_null().unwrap(); + int_builder.append_value(4).unwrap(); + + builder.append(true).unwrap(); + builder.append(true).unwrap(); + builder.append_null().unwrap(); + builder.append(true).unwrap(); + + let arr = builder.finish(); + + let struct_data = arr.data(); + assert_eq!(4, struct_data.len()); + assert_eq!(1, struct_data.null_count()); + assert_eq!( + Some(&Bitmap::from(Buffer::from(&[11_u8]))), + struct_data.null_bitmap() + ); + + let expected_string_data = ArrayData::builder(DataType::Utf8) + .len(4) + .null_bit_buffer(Some(Buffer::from(&[9_u8]))) + .add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7])) + .add_buffer(Buffer::from_slice_ref(b"joemark")) + .build() + .unwrap(); + + let expected_int_data = ArrayData::builder(DataType::Int32) + .len(4) + .null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8]))) + .add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4])) + .build() + .unwrap(); + + assert_eq!(expected_string_data, *arr.column(0).data()); + assert_eq!(expected_int_data, *arr.column(1).data()); + } + + #[test] + fn test_struct_array_builder_finish() { + let int_builder = Int32Builder::new(10); + let bool_builder = BooleanBuilder::new(10); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Int32, false)); 
+ field_builders.push(Box::new(int_builder) as Box); + fields.push(Field::new("f2", DataType::Boolean, false)); + field_builders.push(Box::new(bool_builder) as Box); + + let mut builder = StructBuilder::new(fields, field_builders); + builder + .field_builder::(0) + .unwrap() + .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + .unwrap(); + builder + .field_builder::(1) + .unwrap() + .append_slice(&[ + false, true, false, true, false, true, false, true, false, true, + ]) + .unwrap(); + + // Append slot values - all are valid. + for _ in 0..10 { + assert!(builder.append(true).is_ok()) + } + + assert_eq!(10, builder.len()); + + let arr = builder.finish(); + + assert_eq!(10, arr.len()); + assert_eq!(0, builder.len()); + + builder + .field_builder::(0) + .unwrap() + .append_slice(&[1, 3, 5, 7, 9]) + .unwrap(); + builder + .field_builder::(1) + .unwrap() + .append_slice(&[false, true, false, true, false]) + .unwrap(); + + // Append slot values - all are valid. + for _ in 0..5 { + assert!(builder.append(true).is_ok()) + } + + assert_eq!(5, builder.len()); + + let arr = builder.finish(); + + assert_eq!(5, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_struct_array_builder_from_schema() { + let mut fields = vec![ + Field::new("f1", DataType::Float32, false), + Field::new("f2", DataType::Utf8, false), + ]; + let sub_fields = vec![ + Field::new("g1", DataType::Int32, false), + Field::new("g2", DataType::Boolean, false), + ]; + let struct_type = DataType::Struct(sub_fields); + fields.push(Field::new("f3", struct_type, false)); + + let mut builder = StructBuilder::from_fields(fields, 5); + assert_eq!(3, builder.num_fields()); + assert!(builder.field_builder::(0).is_some()); + assert!(builder.field_builder::(1).is_some()); + assert!(builder.field_builder::(2).is_some()); + } + + #[test] + #[should_panic( + expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not 
currently supported" + )] + fn test_struct_array_builder_from_schema_unsupported_type() { + let mut fields = vec![Field::new("f1", DataType::Int16, false)]; + let list_type = + DataType::List(Box::new(Field::new("item", DataType::Int64, true))); + fields.push(Field::new("f2", list_type, false)); + + let _ = StructBuilder::from_fields(fields, 5); + } + + #[test] + fn test_struct_array_builder_field_builder_type_mismatch() { + let int_builder = Int32Builder::new(10); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + + let mut builder = StructBuilder::new(fields, field_builders); + assert!(builder.field_builder::(0).is_none()); + } +} diff --git a/arrow/src/array/builder/union_builder.rs b/arrow/src/array/builder/union_builder.rs new file mode 100644 index 000000000000..78f9a3f4b430 --- /dev/null +++ b/arrow/src/array/builder/union_builder.rs @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::collections::HashMap; + +use crate::array::ArrayDataBuilder; +use crate::array::Int32BufferBuilder; +use crate::array::Int8BufferBuilder; +use crate::array::UnionArray; +use crate::buffer::MutableBuffer; + +use crate::datatypes::ArrowPrimitiveType; +use crate::datatypes::DataType; +use crate::datatypes::Field; +use crate::datatypes::IntervalMonthDayNanoType; +use crate::datatypes::IntervalUnit; +use crate::datatypes::{Float32Type, Float64Type}; +use crate::datatypes::{Int16Type, Int32Type, Int64Type, Int8Type}; +use crate::datatypes::{UInt16Type, UInt32Type, UInt64Type, UInt8Type}; +use crate::error::{ArrowError, Result}; + +use super::{BooleanBufferBuilder, BufferBuilder}; + +use super::buffer_builder::builder_to_mutable_buffer; +use super::buffer_builder::mutable_buffer_to_builder; +use crate::array::make_array; + +/// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`. +#[derive(Debug)] +struct FieldData { + /// The type id for this field + type_id: i8, + /// The Arrow data type represented in the `values_buffer`, which is untyped + data_type: DataType, + /// A buffer containing the values for this field in raw bytes + values_buffer: Option, + /// The number of array slots represented by the buffer + slots: usize, + /// A builder for the null bitmap + bitmap_builder: BooleanBufferBuilder, +} + +impl FieldData { + /// Creates a new `FieldData`. + fn new(type_id: i8, data_type: DataType) -> Self { + Self { + type_id, + data_type, + values_buffer: Some(MutableBuffer::new(1)), + slots: 0, + bitmap_builder: BooleanBufferBuilder::new(1), + } + } + + /// Appends a single value to this `FieldData`'s `values_buffer`. 
+ #[allow(clippy::unnecessary_wraps)] + fn append_to_values_buffer( + &mut self, + v: T::Native, + ) -> Result<()> { + let values_buffer = self + .values_buffer + .take() + .expect("Values buffer was never created"); + let mut builder: BufferBuilder = + mutable_buffer_to_builder(values_buffer, self.slots); + builder.append(v); + let mutable_buffer = builder_to_mutable_buffer(builder); + self.values_buffer = Some(mutable_buffer); + + self.slots += 1; + self.bitmap_builder.append(true); + Ok(()) + } + + /// Appends a null to this `FieldData`. + #[allow(clippy::unnecessary_wraps)] + fn append_null(&mut self) -> Result<()> { + let values_buffer = self + .values_buffer + .take() + .expect("Values buffer was never created"); + + let mut builder: BufferBuilder = + mutable_buffer_to_builder(values_buffer, self.slots); + + builder.advance(1); + let mutable_buffer = builder_to_mutable_buffer(builder); + self.values_buffer = Some(mutable_buffer); + self.slots += 1; + self.bitmap_builder.append(false); + Ok(()) + } + + /// Appends a null to this `FieldData` when the type is not known at compile time. + /// + /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null + /// slots to the fields that are not being appended to in the case of sparse unions. This + /// method solves this problem by appending dynamically based on `DataType`. + /// + /// Note, this method does **not** update the length of the `UnionArray` (this is done by the + /// main append operation) and assumes that it is called from a method that is generic over `T` + /// where `T` satisfies the bound `ArrowPrimitiveType`. + fn append_null_dynamic(&mut self) -> Result<()> { + match self.data_type { + DataType::Null => unimplemented!(), + DataType::Int8 => self.append_null::()?, + DataType::Int16 => self.append_null::()?, + DataType::Int32 + | DataType::Date32 + | DataType::Time32(_) + | DataType::Interval(IntervalUnit::YearMonth) => { + self.append_null::()? 
+ } + DataType::Int64 + | DataType::Timestamp(_, _) + | DataType::Date64 + | DataType::Time64(_) + | DataType::Interval(IntervalUnit::DayTime) + | DataType::Duration(_) => self.append_null::()?, + DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::()?, + DataType::UInt8 => self.append_null::()?, + DataType::UInt16 => self.append_null::()?, + DataType::UInt32 => self.append_null::()?, + DataType::UInt64 => self.append_null::()?, + DataType::Float32 => self.append_null::()?, + DataType::Float64 => self.append_null::()?, + _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."), + }; + Ok(()) + } +} + +/// Builder type for creating a new `UnionArray`. +/// +/// Example: **Dense Memory Layout** +/// +/// ``` +/// use arrow::array::UnionBuilder; +/// use arrow::datatypes::{Float64Type, Int32Type}; +/// +/// let mut builder = UnionBuilder::new_dense(3); +/// builder.append::("a", 1).unwrap(); +/// builder.append::("b", 3.0).unwrap(); +/// builder.append::("a", 4).unwrap(); +/// let union = builder.build().unwrap(); +/// +/// assert_eq!(union.type_id(0), 0_i8); +/// assert_eq!(union.type_id(1), 1_i8); +/// assert_eq!(union.type_id(2), 0_i8); +/// +/// assert_eq!(union.value_offset(0), 0_i32); +/// assert_eq!(union.value_offset(1), 0_i32); +/// assert_eq!(union.value_offset(2), 1_i32); +/// ``` +/// +/// Example: **Sparse Memory Layout** +/// ``` +/// use arrow::array::UnionBuilder; +/// use arrow::datatypes::{Float64Type, Int32Type}; +/// +/// let mut builder = UnionBuilder::new_sparse(3); +/// builder.append::("a", 1).unwrap(); +/// builder.append::("b", 3.0).unwrap(); +/// builder.append::("a", 4).unwrap(); +/// let union = builder.build().unwrap(); +/// +/// assert_eq!(union.type_id(0), 0_i8); +/// assert_eq!(union.type_id(1), 1_i8); +/// assert_eq!(union.type_id(2), 0_i8); +/// +/// assert_eq!(union.value_offset(0), 0_i32); +/// assert_eq!(union.value_offset(1), 1_i32); +/// assert_eq!(union.value_offset(2), 
2_i32); +/// ``` +#[derive(Debug)] +pub struct UnionBuilder { + /// The current number of slots in the array + len: usize, + /// Maps field names to `FieldData` instances which track the builders for that field + fields: HashMap, + /// Builder to keep track of type ids + type_id_builder: Int8BufferBuilder, + /// Builder to keep track of offsets (`None` for sparse unions) + value_offset_builder: Option, +} + +impl UnionBuilder { + /// Creates a new dense array builder. + pub fn new_dense(capacity: usize) -> Self { + Self { + len: 0, + fields: HashMap::default(), + type_id_builder: Int8BufferBuilder::new(capacity), + value_offset_builder: Some(Int32BufferBuilder::new(capacity)), + } + } + + /// Creates a new sparse array builder. + pub fn new_sparse(capacity: usize) -> Self { + Self { + len: 0, + fields: HashMap::default(), + type_id_builder: Int8BufferBuilder::new(capacity), + value_offset_builder: None, + } + } + + /// Appends a null to this builder, encoding the null in the array + /// of the `type_name` child / field. + /// + /// Since `UnionArray` encodes nulls as an entry in its children + /// (it doesn't have a validity bitmap itself), and where the null + /// is part of the final array, appending a NULL requires + /// specifying which field (child) to use. + #[inline] + pub fn append_null(&mut self, type_name: &str) -> Result<()> { + self.append_option::(type_name, None) + } + + /// Appends a value to this builder. 
+ #[inline] + pub fn append( + &mut self, + type_name: &str, + v: T::Native, + ) -> Result<()> { + self.append_option::(type_name, Some(v)) + } + + fn append_option( + &mut self, + type_name: &str, + v: Option, + ) -> Result<()> { + let type_name = type_name.to_string(); + + let mut field_data = match self.fields.remove(&type_name) { + Some(data) => { + if data.data_type != T::DATA_TYPE { + return Err(ArrowError::InvalidArgumentError(format!("Attempt to write col \"{}\" with type {} doesn't match existing type {}", type_name, T::DATA_TYPE, data.data_type))); + } + data + } + None => match self.value_offset_builder { + Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE), + None => { + let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE); + for _ in 0..self.len { + fd.append_null::()?; + } + fd + } + }, + }; + self.type_id_builder.append(field_data.type_id); + + match &mut self.value_offset_builder { + // Dense Union + Some(offset_builder) => { + offset_builder.append(field_data.slots as i32); + } + // Sparse Union + None => { + for (_, fd) in self.fields.iter_mut() { + // Append to all bar the FieldData currently being appended to + fd.append_null_dynamic()?; + } + } + } + + match v { + Some(v) => field_data.append_to_values_buffer::(v)?, + None => field_data.append_null::()?, + } + + self.fields.insert(type_name, field_data); + self.len += 1; + Ok(()) + } + + /// Builds this builder creating a new `UnionArray`. 
+ pub fn build(mut self) -> Result { + let type_id_buffer = self.type_id_builder.finish(); + let value_offsets_buffer = self.value_offset_builder.map(|mut b| b.finish()); + let mut children = Vec::new(); + for ( + name, + FieldData { + type_id, + data_type, + values_buffer, + slots, + mut bitmap_builder, + }, + ) in self.fields.into_iter() + { + let buffer = values_buffer + .expect("The `values_buffer` should only ever be None inside the `append` method.") + .into(); + let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) + .add_buffer(buffer) + .len(slots) + .null_bit_buffer(Some(bitmap_builder.finish())); + + let arr_data_ref = unsafe { arr_data_builder.build_unchecked() }; + let array_ref = make_array(arr_data_ref); + children.push((type_id, (Field::new(&name, data_type, false), array_ref))) + } + + children.sort_by(|a, b| { + a.0.partial_cmp(&b.0) + .expect("This will never be None as type ids are always i8 values.") + }); + let children: Vec<_> = children.into_iter().map(|(_, b)| b).collect(); + + let type_ids: Vec = (0_i8..children.len() as i8).collect(); + + UnionArray::try_new(&type_ids, type_id_buffer, value_offsets_buffer, children) + } +} + +#[cfg(test)] +mod tests {} diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index cb6b894a058d..3e7e66496162 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -18,7 +18,7 @@ //! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates //! common attributes and operations for Arrow array. 
-use crate::datatypes::{DataType, IntervalUnit, UnionMode}; +use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode}; use crate::error::{ArrowError, Result}; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use crate::{ @@ -712,10 +712,10 @@ impl ArrayData { // Additional Type specific checks match &self.data_type { DataType::Utf8 | DataType::Binary => { - self.validate_offsets::(&self.buffers[0], self.buffers[1].len())?; + self.validate_offsets::(self.buffers[1].len())?; } DataType::LargeUtf8 | DataType::LargeBinary => { - self.validate_offsets::(&self.buffers[0], self.buffers[1].len())?; + self.validate_offsets::(self.buffers[1].len())?; } DataType::Dictionary(key_type, _value_type) => { // At the moment, constructing a DictionaryArray will also check this @@ -738,40 +738,46 @@ impl ArrayData { /// entries. /// /// For an empty array, the `buffer` can also be empty. - fn typed_offsets<'a, T: ArrowNativeType + num::Num + std::fmt::Display>( - &'a self, - buffer: &'a Buffer, - ) -> Result<&'a [T]> { + fn typed_offsets(&self) -> Result<&[T]> { // An empty list-like array can have 0 offsets - if buffer.is_empty() && self.len == 0 { + if self.len == 0 && self.buffers[0].is_empty() { return Ok(&[]); } - // Validate that there are the correct number of offsets for this array's length - let required_offsets = self.len + self.offset + 1; + self.typed_buffer(0, self.len + 1) + } + + /// Returns a reference to the data in `buffers[idx]` as a typed slice after validating + fn typed_buffer( + &self, + idx: usize, + len: usize, + ) -> Result<&[T]> { + let buffer = &self.buffers[idx]; - if (buffer.len() / std::mem::size_of::()) < required_offsets { + let required_len = (len + self.offset) * std::mem::size_of::(); + + if buffer.len() < required_len { return Err(ArrowError::InvalidArgumentError(format!( - "Offsets buffer size (bytes): {} isn't large enough for {}. 
Length {} needs {}", - buffer.len(), self.data_type, self.len, required_offsets + "Buffer {} of {} isn't large enough. Expected {} bytes got {}", + idx, + self.data_type, + required_len, + buffer.len() ))); } - // Justification: buffer size was validated above - Ok(unsafe { - &(buffer.typed_data::()[self.offset..self.offset + self.len + 1]) - }) + Ok(&buffer.typed_data::()[self.offset..self.offset + len]) } /// Does a cheap sanity check that the `self.len` values in `buffer` are valid /// offsets (of type T) into some other buffer of `values_length` bytes long fn validate_offsets( &self, - buffer: &Buffer, values_length: usize, ) -> Result<()> { // Justification: buffer size was validated above - let offsets = self.typed_offsets::(buffer)?; + let offsets = self.typed_offsets::()?; if offsets.is_empty() { return Ok(()); } @@ -819,12 +825,12 @@ impl ArrayData { match &self.data_type { DataType::List(field) | DataType::Map(field, _) => { let values_data = self.get_single_valid_child_data(field.data_type())?; - self.validate_offsets::(&self.buffers[0], values_data.len)?; + self.validate_offsets::(values_data.len)?; Ok(()) } DataType::LargeList(field) => { let values_data = self.get_single_valid_child_data(field.data_type())?; - self.validate_offsets::(&self.buffers[0], values_data.len)?; + self.validate_offsets::(values_data.len)?; Ok(()) } DataType::FixedSizeList(field, list_size) => { @@ -979,7 +985,7 @@ impl ArrayData { ))); } - self.validate_dictionary_offset()?; + self.validate_values()?; // validate all children recursively self.child_data @@ -997,8 +1003,15 @@ impl ArrayData { Ok(()) } - pub fn validate_dictionary_offset(&self) -> Result<()> { + pub fn validate_values(&self) -> Result<()> { match &self.data_type { + DataType::Decimal(p, _) => { + let values_buffer: &[i128] = self.typed_buffer(0, self.len)?; + for value in values_buffer { + validate_decimal_precision(*value, *p)?; + } + Ok(()) + } DataType::Utf8 => self.validate_utf8::(), DataType::LargeUtf8 => 
self.validate_utf8::(), DataType::Binary => self.validate_offsets_full::(self.buffers[1].len()), @@ -1007,11 +1020,11 @@ impl ArrayData { } DataType::List(_) | DataType::Map(_, _) => { let child = &self.child_data[0]; - self.validate_offsets_full::(child.len + child.offset) + self.validate_offsets_full::(child.len) } DataType::LargeList(_) => { let child = &self.child_data[0]; - self.validate_offsets_full::(child.len + child.offset) + self.validate_offsets_full::(child.len) } DataType::Union(_, _, _) => { // Validate Union Array as part of implementing new Union semantics @@ -1053,17 +1066,12 @@ impl ArrayData { /// /// For example, the offsets buffer contained `[1, 2, 4]`, this /// function would call `validate([1,2])`, and `validate([2,4])` - fn validate_each_offset( - &self, - offsets_buffer: &Buffer, - offset_limit: usize, - validate: V, - ) -> Result<()> + fn validate_each_offset(&self, offset_limit: usize, validate: V) -> Result<()> where - T: ArrowNativeType + std::convert::TryInto + num::Num + std::fmt::Display, + T: ArrowNativeType + TryInto + num::Num + std::fmt::Display, V: Fn(usize, Range) -> Result<()>, { - self.typed_offsets::(offsets_buffer)? + self.typed_offsets::()? 
.iter() .enumerate() .map(|(i, x)| { @@ -1109,50 +1117,39 @@ impl ArrayData { /// into `buffers[1]` are valid utf8 sequences fn validate_utf8(&self) -> Result<()> where - T: ArrowNativeType + std::convert::TryInto + num::Num + std::fmt::Display, + T: ArrowNativeType + TryInto + num::Num + std::fmt::Display, { - let offset_buffer = &self.buffers[0]; let values_buffer = &self.buffers[1].as_slice(); - self.validate_each_offset::( - offset_buffer, - values_buffer.len(), - |string_index, range| { - std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| { - ArrowError::InvalidArgumentError(format!( - "Invalid UTF8 sequence at string index {} ({:?}): {}", - string_index, range, e - )) - })?; - Ok(()) - }, - ) + self.validate_each_offset::(values_buffer.len(), |string_index, range| { + std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| { + ArrowError::InvalidArgumentError(format!( + "Invalid UTF8 sequence at string index {} ({:?}): {}", + string_index, range, e + )) + })?; + Ok(()) + }) } /// Ensures that all offsets in `buffers[0]` into `buffers[1]` are /// between `0` and `offset_limit` fn validate_offsets_full(&self, offset_limit: usize) -> Result<()> where - T: ArrowNativeType + std::convert::TryInto + num::Num + std::fmt::Display, + T: ArrowNativeType + TryInto + num::Num + std::fmt::Display, { - let offset_buffer = &self.buffers[0]; - - self.validate_each_offset::( - offset_buffer, - offset_limit, - |_string_index, _range| { - // No validation applied to each value, but the iteration - // itself applies bounds checking to each range - Ok(()) - }, - ) + self.validate_each_offset::(offset_limit, |_string_index, _range| { + // No validation applied to each value, but the iteration + // itself applies bounds checking to each range + Ok(()) + }) } /// Validates that each value in self.buffers (typed as T) /// is within the range [0, max_value], inclusive fn check_bounds(&self, max_value: i64) -> Result<()> where - T: ArrowNativeType + 
std::convert::TryInto + num::Num + std::fmt::Display, + T: ArrowNativeType + TryInto + num::Num + std::fmt::Display, { let required_len = self.len + self.offset; let buffer = &self.buffers[0]; @@ -1163,7 +1160,7 @@ impl ArrayData { // Justification: buffer size was validated above let indexes: &[T] = - unsafe { &(buffer.typed_data::()[self.offset..self.offset + self.len]) }; + &buffer.typed_data::()[self.offset..self.offset + self.len]; indexes.iter().enumerate().try_for_each(|(i, &dict_index)| { // Do not check the value is null (value can be arbitrary) @@ -1492,8 +1489,9 @@ mod tests { use std::ptr::NonNull; use crate::array::{ - make_array, Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array, - StringArray, StructBuilder, UInt64Array, + make_array, Array, BooleanBuilder, DecimalBuilder, FixedSizeListBuilder, + Int32Array, Int32Builder, Int64Array, StringArray, StructBuilder, UInt64Array, + UInt8Builder, }; use crate::buffer::Buffer; use crate::datatypes::Field; @@ -1843,7 +1841,7 @@ mod tests { #[test] #[should_panic( - expected = "Offsets buffer size (bytes): 4 isn't large enough for LargeUtf8. Length 0 needs 1" + expected = "Buffer 0 of LargeUtf8 isn't large enough. Expected 8 bytes got 4" )] fn test_empty_large_utf8_array_with_wrong_type_offsets() { let data_buffer = Buffer::from(&[]); @@ -1861,7 +1859,7 @@ mod tests { #[test] #[should_panic( - expected = "Offsets buffer size (bytes): 8 isn't large enough for Utf8. Length 2 needs 3" + expected = "Buffer 0 of Utf8 isn't large enough. Expected 12 bytes got 8" )] fn test_validate_offsets_i32() { let data_buffer = Buffer::from_slice_ref(&"abcdef".as_bytes()); @@ -1879,7 +1877,7 @@ mod tests { #[test] #[should_panic( - expected = "Offsets buffer size (bytes): 16 isn't large enough for LargeUtf8. Length 2 needs 3" + expected = "Buffer 0 of LargeUtf8 isn't large enough. 
Expected 24 bytes got 16" )] fn test_validate_offsets_i64() { let data_buffer = Buffer::from_slice_ref(&"abcdef".as_bytes()); @@ -2707,4 +2705,72 @@ mod tests { assert_eq!(array, &expected); } + + #[test] + #[cfg(not(feature = "force_validate"))] + fn test_decimal_full_validation() { + let values_builder = UInt8Builder::new(10); + let byte_width = 16; + let mut fixed_size_builder = + FixedSizeListBuilder::new(values_builder, byte_width); + let value_as_bytes = DecimalBuilder::from_i128_to_fixed_size_bytes( + 123456, + fixed_size_builder.value_length() as usize, + ) + .unwrap(); + fixed_size_builder + .values() + .append_slice(value_as_bytes.as_slice()) + .unwrap(); + fixed_size_builder.append(true).unwrap(); + let fixed_size_array = fixed_size_builder.finish(); + + // Build ArrayData for Decimal + let builder = ArrayData::builder(DataType::Decimal(5, 3)) + .len(fixed_size_array.len()) + .add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone()); + let array_data = unsafe { builder.build_unchecked() }; + let validation_result = array_data.validate_full(); + let error = validation_result.unwrap_err(); + assert_eq!( + "Invalid argument error: 123456 is too large to store in a Decimal of precision 5. 
Max is 99999", + error.to_string() + ); + } + + #[test] + fn test_decimal_validation() { + let mut builder = DecimalBuilder::new(4, 10, 4); + builder.append_value(10000).unwrap(); + builder.append_value(20000).unwrap(); + let array = builder.finish(); + + array.data().validate_full().unwrap(); + } + + #[test] + #[cfg(not(feature = "force_validate"))] + fn test_sliced_array_child() { + let values = Int32Array::from_iter_values([1, 2, 3]); + let values_sliced = values.slice(1, 2); + let offsets = Buffer::from_iter([1_i32, 3_i32]); + + let list_field = Field::new("element", DataType::Int32, false); + let data_type = DataType::List(Box::new(list_field)); + + let data = unsafe { + ArrayData::new_unchecked( + data_type, + 1, + None, + None, + 0, + vec![offsets], + vec![values_sliced.data().clone()], + ) + }; + + let err = data.validate_values().unwrap_err(); + assert_eq!(err.to_string(), "Invalid argument error: Offset invariant failure: offset at position 1 out of bounds: 3 > 2"); + } } diff --git a/arrow/src/array/equal/list.rs b/arrow/src/array/equal/list.rs index 65d320c0079d..0feefa7aa11a 100644 --- a/arrow/src/array/equal/list.rs +++ b/arrow/src/array/equal/list.rs @@ -73,6 +73,11 @@ pub(super) fn list_equal( // however, one is more likely to slice into a list array and get a region that has 0 // child values. // The test that triggered this behaviour had [4, 4] as a slice of 1 value slot. + // For the edge case that zero length list arrays are always equal. 
+ if len == 0 { + return true; + } + let lhs_child_length = lhs_offsets[lhs_start + len].to_usize().unwrap() - lhs_offsets[lhs_start].to_usize().unwrap(); diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index b89a8fa53e0b..c3b0bbc95c2b 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -629,6 +629,57 @@ mod tests { test_equal(&a, &b, false); } + #[test] + fn test_empty_offsets_list_equal() { + let empty: Vec = vec![]; + let values = Int32Array::from(empty); + let empty_offsets: [u8; 0] = []; + + let a = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(&empty_offsets)) + .add_child_data(values.data().clone()) + .null_bit_buffer(Some(Buffer::from(&empty_offsets))) + .build() + .unwrap(); + + let b = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(&empty_offsets)) + .add_child_data(values.data().clone()) + .null_bit_buffer(Some(Buffer::from(&empty_offsets))) + .build() + .unwrap(); + + test_equal(&a, &b, true); + + let c = ArrayDataBuilder::new(DataType::List(Box::new(Field::new( + "item", + DataType::Int32, + true, + )))) + .len(0) + .add_buffer(Buffer::from(vec![0i32, 2, 3, 4, 6, 7, 8].to_byte_slice())) + .add_child_data( + Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]) + .data() + .clone(), + ) + .null_bit_buffer(Some(Buffer::from(vec![0b00001001]))) + .build() + .unwrap(); + + test_equal(&a, &c, true); + } + // Test the case where null_count > 0 #[test] fn test_list_null() { diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs index 64f109df5ff9..9db1a4397cb8 100644 --- a/arrow/src/array/equal_json.rs +++ b/arrow/src/array/equal_json.rs @@ -370,7 +370,7 @@ impl JsonEqual for DecimalArray { self.is_valid(i) && (s .parse::() - .map_or_else(|_| false, |v| v == self.value(i))) + .map_or_else(|_| false, |v| v == 
self.value(i).as_i128())) } JNull => self.is_null(i), _ => false, diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs index 57329037bc46..12d6f440b78d 100644 --- a/arrow/src/array/ffi.rs +++ b/arrow/src/array/ffi.rs @@ -74,6 +74,11 @@ mod tests { let result = &ArrayData::try_from(d1)?; assert_eq!(result, expected); + + unsafe { + Arc::from_raw(array); + Arc::from_raw(schema); + } Ok(()) } diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs index 18bdca621795..bc70d1a2a8ed 100644 --- a/arrow/src/array/iterator.rs +++ b/arrow/src/array/iterator.rs @@ -425,7 +425,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> { if self.array.is_null(old) { Some(None) } else { - Some(Some(self.array.value(old))) + Some(Some(self.array.value(old).as_i128())) } } } diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 0bd32d347772..bbe62cf6a1f6 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -15,40 +15,66 @@ // specific language governing permissions and limitations // under the License. -//! The central type in Apache Arrow are arrays, represented -//! by the [`Array` trait](crate::array::Array). -//! An array represents a known-length sequence of values all -//! having the same type. +//! The central type in Apache Arrow are arrays, which are a known-length sequence of values +//! all having the same type. This module provides concrete implementations of each type, as +//! well as an [`Array`] trait that can be used for type-erasure. //! -//! Internally, those values are represented by one or several -//! [buffers](crate::buffer::Buffer), the number and meaning -//! of which depend on the array’s data type, as documented in -//! [the Arrow data layout specification](https://arrow.apache.org/docs/format/Columnar.html). -//! For example, the type `Int16Array` represents an Apache -//! Arrow array of 16-bit integers. +//! # Downcasting an Array //! -//! Those buffers consist of the value data itself and an -//! 
optional [bitmap buffer](crate::bitmap::Bitmap) that -//! indicates which array entries are null values. -//! The bitmap buffer can be entirely omitted if the array is -//! known to have zero null values. +//! Arrays are often passed around as a dynamically typed [`&dyn Array`] or [`ArrayRef`]. +//! For example, [`RecordBatch`](`crate::record_batch::RecordBatch`) stores columns as [`ArrayRef`]. //! -//! There are concrete implementations of this trait for each -//! data type, that help you access individual values of the -//! array. +//! Whilst these arrays can be passed directly to the [`compute`](crate::compute), +//! [`csv`](crate::csv), [`json`](crate::json), etc... APIs, it is often the case that you wish +//! to interact with the data directly. This requires downcasting to the concrete type of the array: +//! +//! ``` +//! # use arrow::array::{Array, Float32Array, Int32Array}; +//! # +//! fn sum_int32(array: &dyn Array) -> i32 { +//! let integers: &Int32Array = array.as_any().downcast_ref().unwrap(); +//! integers.iter().map(|val| val.unwrap_or_default()).sum() +//! } +//! +//! // Note: the values for positions corresponding to nulls will be arbitrary +//! fn as_f32_slice(array: &dyn Array) -> &[f32] { +//! array.as_any().downcast_ref::().unwrap().values() +//! } +//! ``` //! //! # Building an Array //! -//! Arrow's `Arrays` are immutable, but there is the trait -//! [`ArrayBuilder`](crate::array::ArrayBuilder) -//! that helps you with constructing new `Arrays`. As with the -//! `Array` trait, there are builder implementations for all -//! concrete array types. +//! Most [`Array`] implementations can be constructed directly from iterators or [`Vec`] //! -//! # Example //! ``` -//! use arrow::array::Int16Array; +//! # use arrow::array::Int32Array; +//! # use arrow::array::StringArray; +//! # use arrow::array::ListArray; +//! # use arrow::datatypes::Int32Type; +//! # +//! Int32Array::from(vec![1, 2]); +//! Int32Array::from(vec![Some(1), None]); +//! 
Int32Array::from_iter([1, 2, 3, 4]); +//! Int32Array::from_iter([Some(1), Some(2), None, Some(4)]); +//! +//! StringArray::from(vec!["foo", "bar"]); +//! StringArray::from(vec![Some("foo"), None]); +//! StringArray::from_iter([Some("foo"), None]); +//! StringArray::from_iter_values(["foo", "bar"]); +//! +//! ListArray::from_iter_primitive::([ +//! Some(vec![Some(1), None, Some(3)]), +//! None, +//! Some(vec![]) +//! ]); +//! ``` +//! +//! Additionally [`ArrayBuilder`](crate::array::ArrayBuilder) implementations can be +//! used to construct arrays with a push-based interface //! +//! ``` +//! # use arrow::array::Int16Array; +//! # //! // Create a new builder with a capacity of 100 //! let mut builder = Int16Array::builder(100); //! @@ -78,6 +104,43 @@ //! "Get slice of len 2 starting at idx 3" //! ) //! ``` +//! +//! # Zero-Copy Slicing +//! +//! Given an [`Array`] of arbitrary length, it is possible to create an owned slice of this +//! data. Internally this just increments some ref-counts, and so is incredibly cheap +//! +//! ```rust +//! # use std::sync::Arc; +//! # use arrow::array::{Array, Int32Array, ArrayRef}; +//! let array = Arc::new(Int32Array::from_iter([1, 2, 3])) as ArrayRef; +//! +//! // Slice with offset 1 and length 2 +//! let sliced = array.slice(1, 2); +//! let ints = sliced.as_any().downcast_ref::().unwrap(); +//! assert_eq!(ints.values(), &[2, 3]); +//! ``` +//! +//! # Internal Representation +//! +//! Internally, arrays are represented by one or several [`Buffer`], the number and meaning of +//! which depend on the array’s data type, as documented in the [Arrow specification]. +//! +//! For example, the type `Int16Array` represents an array of 16-bit integers and consists of: +//! +//! * An optional [`Bitmap`] identifying any null values +//! * A contiguous [`Buffer`] of 16-bit integers +//! +//! Similarly, the type `StringArray` represents an array of UTF-8 strings and consists of: +//! +//! 
* An optional [`Bitmap`] identifying any null values +//! * An offsets [`Buffer`] of 32-bit integers identifying valid UTF-8 sequences within the values buffer +//! * A values [`Buffer`] of UTF-8 encoded string data +//! +//! [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html +//! [`&dyn Array`]: Array +//! [`Bitmap`]: crate::bitmap::Bitmap +//! [`Buffer`]: crate::buffer::Buffer #[allow(clippy::module_inception)] mod array; @@ -398,9 +461,29 @@ pub use self::array_string::GenericStringArray; // --------------------- Array Builder --------------------- -pub use self::builder::make_builder; +pub use self::builder::ArrayBuilder; +pub use self::builder::BinaryBuilder; pub use self::builder::BooleanBufferBuilder; +pub use self::builder::BooleanBuilder; pub use self::builder::BufferBuilder; +pub use self::builder::DecimalBuilder; +pub use self::builder::FixedSizeBinaryBuilder; +pub use self::builder::FixedSizeListBuilder; +pub use self::builder::GenericListBuilder; +pub use self::builder::GenericStringBuilder; +pub use self::builder::LargeBinaryBuilder; +pub use self::builder::LargeListBuilder; +pub use self::builder::LargeStringBuilder; +pub use self::builder::ListBuilder; +pub use self::builder::MapBuilder; +pub use self::builder::PrimitiveBuilder; +pub use self::builder::PrimitiveDictionaryBuilder; +pub use self::builder::StringBuilder; +pub use self::builder::StringDictionaryBuilder; +pub use self::builder::StructBuilder; +pub use self::builder::UnionBuilder; + +pub use self::builder::make_builder; pub type Int8BufferBuilder = BufferBuilder; pub type Int16BufferBuilder = BufferBuilder; @@ -446,26 +529,6 @@ pub type DurationMicrosecondBufferBuilder = pub type DurationNanosecondBufferBuilder = BufferBuilder<::Native>; -pub use self::builder::ArrayBuilder; -pub use self::builder::BinaryBuilder; -pub use self::builder::BooleanBuilder; -pub use self::builder::DecimalBuilder; -pub use self::builder::FixedSizeBinaryBuilder; -pub use 
self::builder::FixedSizeListBuilder; -pub use self::builder::GenericListBuilder; -pub use self::builder::GenericStringBuilder; -pub use self::builder::LargeBinaryBuilder; -pub use self::builder::LargeListBuilder; -pub use self::builder::LargeStringBuilder; -pub use self::builder::ListBuilder; -pub use self::builder::MapBuilder; -pub use self::builder::PrimitiveBuilder; -pub use self::builder::PrimitiveDictionaryBuilder; -pub use self::builder::StringBuilder; -pub use self::builder::StringDictionaryBuilder; -pub use self::builder::StructBuilder; -pub use self::builder::UnionBuilder; - pub type Int8Builder = PrimitiveBuilder; pub type Int16Builder = PrimitiveBuilder; pub type Int32Builder = PrimitiveBuilder; diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index 4671d82673fd..68ae7f6d4d0d 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -78,18 +78,13 @@ impl<'a> _MutableArrayData<'a> { } }; - let mut array_data_builder = ArrayDataBuilder::new(self.data_type) + ArrayDataBuilder::new(self.data_type) .offset(0) .len(self.len) .null_count(self.null_count) .buffers(buffers) - .child_data(child_data); - if self.null_count > 0 { - array_data_builder = - array_data_builder.null_bit_buffer(Some(self.null_buffer.into())); - } - - array_data_builder + .child_data(child_data) + .null_bit_buffer((self.null_count > 0).then(|| self.null_buffer.into())) } } @@ -184,48 +179,23 @@ fn build_extend_dictionary( max: usize, ) -> Option { use crate::datatypes::*; + macro_rules! 
validate_and_build { + ($dt: ty) => {{ + let _: $dt = max.try_into().ok()?; + let offset: $dt = offset.try_into().ok()?; + Some(primitive::build_extend_with_offset(array, offset)) + }}; + } match array.data_type() { DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() { - DataType::UInt8 => { - let _: u8 = max.try_into().ok()?; - let offset: u8 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt16 => { - let _: u16 = max.try_into().ok()?; - let offset: u16 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt32 => { - let _: u32 = max.try_into().ok()?; - let offset: u32 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::UInt64 => { - let _: u64 = max.try_into().ok()?; - let offset: u64 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int8 => { - let _: i8 = max.try_into().ok()?; - let offset: i8 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int16 => { - let _: i16 = max.try_into().ok()?; - let offset: i16 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int32 => { - let _: i32 = max.try_into().ok()?; - let offset: i32 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } - DataType::Int64 => { - let _: i64 = max.try_into().ok()?; - let offset: i64 = offset.try_into().ok()?; - Some(primitive::build_extend_with_offset(array, offset)) - } + DataType::UInt8 => validate_and_build!(u8), + DataType::UInt16 => validate_and_build!(u16), + DataType::UInt32 => validate_and_build!(u32), + DataType::UInt64 => validate_and_build!(u64), + DataType::Int8 => validate_and_build!(i8), + DataType::Int16 => validate_and_build!(i16), + DataType::Int32 => validate_and_build!(i32), + DataType::Int64 => 
validate_and_build!(i64), _ => unreachable!(), }, _ => None, @@ -394,7 +364,7 @@ impl<'a> MutableArrayData<'a> { /// a [Capacities] variant is not yet supported. pub fn with_capacities( arrays: Vec<&'a ArrayData>, - mut use_nulls: bool, + use_nulls: bool, capacities: Capacities, ) -> Self { let data_type = arrays[0].data_type(); @@ -402,20 +372,22 @@ impl<'a> MutableArrayData<'a> { // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. - if arrays.iter().any(|array| array.null_count() > 0) { - use_nulls = true; - }; + let use_nulls = use_nulls | arrays.iter().any(|array| array.null_count() > 0); let mut array_capacity; let [buffer1, buffer2] = match (data_type, &capacities) { - (DataType::LargeUtf8, Capacities::Binary(capacity, Some(value_cap))) - | (DataType::LargeBinary, Capacities::Binary(capacity, Some(value_cap))) => { + ( + DataType::LargeUtf8 | DataType::LargeBinary, + Capacities::Binary(capacity, Some(value_cap)), + ) => { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) } - (DataType::Utf8, Capacities::Binary(capacity, Some(value_cap))) - | (DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => { + ( + DataType::Utf8 | DataType::Binary, + Capacities::Binary(capacity, Some(value_cap)), + ) => { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) } @@ -423,6 +395,13 @@ impl<'a> MutableArrayData<'a> { array_capacity = *capacity; new_buffers(data_type, *capacity) } + ( + DataType::List(_) | DataType::LargeList(_), + Capacities::List(capacity, _), + ) => { + array_capacity = *capacity; + new_buffers(data_type, *capacity) + } _ => panic!("Capacities: {:?} not yet supported", capacities), }; @@ -462,11 +441,10 @@ impl<'a> MutableArrayData<'a> { let capacities = if let Capacities::List(capacity, ref child_capacities) = capacities { - array_capacity = capacity; child_capacities .clone() .map(|c| *c) - 
.unwrap_or(Capacities::Array(array_capacity)) + .unwrap_or(Capacities::Array(capacity)) } else { Capacities::Array(array_capacity) }; @@ -721,6 +699,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_decimal() { let decimal_array = create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3); @@ -734,6 +713,7 @@ mod tests { assert_eq!(array, expected); } #[test] + #[cfg(not(feature = "force_validate"))] fn test_decimal_offset() { let decimal_array = create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3); @@ -748,6 +728,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_decimal_null_offset_nulls() { let decimal_array = create_decimal_array(&[Some(1), Some(2), None, Some(3)], 10, 3); @@ -1343,6 +1324,40 @@ mod tests { Ok(()) } + #[test] + fn test_list_append_with_capacities() -> Result<()> { + let mut builder = ListBuilder::::new(Int64Builder::new(24)); + builder.values().append_slice(&[1, 2, 3])?; + builder.append(true)?; + builder.values().append_slice(&[4, 5])?; + builder.append(true)?; + builder.values().append_slice(&[6, 7, 8])?; + builder.values().append_slice(&[9, 10, 11])?; + builder.append(true)?; + let a = builder.finish(); + + let a_builder = Int64Builder::new(24); + let mut a_builder = ListBuilder::::new(a_builder); + a_builder.values().append_slice(&[12, 13])?; + a_builder.append(true)?; + a_builder.append(true)?; + a_builder.values().append_slice(&[14, 15, 16, 17])?; + a_builder.append(true)?; + let b = a_builder.finish(); + + let mutable = MutableArrayData::with_capacities( + vec![a.data(), b.data()], + false, + Capacities::List(6, Some(Box::new(Capacities::Array(17)))), + ); + + // capacities are rounded up to multiples of 64 by MutableBuffer + assert_eq!(mutable.data.buffer1.capacity(), 64); + assert_eq!(mutable.data.child_data[0].data.buffer1.capacity(), 192); + + Ok(()) + } + #[test] fn test_map_nulls_append() -> Result<()> { let mut builder = MapBuilder::::new( diff --git 
a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs index c34ea101bb3b..f5d59c5ed555 100644 --- a/arrow/src/buffer/immutable.rs +++ b/arrow/src/buffer/immutable.rs @@ -181,19 +181,15 @@ impl Buffer { /// View buffer as typed slice. /// - /// # Safety + /// # Panics /// - /// `ArrowNativeType` is public so that it can be used as a trait bound for other public - /// components, such as the `ToByteSlice` trait. However, this means that it can be - /// implemented by user defined types, which it is not intended for. - pub unsafe fn typed_data(&self) -> &[T] { - // JUSTIFICATION - // Benefit - // Many of the buffers represent specific types, and consumers of `Buffer` often need to re-interpret them. - // Soundness - // * The pointer is non-null by construction - // * alignment asserted below. - let (prefix, offsets, suffix) = self.as_slice().align_to::(); + /// This function panics if the underlying buffer is not aligned + /// correctly for type `T`. + pub fn typed_data(&self) -> &[T] { + // SAFETY + // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect + // implementation outside this crate, and this method checks alignment + let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::() }; assert!(prefix.is_empty() && suffix.is_empty()); offsets } @@ -451,7 +447,7 @@ mod tests { macro_rules! 
check_as_typed_data { ($input: expr, $native_t: ty) => {{ let buffer = Buffer::from_slice_ref($input); - let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() }; + let slice: &[$native_t] = buffer.typed_data::<$native_t>(); assert_eq!($input, slice); }}; } @@ -573,12 +569,12 @@ mod tests { ) }; - let slice = unsafe { buffer.typed_data::() }; + let slice = buffer.typed_data::(); assert_eq!(slice, &[1, 2, 3, 4, 5]); let buffer = buffer.slice(std::mem::size_of::()); - let slice = unsafe { buffer.typed_data::() }; + let slice = buffer.typed_data::(); assert_eq!(slice, &[2, 3, 4, 5]); } } diff --git a/arrow/src/buffer/mod.rs b/arrow/src/buffer/mod.rs index cf0461b5f536..b392b0583d6d 100644 --- a/arrow/src/buffer/mod.rs +++ b/arrow/src/buffer/mod.rs @@ -23,6 +23,9 @@ pub use immutable::*; mod mutable; pub use mutable::*; mod ops; +mod scalar; +pub use scalar::*; + pub use ops::*; use crate::error::{ArrowError, Result}; diff --git a/arrow/src/buffer/mutable.rs b/arrow/src/buffer/mutable.rs index 709973b4401b..11783b82da54 100644 --- a/arrow/src/buffer/mutable.rs +++ b/arrow/src/buffer/mutable.rs @@ -30,7 +30,11 @@ use std::ptr::NonNull; /// along cache lines and in multiple of 64 bytes. /// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice] /// to insert many items, and `into` to convert it to [`Buffer`]. 
+/// +/// For a safe, strongly typed API consider using [`crate::array::BufferBuilder`] +/// /// # Example +/// /// ``` /// # use arrow::buffer::{Buffer, MutableBuffer}; /// let mut buffer = MutableBuffer::new(0); @@ -152,6 +156,17 @@ impl MutableBuffer { } } + /// Truncates this buffer to `len` bytes + /// + /// If `len` is greater than the buffer's current length, this has no effect + #[inline(always)] + pub fn truncate(&mut self, len: usize) { + if len > self.len { + return; + } + self.len = len; + } + /// Resizes the buffer, either truncating its contents (with no change in capacity), or /// growing it (potentially reallocating it) and writing `value` in the newly available bytes. /// # Example @@ -273,19 +288,18 @@ impl MutableBuffer { Buffer::from_bytes(bytes) } - /// View this buffer asa slice of a specific type. - /// - /// # Safety - /// - /// This function must only be used with buffers which are treated - /// as type `T` (e.g. extended with items of type `T`). + /// View this buffer as a slice of a specific type. /// /// # Panics /// /// This function panics if the underlying buffer is not aligned /// correctly for type `T`. 
- pub unsafe fn typed_data_mut(&mut self) -> &mut [T] { - let (prefix, offsets, suffix) = self.as_slice_mut().align_to_mut::(); + pub fn typed_data_mut(&mut self) -> &mut [T] { + // SAFETY + // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect + // implementation outside this crate, and this method checks alignment + let (prefix, offsets, suffix) = + unsafe { self.as_slice_mut().align_to_mut::() }; assert!(prefix.is_empty() && suffix.is_empty()); offsets } @@ -299,7 +313,7 @@ impl MutableBuffer { /// assert_eq!(buffer.len(), 8) // u32 has 4 bytes /// ``` #[inline] - pub fn extend_from_slice(&mut self, items: &[T]) { + pub fn extend_from_slice(&mut self, items: &[T]) { let len = items.len(); let additional = len * std::mem::size_of::(); self.reserve(additional); diff --git a/arrow/src/buffer/ops.rs b/arrow/src/buffer/ops.rs index e0086a1a8207..ea155c8d78e4 100644 --- a/arrow/src/buffer/ops.rs +++ b/arrow/src/buffer/ops.rs @@ -15,110 +15,8 @@ // specific language governing permissions and limitations // under the License. -#[cfg(feature = "simd")] -use crate::util::bit_util; -#[cfg(feature = "simd")] -use packed_simd::u8x64; - -#[cfg(feature = "avx512")] -use crate::arch::avx512::*; -use crate::util::bit_util::ceil; -#[cfg(any(feature = "simd", feature = "avx512"))] -use std::borrow::BorrowMut; - use super::{Buffer, MutableBuffer}; - -/// Apply a bitwise operation `simd_op` / `scalar_op` to two inputs using simd instructions and return the result as a Buffer. -/// The `simd_op` functions gets applied on chunks of 64 bytes (512 bits) at a time -/// and the `scalar_op` gets applied to remaining bytes. -/// Contrary to the non-simd version `bitwise_bin_op_helper`, the offset and length is specified in bytes -/// and this version does not support operations starting at arbitrary bit offsets. 
-#[cfg(feature = "simd")] -pub fn bitwise_bin_op_simd_helper( - left: &Buffer, - left_offset: usize, - right: &Buffer, - right_offset: usize, - len: usize, - simd_op: SI, - scalar_op: SC, -) -> Buffer -where - SI: Fn(u8x64, u8x64) -> u8x64, - SC: Fn(u8, u8) -> u8, -{ - let mut result = MutableBuffer::new(len).with_bitset(len, false); - let lanes = u8x64::lanes(); - - let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes); - let mut right_chunks = right.as_slice()[right_offset..].chunks_exact(lanes); - let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) - .for_each(|(res, (left, right))| { - unsafe { bit_util::bitwise_bin_op_simd(&left, &right, res, &simd_op) }; - }); - - result_chunks - .into_remainder() - .iter_mut() - .zip( - left_chunks - .remainder() - .iter() - .zip(right_chunks.remainder().iter()), - ) - .for_each(|(res, (left, right))| { - *res = scalar_op(*left, *right); - }); - - result.into() -} - -/// Apply a bitwise operation `simd_op` / `scalar_op` to one input using simd instructions and return the result as a Buffer. -/// The `simd_op` functions gets applied on chunks of 64 bytes (512 bits) at a time -/// and the `scalar_op` gets applied to remaining bytes. -/// Contrary to the non-simd version `bitwise_unary_op_helper`, the offset and length is specified in bytes -/// and this version does not support operations starting at arbitrary bit offsets. 
-#[cfg(feature = "simd")] -pub fn bitwise_unary_op_simd_helper( - left: &Buffer, - left_offset: usize, - len: usize, - simd_op: SI, - scalar_op: SC, -) -> Buffer -where - SI: Fn(u8x64) -> u8x64, - SC: Fn(u8) -> u8, -{ - let mut result = MutableBuffer::new(len).with_bitset(len, false); - let lanes = u8x64::lanes(); - - let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes); - let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut()) - .for_each(|(res, left)| unsafe { - let data_simd = u8x64::from_slice_unaligned_unchecked(left); - let simd_result = simd_op(data_simd); - simd_result.write_to_slice_unaligned_unchecked(res); - }); - - result_chunks - .into_remainder() - .iter_mut() - .zip(left_chunks.remainder().iter()) - .for_each(|(res, left)| { - *res = scalar_op(*left); - }); - - result.into() -} +use crate::util::bit_util::ceil; /// Apply a bitwise operation `op` to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. @@ -170,9 +68,7 @@ where let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits); - // Safety: buffer is always treated as type `u64` in the code - // below. 
- let result_chunks = unsafe { result.typed_data_mut::().iter_mut() }; + let result_chunks = result.typed_data_mut::().iter_mut(); result_chunks .zip(left_chunks.iter()) @@ -189,100 +85,6 @@ where result.into() } -#[cfg(all(target_arch = "x86_64", feature = "avx512"))] -pub fn buffer_bin_and( - left: &Buffer, - left_offset_in_bits: usize, - right: &Buffer, - right_offset_in_bits: usize, - len_in_bits: usize, -) -> Buffer { - if left_offset_in_bits % 8 == 0 - && right_offset_in_bits % 8 == 0 - && len_in_bits % 8 == 0 - { - let len = len_in_bits / 8; - let left_offset = left_offset_in_bits / 8; - let right_offset = right_offset_in_bits / 8; - - let mut result = MutableBuffer::new(len).with_bitset(len, false); - - let mut left_chunks = - left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut right_chunks = - right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut result_chunks = - result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) - .for_each(|(res, (left, right))| unsafe { - avx512_bin_and(left, right, res); - }); - - result_chunks - .into_remainder() - .iter_mut() - .zip( - left_chunks - .remainder() - .iter() - .zip(right_chunks.remainder().iter()), - ) - .for_each(|(res, (left, right))| { - *res = *left & *right; - }); - - result.into() - } else { - bitwise_bin_op_helper( - &left, - left_offset_in_bits, - right, - right_offset_in_bits, - len_in_bits, - |a, b| a & b, - ) - } -} - -#[cfg(all(feature = "simd", not(feature = "avx512")))] -pub fn buffer_bin_and( - left: &Buffer, - left_offset_in_bits: usize, - right: &Buffer, - right_offset_in_bits: usize, - len_in_bits: usize, -) -> Buffer { - if left_offset_in_bits % 8 == 0 - && right_offset_in_bits % 8 == 0 - && len_in_bits % 8 == 0 - { - bitwise_bin_op_simd_helper( - &left, - left_offset_in_bits / 8, - &right, - right_offset_in_bits / 8, - len_in_bits / 8, - |a, b| a & 
b, - |a, b| a & b, - ) - } else { - bitwise_bin_op_helper( - &left, - left_offset_in_bits, - right, - right_offset_in_bits, - len_in_bits, - |a, b| a & b, - ) - } -} - -// Note: do not target specific features like x86 without considering -// other targets like wasm32, as those would fail to build -#[cfg(all(not(any(feature = "simd", feature = "avx512"))))] pub fn buffer_bin_and( left: &Buffer, left_offset_in_bits: usize, @@ -300,98 +102,6 @@ pub fn buffer_bin_and( ) } -#[cfg(all(target_arch = "x86_64", feature = "avx512"))] -pub fn buffer_bin_or( - left: &Buffer, - left_offset_in_bits: usize, - right: &Buffer, - right_offset_in_bits: usize, - len_in_bits: usize, -) -> Buffer { - if left_offset_in_bits % 8 == 0 - && right_offset_in_bits % 8 == 0 - && len_in_bits % 8 == 0 - { - let len = len_in_bits / 8; - let left_offset = left_offset_in_bits / 8; - let right_offset = right_offset_in_bits / 8; - - let mut result = MutableBuffer::new(len).with_bitset(len, false); - - let mut left_chunks = - left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut right_chunks = - right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut result_chunks = - result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) - .for_each(|(res, (left, right))| unsafe { - avx512_bin_or(left, right, res); - }); - - result_chunks - .into_remainder() - .iter_mut() - .zip( - left_chunks - .remainder() - .iter() - .zip(right_chunks.remainder().iter()), - ) - .for_each(|(res, (left, right))| { - *res = *left | *right; - }); - - result.into() - } else { - bitwise_bin_op_helper( - &left, - left_offset_in_bits, - right, - right_offset_in_bits, - len_in_bits, - |a, b| a | b, - ) - } -} - -#[cfg(all(feature = "simd", not(feature = "avx512")))] -pub fn buffer_bin_or( - left: &Buffer, - left_offset_in_bits: usize, - right: &Buffer, - right_offset_in_bits: usize, - len_in_bits: 
usize, -) -> Buffer { - if left_offset_in_bits % 8 == 0 - && right_offset_in_bits % 8 == 0 - && len_in_bits % 8 == 0 - { - bitwise_bin_op_simd_helper( - &left, - left_offset_in_bits / 8, - &right, - right_offset_in_bits / 8, - len_in_bits / 8, - |a, b| a | b, - |a, b| a | b, - ) - } else { - bitwise_bin_op_helper( - &left, - left_offset_in_bits, - right, - right_offset_in_bits, - len_in_bits, - |a, b| a | b, - ) - } -} - -#[cfg(all(not(any(feature = "simd", feature = "avx512"))))] pub fn buffer_bin_or( left: &Buffer, left_offset_in_bits: usize, @@ -414,20 +124,5 @@ pub fn buffer_unary_not( offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - // SIMD implementation if available and byte-aligned - #[cfg(feature = "simd")] - if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 { - return bitwise_unary_op_simd_helper( - &left, - offset_in_bits / 8, - len_in_bits / 8, - |a| !a, - |a| !a, - ); - } - // Default implementation - #[allow(unreachable_code)] - { - bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a) - } + bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a) } diff --git a/arrow/src/buffer/scalar.rs b/arrow/src/buffer/scalar.rs new file mode 100644 index 000000000000..7d663cd2bf96 --- /dev/null +++ b/arrow/src/buffer/scalar.rs @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::buffer::Buffer; +use crate::datatypes::ArrowNativeType; +use std::ops::Deref; + +/// Provides a safe API for interpreting a [`Buffer`] as a slice of [`ArrowNativeType`] +/// +/// # Safety +/// +/// All [`ArrowNativeType`] are valid for all possible backing byte representations, and as +/// a result they are "trivially safely transmutable". +#[derive(Debug)] +pub struct ScalarBuffer { + #[allow(unused)] + buffer: Buffer, + // Borrows from `buffer` and is valid for the lifetime of `buffer` + ptr: *const T, + // The length of this slice + len: usize, +} + +impl ScalarBuffer { + /// Create a new [`ScalarBuffer`] from a [`Buffer`], and an `offset` + /// and `length` in units of `T` + /// + /// # Panics + /// + /// This method will panic if + /// + /// * `offset` or `len` would result in overflow + /// * `buffer` is not aligned to a multiple of `std::mem::size_of::` + /// * `bytes` is not large enough for the requested slice + pub fn new(buffer: Buffer, offset: usize, len: usize) -> Self { + let size = std::mem::size_of::(); + let offset_len = offset.checked_add(len).expect("length overflow"); + let start_bytes = offset.checked_mul(size).expect("start bytes overflow"); + let end_bytes = offset_len.checked_mul(size).expect("end bytes overflow"); + + let bytes = &buffer.as_slice()[start_bytes..end_bytes]; + + // SAFETY: all byte sequences correspond to a valid instance of T + let (prefix, offsets, suffix) = unsafe { bytes.align_to::() }; + assert!( + prefix.is_empty() && suffix.is_empty(), + "buffer is not aligned to {} byte boundary", + size 
+ ); + + let ptr = offsets.as_ptr(); + Self { buffer, ptr, len } + } +} + +impl Deref for ScalarBuffer { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + // SAFETY: Bounds checked in constructor and ptr is valid for the lifetime of self + unsafe { std::slice::from_raw_parts(self.ptr, self.len) } + } +} + +impl AsRef<[T]> for ScalarBuffer { + fn as_ref(&self) -> &[T] { + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic() { + let expected = [0_i32, 1, 2]; + let buffer = Buffer::from_iter(expected.iter().cloned()); + let typed = ScalarBuffer::::new(buffer.clone(), 0, 3); + assert_eq!(*typed, expected); + + let typed = ScalarBuffer::::new(buffer.clone(), 1, 2); + assert_eq!(*typed, expected[1..]); + + let typed = ScalarBuffer::::new(buffer.clone(), 1, 0); + assert!(typed.is_empty()); + + let typed = ScalarBuffer::::new(buffer, 3, 0); + assert!(typed.is_empty()); + } + + #[test] + #[should_panic(expected = "buffer is not aligned to 4 byte boundary")] + fn test_unaligned() { + let expected = [0_i32, 1, 2]; + let buffer = Buffer::from_iter(expected.iter().cloned()); + let buffer = buffer.slice(1); + ScalarBuffer::::new(buffer, 0, 2); + } + + #[test] + #[should_panic(expected = "range end index 16 out of range for slice of length 12")] + fn test_length_out_of_bounds() { + let buffer = Buffer::from_iter([0_i32, 1, 2]); + ScalarBuffer::::new(buffer, 1, 3); + } + + #[test] + #[should_panic(expected = "range end index 16 out of range for slice of length 12")] + fn test_offset_out_of_bounds() { + let buffer = Buffer::from_iter([0_i32, 1, 2]); + ScalarBuffer::::new(buffer, 4, 0); + } + + #[test] + #[should_panic(expected = "length overflow")] + fn test_length_overflow() { + let buffer = Buffer::from_iter([0_i32, 1, 2]); + ScalarBuffer::::new(buffer, usize::MAX, 1); + } + + #[test] + #[should_panic(expected = "start bytes overflow")] + fn test_start_overflow() { + let buffer = Buffer::from_iter([0_i32, 1, 2]); + 
ScalarBuffer::::new(buffer, usize::MAX / 4 + 1, 0); + } + + #[test] + #[should_panic(expected = "end bytes overflow")] + fn test_end_overflow() { + let buffer = Buffer::from_iter([0_i32, 1, 2]); + ScalarBuffer::::new(buffer, 0, usize::MAX / 4 + 1); + } +} diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 2d63924a12d3..04865e15bca2 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -64,7 +64,7 @@ where } let null_bit_buffer = - combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; let values = left .values() @@ -117,7 +117,7 @@ where } let null_bit_buffer = - combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; let buffer = if let Some(b) = &null_bit_buffer { let values = left.values().iter().zip(right.values()).enumerate().map( @@ -316,7 +316,7 @@ where // Create the combined `Bitmap` let null_bit_buffer = - combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; let lanes = T::lanes(); let buffer_size = left.len() * std::mem::size_of::(); diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow/src/compute/kernels/boolean.rs index 5f52d56bb4d2..209edc48d195 100644 --- a/arrow/src/compute/kernels/boolean.rs +++ b/arrow/src/compute/kernels/boolean.rs @@ -193,7 +193,7 @@ where let left_data = left.data_ref(); let right_data = right.data_ref(); - let null_bit_buffer = combine_option_bitmap(left_data, right_data, len)?; + let null_bit_buffer = combine_option_bitmap(&[left_data, right_data], len)?; let left_buffer = &left_data.buffers()[0]; let right_buffer = &right_data.buffers()[0]; diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 
93a8ebcb6b5a..fa92179b747c 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -353,7 +353,7 @@ macro_rules! cast_decimal_to_integer { if array.is_null(i) { value_builder.append_null()?; } else { - let v = array.value(i) / div; + let v = array.value(i).as_i128() / div; // check the overflow // For example: Decimal(128,10,0) as i8 // 128 is out of range i8 @@ -383,7 +383,7 @@ macro_rules! cast_decimal_to_float { } else { // The range of f32 or f64 is larger than i128, we don't need to check overflow. // cast the i128 to f64 will lose precision, for example the `112345678901234568` will be as `112345678901234560`. - let v = (array.value(i) as f64 / div) as $NATIVE_TYPE; + let v = (array.value(i).as_i128() as f64 / div) as $NATIVE_TYPE; value_builder.append_value(v)?; } } @@ -2084,7 +2084,7 @@ where let list_data = array.data(); let str_values_buf = str_array.value_data(); - let offsets = unsafe { list_data.buffers()[0].typed_data::() }; + let offsets = list_data.buffers()[0].typed_data::(); let mut offset_builder = BufferBuilder::::new(offsets.len()); offsets.iter().try_for_each::<_, Result<_>>(|offset| { @@ -2196,6 +2196,7 @@ where #[cfg(test)] mod tests { use super::*; + use crate::util::decimal::Decimal128; use crate::{buffer::Buffer, util::display::array_value_to_string}; macro_rules! 
generate_cast_test_case { @@ -2247,9 +2248,9 @@ mod tests { DecimalArray, &output_type, vec![ - Some(11234560_i128), - Some(21234560_i128), - Some(31234560_i128), + Some(Decimal128::new_from_i128(20, 4, 11234560_i128)), + Some(Decimal128::new_from_i128(20, 4, 21234560_i128)), + Some(Decimal128::new_from_i128(20, 4, 31234560_i128)), None ] ); @@ -2426,11 +2427,11 @@ mod tests { DecimalArray, &decimal_type, vec![ - Some(1000000_i128), - Some(2000000_i128), - Some(3000000_i128), + Some(Decimal128::new_from_i128(38, 6, 1000000_i128)), + Some(Decimal128::new_from_i128(38, 6, 2000000_i128)), + Some(Decimal128::new_from_i128(38, 6, 3000000_i128)), None, - Some(5000000_i128) + Some(Decimal128::new_from_i128(38, 6, 5000000_i128)) ] ); } @@ -2458,12 +2459,12 @@ mod tests { DecimalArray, &decimal_type, vec![ - Some(1100000_i128), - Some(2200000_i128), - Some(4400000_i128), + Some(Decimal128::new_from_i128(38, 6, 1100000_i128)), + Some(Decimal128::new_from_i128(38, 6, 2200000_i128)), + Some(Decimal128::new_from_i128(38, 6, 4400000_i128)), None, - Some(1123456_i128), - Some(1123456_i128), + Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), + Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), ] ); @@ -2483,13 +2484,13 @@ mod tests { DecimalArray, &decimal_type, vec![ - Some(1100000_i128), - Some(2200000_i128), - Some(4400000_i128), + Some(Decimal128::new_from_i128(38, 6, 1100000_i128)), + Some(Decimal128::new_from_i128(38, 6, 2200000_i128)), + Some(Decimal128::new_from_i128(38, 6, 4400000_i128)), None, - Some(1123456_i128), - Some(1123456_i128), - Some(1123456_i128), + Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), + Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), + Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), ] ); } diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 590ed5b0f735..068b9dedf59b 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -52,7 
+52,7 @@ macro_rules! compare_op { } let null_bit_buffer = - combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?; + combine_option_bitmap(&[$left.data_ref(), $right.data_ref()], $left.len())?; // Safety: // `i < $left.len()` and $left.len() == $right.len() @@ -86,7 +86,7 @@ macro_rules! compare_op_primitive { } let null_bit_buffer = - combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?; + combine_option_bitmap(&[$left.data_ref(), $right.data_ref()], $left.len())?; let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8); let lhs_chunks_iter = $left.values().chunks_exact(8); @@ -258,7 +258,7 @@ where } let null_bit_buffer = - combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; + combine_option_bitmap(&[left.data_ref(), right.data_ref()], left.len())?; let mut result = BooleanBufferBuilder::new(left.len()); for i in 0..left.len() { @@ -548,6 +548,89 @@ pub fn ilike_utf8_scalar( Ok(BooleanArray::from(data)) } +/// Perform SQL `left NOT ILIKE right` operation on [`StringArray`] / +/// [`LargeStringArray`]. +/// +/// See the documentation on [`like_utf8`] for more details. +pub fn nilike_utf8( + left: &GenericStringArray, + right: &GenericStringArray, +) -> Result { + regex_like(left, right, true, |re_pattern| { + Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| { + ArrowError::ComputeError(format!( + "Unable to build regex from ILIKE pattern: {}", + e + )) + }) + }) +} + +/// Perform SQL `left NOT ILIKE right` operation on [`StringArray`] / +/// [`LargeStringArray`] and a scalar. +/// +/// See the documentation on [`like_utf8`] for more details. 
+pub fn nilike_utf8_scalar( + left: &GenericStringArray, + right: &str, +) -> Result { + let null_bit_buffer = left.data().null_buffer().cloned(); + let mut result = BooleanBufferBuilder::new(left.len()); + + if !right.contains(is_like_pattern) { + // fast path, can use equals + for i in 0..left.len() { + result.append(left.value(i) != right); + } + } else if right.ends_with('%') && !right[..right.len() - 1].contains(is_like_pattern) + { + // fast path, can use ends_with + for i in 0..left.len() { + result.append( + !left + .value(i) + .to_uppercase() + .starts_with(&right[..right.len() - 1].to_uppercase()), + ); + } + } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) { + // fast path, can use starts_with + for i in 0..left.len() { + result.append( + !left + .value(i) + .to_uppercase() + .ends_with(&right[1..].to_uppercase()), + ); + } + } else { + let re_pattern = escape(right).replace('%', ".*").replace('_', "."); + let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| { + ArrowError::ComputeError(format!( + "Unable to build regex from ILIKE pattern: {}", + e + )) + })?; + for i in 0..left.len() { + let haystack = left.value(i); + result.append(!re.is_match(haystack)); + } + } + + let data = unsafe { + ArrayData::new_unchecked( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + 0, + vec![result.finish()], + vec![], + ) + }; + Ok(BooleanArray::from(data)) +} + /// Perform SQL `array ~ regex_array` operation on [`StringArray`] / [`LargeStringArray`]. /// If `regex_array` element has an empty value, the corresponding result value is always true. 
/// @@ -567,7 +650,7 @@ pub fn regexp_is_match_utf8( )); } let null_bit_buffer = - combine_option_bitmap(array.data_ref(), regex_array.data_ref(), array.len())?; + combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len())?; let mut patterns: HashMap = HashMap::new(); let mut result = BooleanBufferBuilder::new(array.len()); @@ -1676,7 +1759,8 @@ where )); } - let null_bit_buffer = combine_option_bitmap(left.data_ref(), right.data_ref(), len)?; + let null_bit_buffer = + combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?; // we process the data in chunks so that each iteration results in one u64 of comparison result bits const CHUNK_SIZE: usize = 64; @@ -2617,7 +2701,7 @@ where let num_bytes = bit_util::ceil(left_len, 8); let not_both_null_bit_buffer = - match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? { + match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? { Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; @@ -2674,7 +2758,7 @@ where let num_bytes = bit_util::ceil(left_len, 8); let not_both_null_bit_buffer = - match combine_option_bitmap(left.data_ref(), right.data_ref(), left_len)? { + match combine_option_bitmap(&[left.data_ref(), right.data_ref()], left_len)? 
{ Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; @@ -3983,6 +4067,60 @@ mod tests { vec![false, true, false, false] ); + test_utf8!( + test_utf8_array_nilike, + vec!["arrow", "arrow", "ARROW", "arrow", "ARROW", "ARROWS", "arROw"], + vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"], + nilike_utf8, + vec![false, false, false, true, true, false, true] + ); + test_utf8_scalar!( + nilike_utf8_scalar_escape_testing, + vec!["varchar(255)", "int(255)", "varchar", "int"], + "%(%)%", + nilike_utf8_scalar, + vec![false, false, true, true] + ); + test_utf8_scalar!( + test_utf8_array_nilike_scalar, + vec!["arrow", "parquet", "datafusion", "flight"], + "%AR%", + nilike_utf8_scalar, + vec![false, false, true, true] + ); + + test_utf8_scalar!( + test_utf8_array_nilike_scalar_start, + vec!["arrow", "parrow", "arrows", "ARR"], + "aRRow%", + nilike_utf8_scalar, + vec![false, true, false, true] + ); + + test_utf8_scalar!( + test_utf8_array_nilike_scalar_end, + vec!["ArroW", "parrow", "ARRowS", "arr"], + "%arrow", + nilike_utf8_scalar, + vec![false, false, true, true] + ); + + test_utf8_scalar!( + test_utf8_array_nilike_scalar_equals, + vec!["arrow", "parrow", "arrows", "arr"], + "arrow", + nilike_utf8_scalar, + vec![false, true, true, true] + ); + + test_utf8_scalar!( + test_utf8_array_nilike_scalar_one, + vec!["arrow", "arrows", "parrow", "arr"], + "arrow_", + nilike_utf8_scalar, + vec![true, false, true, true] + ); + test_utf8!( test_utf8_array_neq, vec!["arrow", "arrow", "arrow", "arrow"], diff --git a/arrow/src/compute/kernels/concat_elements.rs b/arrow/src/compute/kernels/concat_elements.rs index 47cbdfab17e0..bc341df889c0 100644 --- a/arrow/src/compute/kernels/concat_elements.rs +++ b/arrow/src/compute/kernels/concat_elements.rs @@ -45,7 +45,7 @@ pub fn concat_elements_utf8( ))); } - let output_bitmap = combine_option_bitmap(left.data(), right.data(), left.len())?; + let output_bitmap = combine_option_bitmap(&[left.data(), right.data()], 
left.len())?; let left_offsets = left.value_offsets(); let right_offsets = right.value_offsets(); diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs index b59625115209..1af93bff5ad7 100644 --- a/arrow/src/compute/kernels/filter.rs +++ b/arrow/src/compute/kernels/filter.rs @@ -29,7 +29,7 @@ use crate::buffer::{buffer_bin_and, Buffer, MutableBuffer}; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; -use crate::util::bit_chunk_iterator::{UnalignedBitChunk, UnalignedBitChunkIterator}; +use crate::util::bit_iterator::{BitIndexIterator, BitSliceIterator}; use crate::util::bit_util; /// If the filter selects more than this fraction of rows, use @@ -72,47 +72,15 @@ macro_rules! downcast_dict_filter { /// /// 2. Only performant for filters that copy across long contiguous runs #[derive(Debug)] -pub struct SlicesIterator<'a> { - iter: UnalignedBitChunkIterator<'a>, - len: usize, - current_offset: i64, - current_chunk: u64, -} +pub struct SlicesIterator<'a>(BitSliceIterator<'a>); impl<'a> SlicesIterator<'a> { pub fn new(filter: &'a BooleanArray) -> Self { let values = &filter.data_ref().buffers()[0]; let len = filter.len(); - let chunk = UnalignedBitChunk::new(values.as_slice(), filter.offset(), len); - let mut iter = chunk.iter(); - - let current_offset = -(chunk.lead_padding() as i64); - let current_chunk = iter.next().unwrap_or(0); - - Self { - iter, - len, - current_offset, - current_chunk, - } - } - - /// Returns `Some((chunk_offset, bit_offset))` for the next chunk that has at - /// least one bit set, or None if there is no such chunk. 
- /// - /// Where `chunk_offset` is the bit offset to the current `u64` chunk - /// and `bit_offset` is the offset of the first `1` bit in that chunk - fn advance_to_set_bit(&mut self) -> Option<(i64, u32)> { - loop { - if self.current_chunk != 0 { - // Find the index of the first 1 - let bit_pos = self.current_chunk.trailing_zeros(); - return Some((self.current_offset, bit_pos)); - } + let offset = filter.offset(); - self.current_chunk = self.iter.next()?; - self.current_offset += 64; - } + Self(BitSliceIterator::new(values, offset, len)) } } @@ -120,43 +88,7 @@ impl<'a> Iterator for SlicesIterator<'a> { type Item = (usize, usize); fn next(&mut self) -> Option { - // Used as termination condition - if self.len == 0 { - return None; - } - - let (start_chunk, start_bit) = self.advance_to_set_bit()?; - - // Set bits up to start - self.current_chunk |= (1 << start_bit) - 1; - - loop { - if self.current_chunk != u64::MAX { - // Find the index of the first 0 - let end_bit = self.current_chunk.trailing_ones(); - - // Zero out up to end_bit - self.current_chunk &= !((1 << end_bit) - 1); - - return Some(( - (start_chunk + start_bit as i64) as usize, - (self.current_offset + end_bit as i64) as usize, - )); - } - - match self.iter.next() { - Some(next) => { - self.current_chunk = next; - self.current_offset += 64; - } - None => { - return Some(( - (start_chunk + start_bit as i64) as usize, - std::mem::replace(&mut self.len, 0), - )); - } - } - } + self.0.next() } } @@ -165,29 +97,16 @@ impl<'a> Iterator for SlicesIterator<'a> { /// This provides the best performance on most predicates, apart from those which keep /// large runs and therefore favour [`SlicesIterator`] struct IndexIterator<'a> { - current_chunk: u64, - chunk_offset: i64, remaining: usize, - iter: UnalignedBitChunkIterator<'a>, + iter: BitIndexIterator<'a>, } impl<'a> IndexIterator<'a> { - fn new(filter: &'a BooleanArray, len: usize) -> Self { + fn new(filter: &'a BooleanArray, remaining: usize) -> Self { 
assert_eq!(filter.null_count(), 0); let data = filter.data(); - let chunks = - UnalignedBitChunk::new(&data.buffers()[0], data.offset(), data.len()); - let mut iter = chunks.iter(); - - let current_chunk = iter.next().unwrap_or(0); - let chunk_offset = -(chunks.lead_padding() as i64); - - Self { - current_chunk, - chunk_offset, - remaining: len, - iter, - } + let iter = BitIndexIterator::new(&data.buffers()[0], data.offset(), data.len()); + Self { remaining, iter } } } @@ -195,17 +114,13 @@ impl<'a> Iterator for IndexIterator<'a> { type Item = usize; fn next(&mut self) -> Option { - while self.remaining != 0 { - if self.current_chunk != 0 { - let bit_pos = self.current_chunk.trailing_zeros(); - self.current_chunk ^= 1 << bit_pos; - self.remaining -= 1; - return Some((self.chunk_offset + bit_pos as i64) as usize); - } - + if self.remaining != 0 { + // Fascinatingly swapping these two lines around results in a 50% + // performance regression for some benchmarks + let next = self.iter.next().expect("IndexIterator exhausted early"); + self.remaining -= 1; // Must panic if exhausted early as trusted length iterator - self.current_chunk = self.iter.next().expect("IndexIterator exhausted early"); - self.chunk_offset += 64; + return Some(next); } None } @@ -1332,6 +1247,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn fuzz_test_slices_iterator() { let mut rng = thread_rng(); @@ -1401,6 +1317,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn fuzz_filter() { let mut rng = thread_rng(); diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 140a57f33ed5..e399cf9f0c19 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -452,8 +452,7 @@ fn sort_boolean( let mut result = MutableBuffer::new(result_capacity); // sets len to capacity so we can access the whole buffer as a typed slice result.resize(result_capacity, 0); - // Safety: the buffer is always treated as `u32` in the code below - let 
result_slice: &mut [u32] = unsafe { result.typed_data_mut() }; + let result_slice: &mut [u32] = result.typed_data_mut(); if options.nulls_first { let size = nulls_len.min(len); @@ -504,7 +503,7 @@ where .expect("Unable to downcast to decimal array"); let valids = value_indices .into_iter() - .map(|index| (index, decimal_array.value(index as usize))) + .map(|index| (index, decimal_array.value(index as usize).as_i128())) .collect::>(); sort_primitive_inner(decimal_values, null_indices, cmp, options, limit, valids) } @@ -565,8 +564,7 @@ where let mut result = MutableBuffer::new(result_capacity); // sets len to capacity so we can access the whole buffer as a typed slice result.resize(result_capacity, 0); - // Safety: the buffer is always treated as `u32` in the code below - let result_slice: &mut [u32] = unsafe { result.typed_data_mut() }; + let result_slice: &mut [u32] = result.typed_data_mut(); if options.nulls_first { let size = nulls_len.min(len); diff --git a/arrow/src/compute/kernels/substring.rs b/arrow/src/compute/kernels/substring.rs index f1b6e8d4aa79..024f5633fef4 100644 --- a/arrow/src/compute/kernels/substring.rs +++ b/arrow/src/compute/kernels/substring.rs @@ -16,7 +16,8 @@ // under the License. //! Defines kernel to extract a substring of an Array -//! Supported array types: \[Large\]StringArray, \[Large\]BinaryArray +//! Supported array types: +//! [GenericStringArray], [GenericBinaryArray], [FixedSizeBinaryArray], [DictionaryArray] use crate::array::DictionaryArray; use crate::buffer::MutableBuffer; @@ -29,7 +30,7 @@ use crate::{ use std::cmp::Ordering; use std::sync::Arc; -/// Returns an ArrayRef with substrings of all the elements in `array`. +/// Returns an [`ArrayRef`] with substrings of all the elements in `array`. /// /// # Arguments /// @@ -38,7 +39,7 @@ use std::sync::Arc; /// otherwise count from the end of the string. /// /// * `length`(option) - The length of all substrings. 
-/// If `length` is `None`, then the substring is from `start` to the end of the string. +/// If `length` is [None], then the substring is from `start` to the end of the string. /// /// Attention: Both `start` and `length` are counted by byte, not by char. /// @@ -53,9 +54,10 @@ use std::sync::Arc; /// ``` /// /// # Error -/// - The function errors when the passed array is not a \[Large\]String array, \[Large\]Binary -/// array, or DictionaryArray with \[Large\]String or \[Large\]Binary as its value type. +/// - The function errors when the passed array is not a [`GenericStringArray`], [`GenericBinaryArray`], [`FixedSizeBinaryArray`] +/// or [`DictionaryArray`] with supported array type as its value type. /// - The function errors if the offset of a substring in the input array is at invalid char boundary (only for \[Large\]String array). +/// It is recommended to use [`substring_by_char`] if the input array may contain non-ASCII chars. /// /// ## Example of trying to get an invalid utf-8 format substring /// ``` @@ -150,6 +152,101 @@ pub fn substring(array: &dyn Array, start: i64, length: Option) -> Result= 0`, then count from the start of the string, +/// otherwise count from the end of the string. +/// +/// * `length`(option) - The length of all substrings. +/// If `length` is `None`, then the substring is from `start` to the end of the string. +/// +/// Attention: Both `start` and `length` are counted by char. +/// +/// # Performance +/// This function is slower than [substring]. +/// Theoretically, the time complexity is `O(n)` where `n` is the length of the value buffer. +/// It is recommended to use [substring] if the input array only contains ASCII chars. 
+/// +/// # Basic usage +/// ``` +/// # use arrow::array::StringArray; +/// # use arrow::compute::kernels::substring::substring_by_char; +/// let array = StringArray::from(vec![Some("arrow"), None, Some("Γ ⊢x:T")]); +/// let result = substring_by_char(&array, 1, Some(4)).unwrap(); +/// assert_eq!(result, StringArray::from(vec![Some("rrow"), None, Some(" ⊢x:")])); +/// ``` +pub fn substring_by_char( + array: &GenericStringArray, + start: i64, + length: Option, +) -> Result> { + let mut vals = BufferBuilder::::new({ + let offsets = array.value_offsets(); + (offsets[array.len()] - offsets[0]).to_usize().unwrap() + }); + let mut new_offsets = BufferBuilder::::new(array.len() + 1); + new_offsets.append(OffsetSize::zero()); + let length = length.map(|len| len.to_usize().unwrap()); + + array.iter().for_each(|val| { + if let Some(val) = val { + let char_count = val.chars().count(); + let start = if start >= 0 { + start.to_usize().unwrap() + } else { + char_count - (-start).to_usize().unwrap().min(char_count) + }; + let (start_offset, end_offset) = get_start_end_offset(val, start, length); + vals.append_slice(&val.as_bytes()[start_offset..end_offset]); + } + new_offsets.append(OffsetSize::from_usize(vals.len()).unwrap()); + }); + let data = unsafe { + ArrayData::new_unchecked( + GenericStringArray::::get_data_type(), + array.len(), + None, + array + .data_ref() + .null_buffer() + .map(|b| b.bit_slice(array.offset(), array.len())), + 0, + vec![new_offsets.finish(), vals.finish()], + vec![], + ) + }; + Ok(GenericStringArray::::from(data)) +} + +/// * `val` - string +/// * `start` - the start char index of the substring +/// * `length` - the char length of the substring +/// +/// Return the `start` and `end` offset (by byte) of the substring +fn get_start_end_offset( + val: &str, + start: usize, + length: Option, +) -> (usize, usize) { + let len = val.len(); + let mut offset_char_iter = val.char_indices(); + let start_offset = offset_char_iter + .nth(start) + .map_or(len, 
|(offset, _)| offset); + let end_offset = length.map_or(len, |length| { + if length > 0 { + offset_char_iter + .nth(length - 1) + .map_or(len, |(offset, _)| offset) + } else { + start_offset + } + }); + (start_offset, end_offset) +} + fn binary_substring( array: &GenericBinaryArray, start: OffsetSize, @@ -348,218 +445,138 @@ mod tests { use super::*; use crate::datatypes::*; - #[allow(clippy::type_complexity)] - fn with_nulls_generic_binary() -> Result<()> { - let cases: Vec<(Vec>, i64, Option, Vec>)> = vec![ - // all-nulls array is always identical - (vec![None, None, None], -1, Some(1), vec![None, None, None]), + /// A helper macro to generate test cases. + /// # Arguments + /// * `input` - A vector which array can be built from. + /// * `start` - The start index of the substring. + /// * `len` - The length of the substring. + /// * `result` - The expected result of substring, which is a vector that array can be built from. + /// # Return + /// A vector of `(input, start, len, result)`. + /// + /// Users can provide any number of `(start, len, result)` to generate test cases for one `input`. + macro_rules! gen_test_cases { + ($input:expr, $(($start:expr, $len:expr, $result:expr)), *) => { + [ + $( + ($input.clone(), $start, $len, $result), + )* + ] + }; + } + + /// A helper macro to test the substring functions. + /// # Arguments + /// * `cases` - The test cases which is a vector of `(input, start, len, result)`. + /// Please look at [`gen_test_cases`] to find how to generate it. + /// * `array_ty` - The array type. + /// * `substring_fn` - Either [`substring`] or [`substring_by_char`]. + macro_rules! 
do_test { + ($cases:expr, $array_ty:ty, $substring_fn:ident) => { + $cases + .into_iter() + .for_each(|(array, start, length, expected)| { + let array = <$array_ty>::from(array); + let result = $substring_fn(&array, start, length).unwrap(); + let result = result.as_any().downcast_ref::<$array_ty>().unwrap(); + let expected = <$array_ty>::from(expected); + assert_eq!(&expected, result); + }) + }; + } + + fn with_nulls_generic_binary() { + let input = vec![ + Some("hello".as_bytes()), + None, + Some(&[0xf8, 0xf9, 0xff, 0xfa]), + ]; + // all-nulls array is always identical + let base_case = gen_test_cases!( + vec![None, None, None], + (-1, Some(1), vec![None, None, None]) + ); + let cases = gen_test_cases!( + input, // identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - None, - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), + (0, None, input.clone()), // 0 length -> Nothing - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - Some(0), - vec![Some(&[]), None, Some(&[])], - ), + (0, Some(0), vec![Some(&[]), None, Some(&[])]), // high start -> Nothing - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 1000, - Some(0), - vec![Some(&[]), None, Some(&[])], - ), + (1000, Some(0), vec![Some(&[]), None, Some(&[])]), // high negative start -> identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - -1000, - None, - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), + (-1000, None, input.clone()), // high length -> identity - ( - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - 0, - Some(1000), - vec![Some(b"hello"), None, Some(&[0xf8, 0xf9, 0xff, 0xfa])], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericBinaryArray::::from(array); - let result: ArrayRef = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - - let result = result - .as_any() - 
.downcast_ref::>() - .unwrap(); - let expected = GenericBinaryArray::::from(expected); - assert_eq!(&expected, result); - Ok(()) - }, - )?; - - Ok(()) + (0, Some(1000), input.clone()) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericBinaryArray, + substring + ); } #[test] - fn with_nulls_binary() -> Result<()> { + fn with_nulls_binary() { with_nulls_generic_binary::() } #[test] - fn with_nulls_large_binary() -> Result<()> { + fn with_nulls_large_binary() { with_nulls_generic_binary::() } - #[allow(clippy::type_complexity)] - fn without_nulls_generic_binary() -> Result<()> { - let cases: Vec<(Vec<&[u8]>, i64, Option, Vec<&[u8]>)> = vec![ - // empty array is always identical - (vec![b"", b"", b""], 2, Some(1), vec![b"", b"", b""]), + fn without_nulls_generic_binary() { + let input = vec!["hello".as_bytes(), b"", &[0xf8, 0xf9, 0xff, 0xfa]]; + // empty array is always identical + let base_case = gen_test_cases!( + vec!["".as_bytes(), b"", b""], + (2, Some(1), vec!["".as_bytes(), b"", b""]) + ); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 0, - None, - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - None, - vec![b"ello", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 2, - None, - vec![b"llo", b"", &[0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 3, - None, - vec![b"lo", b"", &[0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 10, - None, - vec![b"", b"", b""], - ), + (1, None, vec![b"ello", b"", &[0xf9, 0xff, 0xfa]]), + (2, None, vec![b"llo", b"", &[0xff, 0xfa]]), + (3, None, vec![b"lo", b"", &[0xfa]]), + (10, None, vec![b"", b"", b""]), // increase start negatively - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -1, - None, - vec![b"o", b"", &[0xfa]], - ), - ( - vec![b"hello", b"", 
&[0xf8, 0xf9, 0xff, 0xfa]], - -2, - None, - vec![b"lo", b"", &[0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - None, - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -10, - None, - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - ), + (-1, None, vec![b"o", b"", &[0xfa]]), + (-2, None, vec![b"lo", b"", &[0xff, 0xfa]]), + (-3, None, vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]), + (-10, None, input.clone()), // increase length - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(1), - vec![b"e", b"", &[0xf9]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(2), - vec![b"el", b"", &[0xf9, 0xff]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(3), - vec![b"ell", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - 1, - Some(4), - vec![b"ello", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(1), - vec![b"l", b"", &[0xf9]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(2), - vec![b"ll", b"", &[0xf9, 0xff]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(3), - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ( - vec![b"hello", b"", &[0xf8, 0xf9, 0xff, 0xfa]], - -3, - Some(4), - vec![b"llo", b"", &[0xf9, 0xff, 0xfa]], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericBinaryArray::::from(array); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericBinaryArray::::from(expected); - assert_eq!(&expected, result,); - Ok(()) - }, - )?; - - Ok(()) + (1, Some(1), vec![b"e", b"", &[0xf9]]), + (1, Some(2), vec![b"el", b"", &[0xf9, 0xff]]), + (1, Some(3), vec![b"ell", b"", &[0xf9, 0xff, 0xfa]]), + (1, Some(4), 
vec![b"ello", b"", &[0xf9, 0xff, 0xfa]]), + (-3, Some(1), vec![b"l", b"", &[0xf9]]), + (-3, Some(2), vec![b"ll", b"", &[0xf9, 0xff]]), + (-3, Some(3), vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]), + (-3, Some(4), vec![b"llo", b"", &[0xf9, 0xff, 0xfa]]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericBinaryArray, + substring + ); } #[test] - fn without_nulls_binary() -> Result<()> { + fn without_nulls_binary() { without_nulls_generic_binary::() } #[test] - fn without_nulls_large_binary() -> Result<()> { + fn without_nulls_large_binary() { without_nulls_generic_binary::() } - fn generic_binary_with_non_zero_offset() -> Result<()> { + fn generic_binary_with_non_zero_offset() { let values = 0_u8..15; let offsets = &[ O::zero(), @@ -576,11 +593,12 @@ mod tests { .add_buffer(Buffer::from_iter(values)) .null_bit_buffer(Some(Buffer::from(bitmap))) .offset(1) - .build()?; + .build() + .unwrap(); // array is `[null, [10, 11, 12, 13, 14]]` let array = GenericBinaryArray::::from(data); // result is `[null, [11, 12, 13, 14]]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::>() @@ -588,277 +606,96 @@ mod tests { let expected = GenericBinaryArray::::from_opt_vec(vec![None, Some(&[11_u8, 12, 13, 14])]); assert_eq!(result, &expected); - - Ok(()) } #[test] - fn binary_with_non_zero_offset() -> Result<()> { + fn binary_with_non_zero_offset() { generic_binary_with_non_zero_offset::() } #[test] - fn large_binary_with_non_zero_offset() -> Result<()> { + fn large_binary_with_non_zero_offset() { generic_binary_with_non_zero_offset::() } #[test] - #[allow(clippy::type_complexity)] - fn with_nulls_fixed_size_binary() -> Result<()> { - let cases: Vec<(Vec>, i64, Option, Vec>)> = vec![ - // all-nulls array is always identical - (vec![None, None, None], 3, Some(2), vec![None, None, None]), + fn with_nulls_fixed_size_binary() { + let input = vec![Some("cat".as_bytes()), None, 
Some(&[0xf8, 0xf9, 0xff])]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (3, Some(2), vec![None, None, None])); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 0, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - None, - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 2, - None, - vec![Some(b"t"), None, Some(&[0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 3, - None, - vec![Some(b""), None, Some(&[])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 10, - None, - vec![Some(b""), None, Some(b"")], - ), + (1, None, vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (2, None, vec![Some(b"t"), None, Some(&[0xff])]), + (3, None, vec![Some(b""), None, Some(b"")]), + (10, None, vec![Some(b""), None, Some(b"")]), // increase start negatively - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -1, - None, - vec![Some(b"t"), None, Some(&[0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -2, - None, - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -10, - None, - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), + (-1, None, vec![Some(b"t"), None, Some(&[0xff])]), + (-2, None, vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (-3, None, input.clone()), + (-10, None, input.clone()), // increase length - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - Some(1), - vec![Some(b"a"), None, Some(&[0xf9])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - 
Some(2), - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - 1, - Some(3), - vec![Some(b"at"), None, Some(&[0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(1), - vec![Some(b"c"), None, Some(&[0xf8])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(2), - vec![Some(b"ca"), None, Some(&[0xf8, 0xf9])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(3), - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ( - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - -3, - Some(4), - vec![Some(b"cat"), None, Some(&[0xf8, 0xf9, 0xff])], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = FixedSizeBinaryArray::try_from_sparse_iter(array.into_iter()) - .unwrap(); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - let expected = - FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter()) - .unwrap(); - assert_eq!(&expected, result,); - Ok(()) - }, - )?; - - Ok(()) + (1, Some(1), vec![Some(b"a"), None, Some(&[0xf9])]), + (1, Some(2), vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (1, Some(3), vec![Some(b"at"), None, Some(&[0xf9, 0xff])]), + (-3, Some(1), vec![Some(b"c"), None, Some(&[0xf8])]), + (-3, Some(2), vec![Some(b"ca"), None, Some(&[0xf8, 0xf9])]), + (-3, Some(3), input.clone()), + (-3, Some(4), input.clone()) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + FixedSizeBinaryArray, + substring + ); } #[test] - #[allow(clippy::type_complexity)] - fn without_nulls_fixed_size_binary() -> Result<()> { - let cases: Vec<(Vec<&[u8]>, i64, Option, Vec<&[u8]>)> = vec![ - // empty array is always identical - (vec![b"", b"", &[]], 3, Some(2), vec![b"", b"", &[]]), + fn without_nulls_fixed_size_binary() { + let input = 
vec!["cat".as_bytes(), b"dog", &[0xf8, 0xf9, 0xff]]; + // empty array is always identical + let base_case = gen_test_cases!( + vec!["".as_bytes(), &[], &[]], + (1, Some(2), vec!["".as_bytes(), &[], &[]]) + ); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), // increase start - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 0, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - None, - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 2, - None, - vec![b"t", b"g", &[0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 3, - None, - vec![b"", b"", &[]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 10, - None, - vec![b"", b"", b""], - ), + (1, None, vec![b"at", b"og", &[0xf9, 0xff]]), + (2, None, vec![b"t", b"g", &[0xff]]), + (3, None, vec![&[], &[], &[]]), + (10, None, vec![&[], &[], &[]]), // increase start negatively - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -1, - None, - vec![b"t", b"g", &[0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -2, - None, - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -10, - None, - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), + (-1, None, vec![b"t", b"g", &[0xff]]), + (-2, None, vec![b"at", b"og", &[0xf9, 0xff]]), + (-3, None, input.clone()), + (-10, None, input.clone()), // increase length - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(1), - vec![b"a", b"o", &[0xf9]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(2), - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - 1, - Some(3), - vec![b"at", b"og", &[0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(1), - vec![b"c", b"d", &[0xf8]], - ), - 
( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(2), - vec![b"ca", b"do", &[0xf8, 0xf9]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(3), - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ( - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - -3, - Some(4), - vec![b"cat", b"dog", &[0xf8, 0xf9, 0xff]], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = - FixedSizeBinaryArray::try_from_iter(array.into_iter()).unwrap(); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - let expected = - FixedSizeBinaryArray::try_from_iter(expected.into_iter()).unwrap(); - assert_eq!(&expected, result,); - Ok(()) - }, - )?; - - Ok(()) + (1, Some(1), vec![b"a", b"o", &[0xf9]]), + (1, Some(2), vec![b"at", b"og", &[0xf9, 0xff]]), + (1, Some(3), vec![b"at", b"og", &[0xf9, 0xff]]), + (-3, Some(1), vec![b"c", b"d", &[0xf8]]), + (-3, Some(2), vec![b"ca", b"do", &[0xf8, 0xf9]]), + (-3, Some(3), input.clone()), + (-3, Some(4), input.clone()) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + FixedSizeBinaryArray, + substring + ); } #[test] - fn fixed_size_binary_with_non_zero_offset() -> Result<()> { + fn fixed_size_binary_with_non_zero_offset() { let values: [u8; 15] = *b"hellotherearrow"; // set the first and third element to be valid let bits_v = [0b101_u8]; @@ -873,7 +710,7 @@ mod tests { // array is `[null, "arrow"]` let array = FixedSizeBinaryArray::from(data); // result is `[null, "rrow"]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::() @@ -883,165 +720,90 @@ mod tests { ) .unwrap(); assert_eq!(result, &expected); - - Ok(()) } - fn with_nulls_generic_string() -> Result<()> { - let cases = vec![ - // all-nulls array is always identical - (vec![None, None, None], 0, None, vec![None, 
None, None]), + fn with_nulls_generic_string() { + let input = vec![Some("hello"), None, Some("word")]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (0, None, vec![None, None, None])); + let cases = gen_test_cases!( + input, // identity - ( - vec![Some("hello"), None, Some("word")], - 0, - None, - vec![Some("hello"), None, Some("word")], - ), + (0, None, input.clone()), // 0 length -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(0), - vec![Some(""), None, Some("")], - ), + (0, Some(0), vec![Some(""), None, Some("")]), // high start -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 1000, - Some(0), - vec![Some(""), None, Some("")], - ), + (1000, Some(0), vec![Some(""), None, Some("")]), // high negative start -> identity - ( - vec![Some("hello"), None, Some("word")], - -1000, - None, - vec![Some("hello"), None, Some("word")], - ), + (-1000, None, input.clone()), // high length -> identity - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(1000), - vec![Some("hello"), None, Some("word")], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = GenericStringArray::::from(array); - let result: ArrayRef = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericStringArray::::from(expected); - assert_eq!(&expected, result); - Ok(()) - }, - )?; - - Ok(()) + (0, Some(1000), input.clone()) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring + ); } #[test] - fn with_nulls_string() -> Result<()> { + fn with_nulls_string() { with_nulls_generic_string::() } #[test] - fn with_nulls_large_string() -> Result<()> { + fn with_nulls_large_string() { with_nulls_generic_string::() } - fn without_nulls_generic_string() -> Result<()> { - let cases = vec![ - // empty array is always 
identical - (vec!["", "", ""], 0, None, vec!["", "", ""]), - // increase start - ( - vec!["hello", "", "word"], - 0, - None, - vec!["hello", "", "word"], - ), - (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), - (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), - (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), - (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), + fn without_nulls_generic_string() { + let input = vec!["hello", "", "word"]; + // empty array is always identical + let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), + (1, None, vec!["ello", "", "ord"]), + (2, None, vec!["llo", "", "rd"]), + (3, None, vec!["lo", "", "d"]), + (10, None, vec!["", "", ""]), // increase start negatively - (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), - (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), - (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), - ( - vec!["hello", "", "word"], - -10, - None, - vec!["hello", "", "word"], - ), + (-1, None, vec!["o", "", "d"]), + (-2, None, vec!["lo", "", "rd"]), + (-3, None, vec!["llo", "", "ord"]), + (-10, None, input.clone()), // increase length - (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), - (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), - ( - vec!["hello", "", "word"], - 1, - Some(3), - vec!["ell", "", "ord"], - ), - ( - vec!["hello", "", "word"], - 1, - Some(4), - vec!["ello", "", "ord"], - ), - (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), - (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), - ( - vec!["hello", "", "word"], - -3, - Some(3), - vec!["llo", "", "ord"], - ), - ( - vec!["hello", "", "word"], - -3, - Some(4), - vec!["llo", "", "ord"], - ), - ]; - - cases.into_iter().try_for_each::<_, Result<()>>( - |(array, start, length, expected)| { - let array = 
GenericStringArray::::from(array); - let result = substring(&array, start, length)?; - assert_eq!(array.len(), result.len()); - let result = result - .as_any() - .downcast_ref::>() - .unwrap(); - let expected = GenericStringArray::::from(expected); - assert_eq!(&expected, result,); - Ok(()) - }, - )?; - - Ok(()) + (1, Some(1), vec!["e", "", "o"]), + (1, Some(2), vec!["el", "", "or"]), + (1, Some(3), vec!["ell", "", "ord"]), + (1, Some(4), vec!["ello", "", "ord"]), + (-3, Some(1), vec!["l", "", "o"]), + (-3, Some(2), vec!["ll", "", "or"]), + (-3, Some(3), vec!["llo", "", "ord"]), + (-3, Some(4), vec!["llo", "", "ord"]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring + ); } #[test] - fn without_nulls_string() -> Result<()> { + fn without_nulls_string() { without_nulls_generic_string::() } #[test] - fn without_nulls_large_string() -> Result<()> { + fn without_nulls_large_string() { without_nulls_generic_string::() } - fn generic_string_with_non_zero_offset() -> Result<()> { + fn generic_string_with_non_zero_offset() { let values = "hellotherearrow"; let offsets = &[ O::zero(), @@ -1058,45 +820,164 @@ mod tests { .add_buffer(Buffer::from(values)) .null_bit_buffer(Some(Buffer::from(bitmap))) .offset(1) - .build()?; + .build() + .unwrap(); // array is `[null, "arrow"]` let array = GenericStringArray::::from(data); // result is `[null, "rrow"]` - let result = substring(&array, 1, None)?; + let result = substring(&array, 1, None).unwrap(); let result = result .as_any() .downcast_ref::>() .unwrap(); let expected = GenericStringArray::::from(vec![None, Some("rrow")]); assert_eq!(result, &expected); - - Ok(()) } #[test] - fn string_with_non_zero_offset() -> Result<()> { + fn string_with_non_zero_offset() { generic_string_with_non_zero_offset::() } #[test] - fn large_string_with_non_zero_offset() -> Result<()> { + fn large_string_with_non_zero_offset() { generic_string_with_non_zero_offset::() } + fn 
with_nulls_generic_string_by_char() { + let input = vec![Some("hello"), None, Some("Γ ⊢x:T")]; + // all-nulls array is always identical + let base_case = + gen_test_cases!(vec![None, None, None], (0, None, vec![None, None, None])); + let cases = gen_test_cases!( + input, + // identity + (0, None, input.clone()), + // 0 length -> Nothing + (0, Some(0), vec![Some(""), None, Some("")]), + // high start -> Nothing + (1000, Some(0), vec![Some(""), None, Some("")]), + // high negative start -> identity + (-1000, None, input.clone()), + // high length -> identity + (0, Some(1000), input.clone()) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring_by_char + ); + } + #[test] - fn dictionary() -> Result<()> { - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - _dictionary::()?; - Ok(()) + fn with_nulls_string_by_char() { + with_nulls_generic_string_by_char::() + } + + #[test] + fn with_nulls_large_string_by_char() { + with_nulls_generic_string_by_char::() + } + + fn without_nulls_generic_string_by_char() { + let input = vec!["hello", "", "Γ ⊢x:T"]; + // empty array is always identical + let base_case = gen_test_cases!(vec!["", "", ""], (0, None, vec!["", "", ""])); + let cases = gen_test_cases!( + input, + //identity + (0, None, input.clone()), + // increase start + (1, None, vec!["ello", "", " ⊢x:T"]), + (2, None, vec!["llo", "", "⊢x:T"]), + (3, None, vec!["lo", "", "x:T"]), + (10, None, vec!["", "", ""]), + // increase start negatively + (-1, None, vec!["o", "", "T"]), + (-2, None, vec!["lo", "", ":T"]), + (-4, None, vec!["ello", "", "⊢x:T"]), + (-10, None, input.clone()), + // increase length + (1, Some(1), vec!["e", "", " "]), + (1, Some(2), vec!["el", "", " ⊢"]), + (1, Some(3), vec!["ell", "", " ⊢x"]), + (1, Some(6), vec!["ello", "", " ⊢x:T"]), + (-4, Some(1), vec!["e", "", "⊢"]), + (-4, Some(2), vec!["el", "", "⊢x"]), + (-4, Some(3), 
vec!["ell", "", "⊢x:"]), + (-4, Some(4), vec!["ello", "", "⊢x:T"]) + ); + + do_test!( + [&base_case[..], &cases[..]].concat(), + GenericStringArray, + substring_by_char + ); + } + + #[test] + fn without_nulls_string_by_char() { + without_nulls_generic_string_by_char::() } - fn _dictionary() -> Result<()> { + #[test] + fn without_nulls_large_string_by_char() { + without_nulls_generic_string_by_char::() + } + + fn generic_string_by_char_with_non_zero_offset() { + let values = "S→T = Πx:S.T"; + let offsets = &[ + O::zero(), + O::from_usize(values.char_indices().nth(3).map(|(pos, _)| pos).unwrap()) + .unwrap(), + O::from_usize(values.char_indices().nth(6).map(|(pos, _)| pos).unwrap()) + .unwrap(), + O::from_usize(values.len()).unwrap(), + ]; + // set the first and third element to be valid + let bitmap = [0b101_u8]; + + let data = ArrayData::builder(GenericStringArray::::get_data_type()) + .len(2) + .add_buffer(Buffer::from_slice_ref(offsets)) + .add_buffer(Buffer::from(values)) + .null_bit_buffer(Some(Buffer::from(bitmap))) + .offset(1) + .build() + .unwrap(); + // array is `[null, "Πx:S.T"]` + let array = GenericStringArray::::from(data); + // result is `[null, "x:S.T"]` + let result = substring_by_char(&array, 1, None).unwrap(); + let expected = GenericStringArray::::from(vec![None, Some("x:S.T")]); + assert_eq!(result, expected); + } + + #[test] + fn string_with_non_zero_offset_by_char() { + generic_string_by_char_with_non_zero_offset::() + } + + #[test] + fn large_string_with_non_zero_offset_by_char() { + generic_string_by_char_with_non_zero_offset::() + } + + #[test] + fn dictionary() { + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + _dictionary::(); + } + + fn _dictionary() { const TOTAL: i32 = 100; let v = ["aaa", "bbb", "ccc", "ddd", "eee"]; @@ -1116,7 +997,7 @@ mod tests { let expected: Vec> = data.iter().map(|opt| opt.map(|s| &s[1..3])).collect(); - let res = 
substring(&dict_array, 1, Some(2))?; + let res = substring(&dict_array, 1, Some(2)).unwrap(); let actual = res.as_any().downcast_ref::>().unwrap(); let actual: Vec> = actual .values() @@ -1129,8 +1010,6 @@ mod tests { for i in 0..TOTAL as usize { assert_eq!(expected[i], actual[i],); } - - Ok(()) } #[test] diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 567bf5c8ba27..624e9ddcdb58 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -524,7 +524,7 @@ where if decimal_values.is_null(index) { Ok(None) } else { - Ok(Some(decimal_values.value(index))) + Ok(Some(decimal_values.value(index).as_i128())) } }); let t: Result>> = t.transpose(); @@ -688,8 +688,7 @@ where let bytes_offset = (data_len + 1) * std::mem::size_of::(); let mut offsets_buffer = MutableBuffer::from_len_zeroed(bytes_offset); - // Safety: the buffer is always treated as as a type of `OffsetSize` in the code below - let offsets = unsafe { offsets_buffer.typed_data_mut() }; + let offsets = offsets_buffer.typed_data_mut(); let mut values = MutableBuffer::new(0); let mut length_so_far = OffsetSize::zero(); offsets[0] = length_so_far; diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow/src/compute/kernels/temporal.rs index aa49462da864..9998649ead30 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow/src/compute/kernels/temporal.rs @@ -17,7 +17,7 @@ //! Defines temporal kernels for time and date related functions. -use chrono::{Datelike, Timelike}; +use chrono::{Datelike, NaiveDate, NaiveDateTime, Timelike}; use crate::array::*; use crate::datatypes::*; @@ -112,6 +112,34 @@ macro_rules! 
return_compute_error_with { }; } +trait ChronoDateQuarter { + /// Returns a value in range `1..=4` indicating the quarter this date falls into + fn quarter(&self) -> u32; + + /// Returns a value in range `0..=3` indicating the quarter (zero-based) this date falls into + fn quarter0(&self) -> u32; +} + +impl ChronoDateQuarter for NaiveDateTime { + fn quarter(&self) -> u32 { + self.quarter0() + 1 + } + + fn quarter0(&self) -> u32 { + self.month0() / 3 + } +} + +impl ChronoDateQuarter for NaiveDate { + fn quarter(&self) -> u32 { + self.quarter0() + 1 + } + + fn quarter0(&self) -> u32 { + self.month0() / 3 + } +} + #[cfg(not(feature = "chrono-tz"))] pub fn using_chrono_tz_and_utc_naive_date_time( _tz: &str, @@ -183,6 +211,34 @@ where Ok(b.finish()) } +/// Extracts the quarter of a given temporal array as an array of integers +pub fn quarter(array: &PrimitiveArray) -> Result +where + T: ArrowTemporalType + ArrowNumericType, + i64: std::convert::From, +{ + let mut b = Int32Builder::new(array.len()); + match array.data_type() { + &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { + extract_component_from_array!(array, b, quarter, value_as_datetime) + } + &DataType::Timestamp(_, Some(ref tz)) => { + let mut scratch = Parsed::new(); + extract_component_from_array!( + array, + b, + quarter, + value_as_datetime_with_tz, + tz, + scratch + ) + } + dt => return_compute_error_with!("quarter does not support", dt), + } + + Ok(b.finish()) +} + /// Extracts the month of a given temporal array as an array of integers pub fn month(array: &PrimitiveArray) -> Result where @@ -211,6 +267,37 @@ where Ok(b.finish()) } +/// Extracts the day of week of a given temporal array as an array of +/// integers. +/// +/// Monday is encoded as `0`, Tuesday as `1`, etc. 
+pub fn weekday(array: &PrimitiveArray) -> Result +where + T: ArrowTemporalType + ArrowNumericType, + i64: std::convert::From, +{ + let mut b = Int32Builder::new(array.len()); + match array.data_type() { + &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) => { + extract_component_from_array!(array, b, weekday, value_as_datetime) + } + &DataType::Timestamp(_, Some(ref tz)) => { + let mut scratch = Parsed::new(); + extract_component_from_array!( + array, + b, + weekday, + value_as_datetime_with_tz, + tz, + scratch + ) + } + dt => return_compute_error_with!("weekday does not support", dt), + } + + Ok(b.finish()) +} + /// Extracts the day of a given temporal array as an array of integers pub fn day(array: &PrimitiveArray) -> Result where @@ -389,6 +476,48 @@ mod tests { assert_eq!(2012, b.value(2)); } + #[test] + fn test_temporal_array_date64_quarter() { + //1514764800000 -> 2018-01-01 + //1566275025000 -> 2019-08-20 + let a: PrimitiveArray = + vec![Some(1514764800000), None, Some(1566275025000)].into(); + + let b = quarter(&a).unwrap(); + assert_eq!(1, b.value(0)); + assert!(!b.is_valid(1)); + assert_eq!(3, b.value(2)); + } + + #[test] + fn test_temporal_array_date32_quarter() { + let a: PrimitiveArray = vec![Some(1), None, Some(300)].into(); + + let b = quarter(&a).unwrap(); + assert_eq!(1, b.value(0)); + assert!(!b.is_valid(1)); + assert_eq!(4, b.value(2)); + } + + #[test] + fn test_temporal_array_timestamp_quarter_with_timezone() { + use std::sync::Arc; + + // 24 * 60 * 60 = 86400 + let a = Arc::new(TimestampSecondArray::from_vec( + vec![86400 * 90], + Some("+00:00".to_string()), + )); + let b = quarter(&a).unwrap(); + assert_eq!(2, b.value(0)); + let a = Arc::new(TimestampSecondArray::from_vec( + vec![86400 * 90], + Some("-10:00".to_string()), + )); + let b = quarter(&a).unwrap(); + assert_eq!(1, b.value(0)); + } + #[test] fn test_temporal_array_date64_month() { //1514764800000 -> 2018-01-01 @@ -416,7 +545,7 @@ mod tests { fn 
test_temporal_array_timestamp_month_with_timezone() { use std::sync::Arc; - // 24 * 60 * 60 = 8640 + // 24 * 60 * 60 = 86400 let a = Arc::new(TimestampSecondArray::from_vec( vec![86400 * 31], Some("+00:00".to_string()), @@ -435,7 +564,7 @@ mod tests { fn test_temporal_array_timestamp_day_with_timezone() { use std::sync::Arc; - // 24 * 60 * 60 = 8640 + // 24 * 60 * 60 = 86400 let a = Arc::new(TimestampSecondArray::from_vec( vec![86400], Some("+00:00".to_string()), @@ -450,6 +579,19 @@ mod tests { assert_eq!(1, b.value(0)); } + #[test] + fn test_temporal_array_date64_weekday() { + //1514764800000 -> 2018-01-01 (Monday) + //1550636625000 -> 2019-02-20 (Wednesday) + let a: PrimitiveArray = + vec![Some(1514764800000), None, Some(1550636625000)].into(); + + let b = weekday(&a).unwrap(); + assert_eq!(0, b.value(0)); + assert!(!b.is_valid(1)); + assert_eq!(2, b.value(2)); + } + #[test] fn test_temporal_array_date64_day() { //1514764800000 -> 2018-01-01 diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs index 4b5029d68a7c..c8e68fbeb353 100644 --- a/arrow/src/compute/util.rs +++ b/arrow/src/compute/util.rs @@ -24,38 +24,41 @@ use crate::error::{ArrowError, Result}; use num::{One, ToPrimitive, Zero}; use std::ops::Add; -/// Combines the null bitmaps of two arrays using a bitwise `and` operation. +/// Combines the null bitmaps of multiple arrays using a bitwise `and` operation. /// /// This function is useful when implementing operations on higher level arrays. 
#[allow(clippy::unnecessary_wraps)] pub(super) fn combine_option_bitmap( - left_data: &ArrayData, - right_data: &ArrayData, + arrays: &[&ArrayData], len_in_bits: usize, ) -> Result> { - let left_offset_in_bits = left_data.offset(); - let right_offset_in_bits = right_data.offset(); - - let left = left_data.null_buffer(); - let right = right_data.null_buffer(); - - match left { - None => match right { - None => Ok(None), - Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), - }, - Some(l) => match right { - None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), - - Some(r) => Ok(Some(buffer_bin_and( - l, - left_offset_in_bits, - r, - right_offset_in_bits, - len_in_bits, - ))), - }, - } + arrays + .iter() + .map(|array| (array.null_buffer().cloned(), array.offset())) + .reduce(|acc, buffer_and_offset| match (acc, buffer_and_offset) { + ((None, _), (None, _)) => (None, 0), + ((Some(buffer), offset), (None, _)) | ((None, _), (Some(buffer), offset)) => { + (Some(buffer), offset) + } + ((Some(buffer_left), offset_left), (Some(buffer_right), offset_right)) => ( + Some(buffer_bin_and( + &buffer_left, + offset_left, + &buffer_right, + offset_right, + len_in_bits, + )), + 0, + ), + }) + .map_or( + Err(ArrowError::ComputeError( + "Arrays must not be empty".to_string(), + )), + |(buffer, offset)| { + Ok(buffer.map(|buffer| buffer.bit_slice(offset, len_in_bits))) + }, + ) } /// Takes/filters a list array's inner data using the offsets of the list array. 
@@ -184,7 +187,7 @@ pub(super) mod tests { offset: usize, null_bit_buffer: Option, ) -> Arc { - let buffer = Buffer::from(&vec![11; len]); + let buffer = Buffer::from(&vec![11; len + offset]); Arc::new( ArrayData::try_new( @@ -206,25 +209,87 @@ pub(super) mod tests { make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010]))); let inverse_bitmap = make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101]))); + let some_other_bitmap = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b11010111]))); assert_eq!( - None, - combine_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap() + combine_option_bitmap(&[], 8).unwrap_err().to_string(), + "Compute error: Arrays must not be empty", ); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap() + combine_option_bitmap(&[&some_bitmap], 8).unwrap() + ); + assert_eq!( + None, + combine_option_bitmap(&[&none_bitmap, &none_bitmap], 8).unwrap() ); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap() + combine_option_bitmap(&[&some_bitmap, &none_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b11010111])), + combine_option_bitmap(&[&none_bitmap, &some_other_bitmap], 8).unwrap() ); assert_eq!( Some(Buffer::from([0b01001010])), - combine_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap() + combine_option_bitmap(&[&some_bitmap, &some_bitmap], 8,).unwrap() ); assert_eq!( Some(Buffer::from([0b0])), - combine_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap() + combine_option_bitmap(&[&some_bitmap, &inverse_bitmap], 8,).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b01000010])), + combine_option_bitmap(&[&some_bitmap, &some_other_bitmap, &none_bitmap], 8,) + .unwrap() + ); + assert_eq!( + Some(Buffer::from([0b00001001])), + combine_option_bitmap( + &[ + &some_bitmap.slice(3, 5), + &inverse_bitmap.slice(2, 5), + &some_other_bitmap.slice(1, 5) + ], + 5, + ) + 
.unwrap() + ); + } + + #[test] + fn test_combine_option_bitmap_with_offsets() { + let none_bitmap = make_data_with_null_bit_buffer(8, 0, None); + let bitmap0 = + make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10101010]))); + let bitmap1 = + make_data_with_null_bit_buffer(8, 1, Some(Buffer::from([0b01010100, 0b1]))); + let bitmap2 = + make_data_with_null_bit_buffer(8, 2, Some(Buffer::from([0b10101000, 0b10]))); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap2], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1, &none_bitmap], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&none_bitmap, &bitmap2], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap0, &bitmap1], 8).unwrap() + ); + assert_eq!( + Some(Buffer::from([0b10101010])), + combine_option_bitmap(&[&bitmap1, &bitmap2], 8).unwrap() ); } diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index d8841964b586..21e107ee4c8e 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -120,7 +120,7 @@ pub struct ReaderOptions { /// Return inferred schema and number of records used for inference. This function does not change /// reader cursor offset. 
pub fn infer_file_schema( - reader: &mut R, + reader: R, delimiter: u8, max_read_records: Option, has_header: bool, @@ -136,12 +136,13 @@ pub fn infer_file_schema( } fn infer_file_schema_with_csv_options( - reader: &mut R, - roptoins: ReaderOptions, + mut reader: R, + roptions: ReaderOptions, ) -> Result<(Schema, usize)> { let saved_offset = reader.seek(SeekFrom::Current(0))?; - let (schema, records_count) = infer_reader_schema_with_csv_options(reader, roptoins)?; + let (schema, records_count) = + infer_reader_schema_with_csv_options(&mut reader, roptions)?; // return the reader seek back to the start reader.seek(SeekFrom::Start(saved_offset))?; @@ -155,7 +156,7 @@ fn infer_file_schema_with_csv_options( /// /// Return infered schema and number of records used for inference. pub fn infer_reader_schema( - reader: &mut R, + reader: R, delimiter: u8, max_read_records: Option, has_header: bool, @@ -170,7 +171,7 @@ pub fn infer_reader_schema( } fn infer_reader_schema_with_csv_options( - reader: &mut R, + reader: R, roptions: ReaderOptions, ) -> Result<(Schema, usize)> { let mut csv_reader = Reader::build_csv_reader( @@ -1203,8 +1204,8 @@ mod tests { fn test_csv_reader_with_decimal() { let schema = Schema::new(vec![ Field::new("city", DataType::Utf8, false), - Field::new("lat", DataType::Decimal(26, 6), false), - Field::new("lng", DataType::Decimal(26, 6), false), + Field::new("lat", DataType::Decimal(38, 6), false), + Field::new("lng", DataType::Decimal(38, 6), false), ]); let file = File::open("test/data/decimal_test.csv").unwrap(); diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs index b7755fae311b..6735d9668560 100644 --- a/arrow/src/csv/writer.rs +++ b/arrow/src/csv/writer.rs @@ -798,6 +798,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo // starting at row 2 and up to row 6. 
None, None, + None, ); let rb = reader.next().unwrap().unwrap(); let c1 = rb.column(0).as_any().downcast_ref::().unwrap(); diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index a740e8ecc019..895e5cc67c38 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -671,7 +671,7 @@ impl DataType { /// Compares the datatype with another, ignoring nested field names /// and metadata. - pub(crate) fn equals_datatype(&self, other: &DataType) -> bool { + pub fn equals_datatype(&self, other: &DataType) -> bool { match (&self, other) { (DataType::List(a), DataType::List(b)) | (DataType::LargeList(a), DataType::LargeList(b)) => { diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 2a8c99f0f89e..d9a3f667d8e4 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -19,6 +19,10 @@ use super::DataType; use half::f16; use serde_json::{Number, Value}; +mod private { + pub trait Sealed {} +} + /// Trait declaring any type that is serializable to JSON. This includes all primitive types (bool, i32, etc.). pub trait JsonSerializable: 'static { fn into_json_value(self) -> Option; @@ -26,8 +30,26 @@ pub trait JsonSerializable: 'static { /// Trait expressing a Rust type that has the same in-memory representation /// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits). +/// /// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers /// as is. +/// +/// # Transmute Safety +/// +/// A type T implementing this trait means that any arbitrary slice of bytes of length and +/// alignment `size_of::()` can be safely interpreted as a value of that type without +/// being unsound, i.e. potentially resulting in undefined behaviour. +/// +/// Note: in the case of floating point numbers this transmutation can result in a signalling +/// NaN, which, whilst sound, can be unwieldy. 
In general, whilst it is perfectly sound to +/// reinterpret bytes as different types using this trait, it is likely unwise. For more information +/// see [f32::from_bits] and [f64::from_bits]. +/// +/// Note: `bool` is restricted to `0` or `1`, and so `bool: !ArrowNativeType` +/// +/// # Sealed +/// +/// Due to the above restrictions, this trait is sealed to prevent accidental misuse pub trait ArrowNativeType: std::fmt::Debug + Send @@ -37,6 +59,7 @@ pub trait ArrowNativeType: + std::str::FromStr + Default + JsonSerializable + + private::Sealed { /// Convert native type from usize. #[inline] @@ -109,6 +132,7 @@ impl JsonSerializable for i8 { } } +impl private::Sealed for i8 {} impl ArrowNativeType for i8 { #[inline] fn from_usize(v: usize) -> Option { @@ -132,6 +156,7 @@ impl JsonSerializable for i16 { } } +impl private::Sealed for i16 {} impl ArrowNativeType for i16 { #[inline] fn from_usize(v: usize) -> Option { @@ -155,6 +180,7 @@ impl JsonSerializable for i32 { } } +impl private::Sealed for i32 {} impl ArrowNativeType for i32 { #[inline] fn from_usize(v: usize) -> Option { @@ -184,6 +210,7 @@ impl JsonSerializable for i64 { } } +impl private::Sealed for i64 {} impl ArrowNativeType for i64 { #[inline] fn from_usize(v: usize) -> Option { @@ -217,6 +244,7 @@ impl JsonSerializable for i128 { } } +impl private::Sealed for i128 {} impl ArrowNativeType for i128 { #[inline] fn from_usize(v: usize) -> Option { @@ -246,6 +274,7 @@ impl JsonSerializable for u8 { } } +impl private::Sealed for u8 {} impl ArrowNativeType for u8 { #[inline] fn from_usize(v: usize) -> Option { @@ -269,6 +298,7 @@ impl JsonSerializable for u16 { } } +impl private::Sealed for u16 {} impl ArrowNativeType for u16 { #[inline] fn from_usize(v: usize) -> Option { @@ -292,6 +322,7 @@ impl JsonSerializable for u32 { } } +impl private::Sealed for u32 {} impl ArrowNativeType for u32 { #[inline] fn from_usize(v: usize) -> Option { @@ -315,6 +346,7 @@ impl JsonSerializable for u64 { } } +impl 
private::Sealed for u64 {} impl ArrowNativeType for u64 { #[inline] fn from_usize(v: usize) -> Option { @@ -351,8 +383,11 @@ impl JsonSerializable for f64 { } impl ArrowNativeType for f16 {} +impl private::Sealed for f16 {} impl ArrowNativeType for f32 {} +impl private::Sealed for f32 {} impl ArrowNativeType for f64 {} +impl private::Sealed for f64 {} /// Allows conversion from supported Arrow types to a byte slice. pub trait ToByteSlice { diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 4ab929829bfd..84905af20a63 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -79,6 +79,8 @@ //! unsafe { //! Box::from_raw(out_array_ptr); //! Box::from_raw(out_schema_ptr); +//! Arc::from_raw(array_ptr); +//! Arc::from_raw(schema_ptr); //! } //! //! Ok(()) @@ -907,6 +909,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_decimal_round_trip() -> Result<()> { // create an array natively let original_array = [Some(12345_i128), Some(-12345_i128), None] diff --git a/arrow/src/ffi_stream.rs b/arrow/src/ffi_stream.rs index ab4caea36f8e..3a85f2ef6421 100644 --- a/arrow/src/ffi_stream.rs +++ b/arrow/src/ffi_stream.rs @@ -81,7 +81,6 @@ const EINVAL: i32 = 22; const ENOSYS: i32 = 78; /// ABI-compatible struct for `ArrayStream` from C Stream Interface -/// This interface is experimental /// See /// This was created by bindgen #[repr(C)] @@ -198,13 +197,6 @@ impl ExportedArrayStream { } pub fn get_schema(&mut self, out: *mut FFI_ArrowSchema) -> i32 { - unsafe { - match (*out).release { - None => (), - Some(release) => release(out), - }; - }; - let mut private_data = self.get_private_data(); let reader = &private_data.batch_reader; @@ -224,18 +216,17 @@ impl ExportedArrayStream { } pub fn get_next(&mut self, out: *mut FFI_ArrowArray) -> i32 { - unsafe { - match (*out).release { - None => (), - Some(release) => release(out), - }; - }; - let mut private_data = self.get_private_data(); let reader = &mut private_data.batch_reader; let ret_code = match 
reader.next() { - None => 0, + None => { + // Marks ArrowArray released to indicate reaching the end of stream. + unsafe { + (*out).release = None; + } + 0 + } Some(next_batch) => { if let Ok(batch) = next_batch { let struct_array = StructArray::from(batch); @@ -275,7 +266,7 @@ fn get_error_code(err: &ArrowError) -> i32 { /// Struct used to fetch `RecordBatch` from the C Stream Interface. /// Its main responsibility is to expose `RecordBatchReader` functionality /// that requires [FFI_ArrowArrayStream]. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ArrowArrayStreamReader { stream: Arc, schema: SchemaRef, @@ -508,6 +499,8 @@ mod tests { } assert_eq!(produced_batches, vec![batch.clone(), batch]); + + unsafe { Arc::from_raw(stream_ptr) }; Ok(()) } @@ -537,6 +530,8 @@ mod tests { } assert_eq!(produced_batches, vec![batch.clone(), batch]); + + unsafe { Arc::from_raw(stream_ptr) }; Ok(()) } diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs index 7a0f2cce05c2..2ce29024ba11 100644 --- a/arrow/src/ipc/reader.rs +++ b/arrow/src/ipc/reader.rs @@ -31,7 +31,7 @@ use crate::compute::cast; use crate::datatypes::{DataType, Field, IntervalUnit, Schema, SchemaRef, UnionMode}; use crate::error::{ArrowError, Result}; use crate::ipc; -use crate::record_batch::{RecordBatch, RecordBatchReader}; +use crate::record_batch::{RecordBatch, RecordBatchOptions, RecordBatchReader}; use crate::ipc::compression::compression::CompressionCodecType; use crate::ipc::compression::{ @@ -111,6 +111,7 @@ fn read_uncompressed_size(buffer: &[u8]) -> i64 { /// - check if the bit width of non-64-bit numbers is 64, and /// - read the buffer as 64-bit (signed integer or float), and /// - cast the 64-bit array to the appropriate data type +#[allow(clippy::too_many_arguments)] fn create_array( nodes: &[ipc::FieldNode], field: &Field, @@ -120,6 +121,7 @@ fn create_array( mut node_index: usize, mut buffer_index: usize, compression_codec: &CompressionCodecType, + metadata: &ipc::MetadataVersion, 
) -> Result<(ArrayRef, usize, usize)> { use DataType::*; let data_type = field.data_type(); @@ -167,6 +169,7 @@ fn create_array( node_index, buffer_index, compression_codec, + metadata, )?; node_index = triple.1; buffer_index = triple.2; @@ -190,6 +193,7 @@ fn create_array( node_index, buffer_index, compression_codec, + metadata, )?; node_index = triple.1; buffer_index = triple.2; @@ -217,6 +221,7 @@ fn create_array( node_index, buffer_index, compression_codec, + metadata, )?; node_index = triple.1; buffer_index = triple.2; @@ -265,6 +270,13 @@ fn create_array( let len = union_node.length() as usize; + // In V4, union types has validity bitmap + // In V5 and later, union types have no validity bitmap + if metadata < &ipc::MetadataVersion::V5 { + read_buffer(&buffers[buffer_index], data); + buffer_index += 1; + } + let type_ids: Buffer = read_buffer(&buffers[buffer_index], data, compression_codec)[..len] .into(); @@ -293,6 +305,7 @@ fn create_array( node_index, buffer_index, compression_codec, + metadata, )?; node_index = triple.1; @@ -649,6 +662,7 @@ pub fn read_record_batch( schema: SchemaRef, dictionaries_by_id: &HashMap, projection: Option<&[usize]>, + metadata: &ipc::MetadataVersion, ) -> Result { let buffers = batch.buffers().ok_or_else(|| { ArrowError::IoError("Unable to get buffers from IPC RecordBatch".to_string()) @@ -672,6 +686,11 @@ pub fn read_record_batch( let mut node_index = 0; let mut arrays = vec![]; + let options = RecordBatchOptions { + row_count: Some(batch.length() as usize), + ..Default::default() + }; + if let Some(projection) = projection { // project fields for (idx, field) in schema.fields().iter().enumerate() { @@ -686,6 +705,7 @@ pub fn read_record_batch( node_index, buffer_index, &compression_codec, + metadata, )?; node_index = triple.1; buffer_index = triple.2; @@ -707,7 +727,11 @@ pub fn read_record_batch( } } - RecordBatch::try_new(Arc::new(schema.project(projection)?), arrays) + RecordBatch::try_new_with_options( + 
Arc::new(schema.project(projection)?), + arrays, + &options, + ) } else { // keep track of index as lists require more than one node for field in schema.fields() { @@ -720,12 +744,13 @@ pub fn read_record_batch( node_index, buffer_index, &compression_codec, + metadata, )?; node_index = triple.1; buffer_index = triple.2; arrays.push(triple.0); } - RecordBatch::try_new(schema, arrays) + RecordBatch::try_new_with_options(schema, arrays, &options) } } @@ -736,6 +761,7 @@ pub fn read_dictionary( batch: ipc::DictionaryBatch, schema: &Schema, dictionaries_by_id: &mut HashMap, + metadata: &ipc::MetadataVersion, ) -> Result<()> { if batch.isDelta() { return Err(ArrowError::IoError( @@ -756,7 +782,7 @@ pub fn read_dictionary( DataType::Dictionary(_, ref value_type) => { // Make a fake schema for the dictionary batch. let schema = Schema { - fields: vec![Field::new("", value_type.as_ref().clone(), false)], + fields: vec![Field::new("", value_type.as_ref().clone(), true)], metadata: HashMap::new(), }; // Read a single column @@ -766,6 +792,7 @@ pub fn read_dictionary( Arc::new(schema), dictionaries_by_id, None, + metadata, )?; Some(record_batch.column(0).clone()) } @@ -896,7 +923,13 @@ impl FileReader { ))?; reader.read_exact(&mut buf)?; - read_dictionary(&buf, batch, &schema, &mut dictionaries_by_id)?; + read_dictionary( + &buf, + batch, + &schema, + &mut dictionaries_by_id, + &message.version(), + )?; } t => { return Err(ArrowError::IoError(format!( @@ -1004,6 +1037,7 @@ impl FileReader { self.schema(), &self.dictionaries_by_id, self.projection.as_ref().map(|x| x.0.as_ref()), + &message.version() ).map(Some) } @@ -1178,7 +1212,7 @@ impl StreamReader { let mut buf = vec![0; message.bodyLength() as usize]; self.reader.read_exact(&mut buf)?; - read_record_batch(&buf, batch, self.schema(), &self.dictionaries_by_id, self.projection.as_ref().map(|x| x.0.as_ref())).map(Some) + read_record_batch(&buf, batch, self.schema(), &self.dictionaries_by_id, self.projection.as_ref().map(|x| 
x.0.as_ref()), &message.version()).map(Some) } ipc::MessageHeader::DictionaryBatch => { let batch = message.header_as_dictionary_batch().ok_or_else(|| { @@ -1191,7 +1225,7 @@ impl StreamReader { self.reader.read_exact(&mut buf)?; read_dictionary( - &buf, batch, &self.schema, &mut self.dictionaries_by_id + &buf, batch, &self.schema, &mut self.dictionaries_by_id, &message.version() )?; // read the next message until we encounter a RecordBatch @@ -1233,6 +1267,7 @@ mod tests { use crate::{datatypes, util::integration_util::*}; #[test] + #[cfg(not(feature = "force_validate"))] fn read_generated_files_014() { let testdata = crate::util::test_util::arrow_test_data(); let version = "0.14.1"; @@ -1353,6 +1388,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn read_generated_streams_014() { let testdata = crate::util::test_util::arrow_test_data(); let version = "0.14.1"; @@ -2032,4 +2068,17 @@ mod tests { let output_batch = roundtrip_ipc_stream(&input_batch); assert_eq!(input_batch, output_batch); } + + #[test] + fn test_no_columns_batch() { + let schema = Arc::new(Schema::new(vec![])); + let options = RecordBatchOptions { + match_field_names: true, + row_count: Some(10), + }; + let input_batch = + RecordBatch::try_new_with_options(schema, vec![], &options).unwrap(); + let output_batch = roundtrip_ipc_stream(&input_batch); + assert_eq!(input_batch, output_batch); + } } diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs index 1c7eb1e2d99a..dabbd70abc43 100644 --- a/arrow/src/ipc/writer.rs +++ b/arrow/src/ipc/writer.rs @@ -401,6 +401,7 @@ impl IpcDataGenerator { array.len(), array.null_count(), &compression_codec, + write_options, ); } // pad the tail of body data @@ -474,6 +475,7 @@ impl IpcDataGenerator { array_data.len(), array_data.null_count(), &compression_codec, + write_options, ); // pad the tail of body data @@ -945,7 +947,18 @@ fn write_continuation( Ok(written) } +/// In V4, null types have no validity bitmap +/// In V5 and later, 
null and union types have no validity bitmap +fn has_validity_bitmap(data_type: &DataType, write_options: &IpcWriteOptions) -> bool { + if write_options.metadata_version < ipc::MetadataVersion::V5 { + !matches!(data_type, DataType::Null) + } else { + !matches!(data_type, DataType::Null | DataType::Union(_, _, _)) + } +} + /// Write array data to a vector of bytes +#[allow(clippy::too_many_arguments)] fn write_array_data( array_data: &ArrayData, buffers: &mut Vec, @@ -955,6 +968,7 @@ fn write_array_data( num_rows: usize, null_count: usize, compression_codec: &CompressionCodecType, + write_options: &IpcWriteOptions, ) -> i64 { let mut offset = offset; if !matches!(array_data.data_type(), DataType::Null) { @@ -964,12 +978,7 @@ fn write_array_data( // where null_count is always 0. nodes.push(ipc::FieldNode::new(num_rows as i64, num_rows as i64)); } - // NullArray does not have any buffers, thus the null buffer is not generated - // UnionArray does not have a validity buffer - if !matches!( - array_data.data_type(), - DataType::Null | DataType::Union(_, _, _) - ) { + if has_validity_bitmap(array_data.data_type(), write_options) { // write null buffer if exists let null_buffer = match array_data.null_buffer() { None => { @@ -1003,6 +1012,7 @@ fn write_array_data( data_ref.len(), data_ref.null_count(), &compression_codec, + write_options, ); }); } @@ -1383,6 +1393,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn read_and_rewrite_generated_files_014() { let testdata = crate::util::test_util::arrow_test_data(); let version = "0.14.1"; @@ -1435,6 +1446,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn read_and_rewrite_generated_streams_014() { let testdata = crate::util::test_util::arrow_test_data(); let version = "0.14.1"; @@ -1688,4 +1700,116 @@ mod tests { // Dictionary with id 2 should have been written to the dict tracker assert!(dict_tracker.written.contains_key(&2)); } + + #[test] + fn read_union_017() { + let testdata = 
crate::util::test_util::arrow_test_data(); + let version = "0.17.1"; + let data_file = File::open(format!( + "{}/arrow-ipc-stream/integration/0.17.1/generated_union.stream", + testdata, + )) + .unwrap(); + + let reader = StreamReader::try_new(data_file, None).unwrap(); + + // read and rewrite the stream to a temp location + { + let file = File::create(format!( + "target/debug/testdata/{}-generated_union.stream", + version + )) + .unwrap(); + let mut writer = StreamWriter::try_new(file, &reader.schema()).unwrap(); + reader.for_each(|batch| { + writer.write(&batch.unwrap()).unwrap(); + }); + writer.finish().unwrap(); + } + + // Compare original file and rewrote file + let file = File::open(format!( + "target/debug/testdata/{}-generated_union.stream", + version + )) + .unwrap(); + let rewrite_reader = StreamReader::try_new(file, None).unwrap(); + + let data_file = File::open(format!( + "{}/arrow-ipc-stream/integration/0.17.1/generated_union.stream", + testdata, + )) + .unwrap(); + let reader = StreamReader::try_new(data_file, None).unwrap(); + + reader.into_iter().zip(rewrite_reader.into_iter()).for_each( + |(batch1, batch2)| { + assert_eq!(batch1.unwrap(), batch2.unwrap()); + }, + ); + } + + fn write_union_file(options: IpcWriteOptions) { + let schema = Schema::new(vec![Field::new( + "union", + DataType::Union( + vec![ + Field::new("a", DataType::Int32, false), + Field::new("c", DataType::Float64, false), + ], + vec![0, 1], + UnionMode::Sparse, + ), + true, + )]); + let mut builder = UnionBuilder::new_sparse(5); + builder.append::("a", 1).unwrap(); + builder.append_null::("a").unwrap(); + builder.append::("c", 3.0).unwrap(); + builder.append_null::("c").unwrap(); + builder.append::("a", 4).unwrap(); + let union = builder.build().unwrap(); + + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(union) as ArrayRef], + ) + .unwrap(); + let file_name = "target/debug/testdata/union.arrow_file"; + { + let file = File::create(&file_name).unwrap(); 
+ let mut writer = + FileWriter::try_new_with_options(file, &schema, options).unwrap(); + + writer.write(&batch).unwrap(); + writer.finish().unwrap(); + } + + { + let file = File::open(&file_name).unwrap(); + let reader = FileReader::try_new(file, None).unwrap(); + reader.for_each(|maybe_batch| { + maybe_batch + .unwrap() + .columns() + .iter() + .zip(batch.columns()) + .for_each(|(a, b)| { + assert_eq!(a.data_type(), b.data_type()); + assert_eq!(a.len(), b.len()); + assert_eq!(a.null_count(), b.null_count()); + }); + }); + } + } + + #[test] + fn test_write_union_file_v4_v5() { + write_union_file( + IpcWriteOptions::try_new(8, false, MetadataVersion::V4).unwrap(), + ); + write_union_file( + IpcWriteOptions::try_new(8, false, MetadataVersion::V5).unwrap(), + ); + } } diff --git a/arrow/src/lib.rs b/arrow/src/lib.rs index 0d3ea0f0a2c3..95c69ca0be6d 100644 --- a/arrow/src/lib.rs +++ b/arrow/src/lib.rs @@ -15,123 +15,220 @@ // specific language governing permissions and limitations // under the License. -//! A native Rust implementation of [Apache Arrow](https://arrow.apache.org), a cross-language +//! A complete, safe, native Rust implementation of [Apache Arrow](https://arrow.apache.org), a cross-language //! development platform for in-memory data. //! -//! ### DataType +//! # Columnar Format //! -//! Every [`Array`](array::Array) in this crate has an associated [`DataType`](datatypes::DataType), -//! that specifies how its data is layed in memory and represented. -//! Thus, a central enum of this crate is [`DataType`](datatypes::DataType), that contains the set of valid -//! DataTypes in the specification. For example, [`DataType::Utf8`](datatypes::DataType::Utf8). +//! The [`array`] module provides statically typed implementations of all the array +//! types as defined by the [Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html). //! -//! ## Array -//! -//! 
The central trait of this package is the dynamically-typed [`Array`](array::Array) that -//! represents a fixed-sized, immutable, Send + Sync Array of nullable elements. An example of such an array is [`UInt32Array`](array::UInt32Array). -//! One way to think about an arrow [`Array`](array::Array) is a `Arc<[Option; len]>` where T can be anything ranging from an integer to a string, or even -//! another [`Array`](array::Array). -//! -//! [`Arrays`](array::Array) have [`len()`](array::Array::len), [`data_type()`](array::Array::data_type), and the nullability of each of its elements, -//! can be obtained via [`is_null(index)`](array::Array::is_null). To downcast an [`Array`](array::Array) to a specific implementation, you can use +//! For example, an [`Int32Array`](array::Int32Array) represents a nullable array of `i32` //! //! ```rust -//! use arrow::array::{Array, UInt32Array}; -//! let array = UInt32Array::from(vec![Some(1), None, Some(3)]); +//! # use arrow::array::{Array, Int32Array}; +//! let array = Int32Array::from(vec![Some(1), None, Some(3)]); //! assert_eq!(array.len(), 3); //! assert_eq!(array.value(0), 1); //! assert_eq!(array.is_null(1), true); -//! ``` //! -//! To make the array dynamically typed, we wrap it in an [`Arc`](std::sync::Arc): -//! -//! ```rust -//! # use std::sync::Arc; -//! use arrow::datatypes::DataType; -//! use arrow::array::{UInt32Array, ArrayRef}; -//! # let array = UInt32Array::from(vec![Some(1), None, Some(3)]); -//! let array: ArrayRef = Arc::new(array); -//! assert_eq!(array.len(), 3); -//! // array.value() is not available in the dynamically-typed version -//! assert_eq!(array.is_null(1), true); -//! assert_eq!(array.data_type(), &DataType::UInt32); +//! let collected: Vec<_> = array.iter().collect(); +//! assert_eq!(collected, vec![Some(1), None, Some(3)]); +//! assert_eq!(array.values(), [1, 0, 3]) //! ``` //! -//! to downcast, use `as_any()`: +//! It is also possible to write generic code. 
For example, the following is generic over +//! all primitively typed arrays: //! //! ```rust -//! # use std::sync::Arc; -//! # use arrow::array::{UInt32Array, ArrayRef}; -//! # let array = UInt32Array::from(vec![Some(1), None, Some(3)]); -//! # let array: ArrayRef = Arc::new(array); -//! let array = array.as_any().downcast_ref::().unwrap(); -//! assert_eq!(array.value(0), 1); +//! # use std::iter::Sum; +//! # use arrow::array::{Float32Array, PrimitiveArray, TimestampNanosecondArray}; +//! # use arrow::datatypes::ArrowPrimitiveType; +//! # +//! fn sum(array: &PrimitiveArray) -> T::Native +//! where +//! T: ArrowPrimitiveType, +//! T::Native: Sum +//! { +//! array.iter().map(|v| v.unwrap_or_default()).sum() +//! } +//! +//! assert_eq!(sum(&Float32Array::from(vec![1.1, 2.9, 3.])), 7.); +//! assert_eq!(sum(&TimestampNanosecondArray::from(vec![1, 2, 3])), 6); //! ``` //! -//! ## Memory and Buffers +//! For more examples, consult the [`array`] docs. //! -//! Data in [`Array`](array::Array) is stored in [`ArrayData`](array::ArrayData), that in turn -//! is a collection of other [`ArrayData`](array::ArrayData) and [`Buffers`](buffer::Buffer). -//! [`Buffers`](buffer::Buffer) is the central struct that array implementations use keep allocated memory and pointers. -//! The [`MutableBuffer`](buffer::MutableBuffer) is the mutable counter-part of[`Buffer`](buffer::Buffer). -//! These are the lowest abstractions of this crate, and are used throughout the crate to -//! efficiently allocate, write, read and deallocate memory. +//! # Type Erasure / Trait Objects //! -//! ## Field, Schema and RecordBatch +//! It is often the case that code wishes to handle any type of array, without necessarily knowing +//! its concrete type. This use-case is catered for by a combination of [`Array`] +//! and [`DataType`](datatypes::DataType), with the former providing a type-erased container for +//! the array, and the latter identifying the concrete type of array. //! -//! 
[`Field`](datatypes::Field) is a struct that contains an array's metadata (datatype and whether its values -//! can be null), and a name. [`Schema`](datatypes::Schema) is a vector of fields with optional metadata. -//! Together, they form the basis of a schematic representation of a group of [`Arrays`](array::Array). +//! ```rust +//! # use arrow::array::{Array, Float32Array}; +//! # use arrow::array::StringArray; +//! # use arrow::datatypes::DataType; +//! # +//! fn impl_string(array: &StringArray) {} +//! fn impl_f32(array: &Float32Array) {} +//! +//! fn impl_dyn(array: &dyn Array) { +//! match array.data_type() { +//! DataType::Utf8 => impl_string(array.as_any().downcast_ref().unwrap()), +//! DataType::Float32 => impl_f32(array.as_any().downcast_ref().unwrap()), +//! _ => unimplemented!() +//! } +//! } +//! ``` //! -//! In fact, [`RecordBatch`](record_batch::RecordBatch) is a struct with a [`Schema`](datatypes::Schema) and a vector of -//! [`Array`](array::Array)s, all with the same `len`. A record batch is the highest order struct that this crate currently offers -//! and is broadly used to represent a table where each column in an `Array`. +//! It is also common to want to write a function that returns one of a number of possible +//! array implementations. [`ArrayRef`] is a type-alias for [`Arc`](array::Array) +//! which is frequently used for this purpose //! -//! ## Compute +//! ```rust +//! # use std::str::FromStr; +//! # use std::sync::Arc; +//! # use arrow::array::{ArrayRef, Int32Array, PrimitiveArray}; +//! # use arrow::datatypes::{ArrowPrimitiveType, DataType, Int32Type, UInt32Type}; +//! # use arrow::compute::cast; +//! # +//! fn parse_to_primitive<'a, T, I>(iter: I) -> PrimitiveArray +//! where +//! T: ArrowPrimitiveType, +//! I: IntoIterator, +//! { +//! PrimitiveArray::from_iter(iter.into_iter().map(|val| T::Native::from_str(val).ok())) +//! } +//! +//! fn parse_strings<'a, I>(iter: I, to_data_type: DataType) -> ArrayRef +//! where +//! 
I: IntoIterator, +//! { +//! match to_data_type { +//! DataType::Int32 => Arc::new(parse_to_primitive::(iter)) as _, +//! DataType::UInt32 => Arc::new(parse_to_primitive::(iter)) as _, +//! _ => unimplemented!() +//! } +//! } +//! +//! let array = parse_strings(["1", "2", "3"], DataType::Int32); +//! let integers = array.as_any().downcast_ref::().unwrap(); +//! assert_eq!(integers.values(), [1, 2, 3]) +//! ``` //! -//! This crate offers many operations (called kernels) to operate on [`Array`](array::Array)s, that you can find at [`Kernel`](compute::kernels). -//! It has both vertical and horizontal operations, and some of them have an SIMD implementation. +//! # Compute Kernels //! -//! ## Status +//! The [`compute`](compute) module provides optimised implementations of many common operations, +//! for example the `parse_strings` operation above could also be implemented as follows: //! -//! This crate has most of the implementation of the arrow specification. Specifically, it supports the following types: +//! ``` +//! # use std::sync::Arc; +//! # use arrow::error::Result; +//! # use arrow::array::{ArrayRef, StringArray, UInt32Array}; +//! # use arrow::datatypes::DataType; +//! # +//! fn parse_strings<'a, I>(iter: I, to_data_type: &DataType) -> Result +//! where +//! I: IntoIterator, +//! { +//! let array = Arc::new(StringArray::from_iter(iter.into_iter().map(Some))) as _; +//! arrow::compute::cast(&array, to_data_type) +//! } +//! +//! let array = parse_strings(["1", "2", "3"], &DataType::UInt32).unwrap(); +//! let integers = array.as_any().downcast_ref::().unwrap(); +//! assert_eq!(integers.values(), [1, 2, 3]) +//! ``` //! -//! * All arrow primitive types, such as [`Int32Array`](array::UInt8Array), [`BooleanArray`](array::BooleanArray) and [`Float64Array`](array::Float64Array). -//! * All arrow variable length types, such as [`StringArray`](array::StringArray) and [`BinaryArray`](array::BinaryArray) -//! 
* All composite types such as [`StructArray`](array::StructArray) and [`ListArray`](array::ListArray) -//! * Dictionary types [`DictionaryArray`](array::DictionaryArray) - +//! This module also implements many common vertical operations: //! -//! This crate also implements many common vertical operations: -//! * all mathematical binary operators, such as [`subtract`](compute::kernels::arithmetic::subtract) -//! * all boolean binary operators such as [`equality`](compute::kernels::comparison::eq) +//! * All mathematical binary operators, such as [`subtract`](compute::kernels::arithmetic::subtract) +//! * All boolean binary operators such as [`equality`](compute::kernels::comparison::eq) //! * [`cast`](compute::kernels::cast::cast) //! * [`filter`](compute::kernels::filter::filter) //! * [`take`](compute::kernels::take::take) and [`limit`](compute::kernels::limit::limit) //! * [`sort`](compute::kernels::sort::sort) //! * some string operators such as [`substring`](compute::kernels::substring::substring) and [`length`](compute::kernels::length::length) //! -//! as well as some horizontal operations, such as +//! As well as some horizontal operations, such as: //! //! * [`min`](compute::kernels::aggregate::min) and [`max`](compute::kernels::aggregate::max) //! * [`sum`](compute::kernels::aggregate::sum) //! -//! Finally, this crate implements some readers and writers to different formats: +//! # Tabular Representation +//! +//! It is common to want to group one or more columns together into a tabular representation. This +//! is provided by [`RecordBatch`] which combines a [`Schema`](datatypes::Schema) +//! and a corresponding list of [`ArrayRef`]. //! -//! * JSON: [`Reader`](json::reader::Reader) +//! +//! ``` +//! # use std::sync::Arc; +//! # use arrow::array::{Float32Array, Int32Array}; +//! # use arrow::record_batch::RecordBatch; +//! # +//! let col_1 = Arc::new(Int32Array::from_iter([1, 2, 3])) as _; +//! 
let col_2 = Arc::new(Float32Array::from_iter([1., 6.3, 4.])) as _; +//! +//! let batch = RecordBatch::try_from_iter([("col1", col_1), ("col_2", col_2)]).unwrap(); +//! ``` +//! +//! # IO +//! +//! This crate provides readers and writers for various formats to/from [`RecordBatch`] +//! +//! * JSON: [`Reader`](json::reader::Reader) and [`Writer`](json::writer::Writer) //! * CSV: [`Reader`](csv::reader::Reader) and [`Writer`](csv::writer::Writer) //! * IPC: [`Reader`](ipc::reader::StreamReader) and [`Writer`](ipc::writer::FileWriter) //! -//! The parquet implementation is on a [separate crate](https://crates.io/crates/parquet) +//! Parquet is published as a [separate crate](https://crates.io/crates/parquet) +//! +//! # Memory and Buffers +//! +//! Advanced users may wish to interact with the underlying buffers of an [`Array`], for example, +//! for FFI or high-performance conversion from other formats. This interface is provided by +//! [`ArrayData`] which stores the [`Buffer`] comprising an [`Array`], and can be accessed +//! with [`Array::data`](array::Array::data) +//! +//! The APIs for constructing [`ArrayData`] come in safe, and unsafe variants, with the former +//! performing extensive, but potentially expensive validation to ensure the buffers are well-formed. +//! +//! An [`ArrayRef`] can be cheaply created from an [`ArrayData`] using [`make_array`], +//! or by using the appropriate [`From`] conversion on the concrete [`Array`] implementation. +//! +//! # Safety and Security +//! +//! Like many crates, this crate makes use of unsafe where prudent. However, it endeavours to be +//! sound. Specifically, **it should not be possible to trigger undefined behaviour using safe APIs.** +//! +//! If you think you have found an instance where this is possible, please file +//! a ticket in our [issue tracker] and it will be triaged and fixed. For more information on +//! arrow's use of unsafe, see [here](https://github.com/apache/arrow-rs/tree/master/arrow#safety). +//! 
+//! # Higher-level Processing +//! +//! This crate aims to provide reusable, low-level primitives for operating on columnar data. For +//! more sophisticated query processing workloads, consider checking out [DataFusion]. This +//! orchestrates the primitives exported by this crate into an embeddable query engine, with +//! SQL and DataFrame frontends, and heavily influences this crate's roadmap. +//! +//! [`array`]: mod@array +//! [`Array`]: array::Array +//! [`ArrayRef`]: array::ArrayRef +//! [`ArrayData`]: array::ArrayData +//! [`make_array`]: array::make_array +//! [`Buffer`]: buffer::Buffer +//! [`RecordBatch`]: record_batch::RecordBatch +//! [DataFusion]: https://github.com/apache/arrow-datafusion +//! [issue tracker]: https://github.com/apache/arrow-rs/issues +//! -#![cfg_attr(feature = "avx512", feature(stdsimd))] -#![cfg_attr(feature = "avx512", feature(repr_simd))] -#![cfg_attr(feature = "avx512", feature(avx512_target_feature))] #![deny(clippy::redundant_clone)] #![warn(missing_debug_implementations)] pub mod alloc; -mod arch; pub mod array; pub mod bitmap; pub mod buffer; diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 62e6316b621c..3ae5b3b9987f 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -24,13 +24,16 @@ use std::sync::Arc; use pyo3::ffi::Py_uintptr_t; use pyo3::import_exception; use pyo3::prelude::*; -use pyo3::types::PyList; +use pyo3::types::{PyList, PyTuple}; use crate::array::{Array, ArrayData, ArrayRef}; use crate::datatypes::{DataType, Field, Schema}; use crate::error::ArrowError; use crate::ffi; use crate::ffi::FFI_ArrowSchema; +use crate::ffi_stream::{ + export_reader_into_raw, ArrowArrayStreamReader, FFI_ArrowArrayStream, +}; use crate::record_batch::RecordBatch; import_exception!(pyarrow, ArrowException); @@ -198,6 +201,42 @@ impl PyArrowConvert for RecordBatch { } } +impl PyArrowConvert for ArrowArrayStreamReader { + fn from_pyarrow(value: &PyAny) -> PyResult { + // prepare a pointer to receive the stream 
struct + let stream = Box::new(FFI_ArrowArrayStream::empty()); + let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream; + + // make the conversion through PyArrow's private API + // this changes the pointer's memory and is thus unsafe. + // In particular, `_export_to_c` can go out of bounds + let args = PyTuple::new(value.py(), &[stream_ptr as Py_uintptr_t]); + value.call_method1("_export_to_c", args)?; + + let stream_reader = + unsafe { ArrowArrayStreamReader::from_raw(stream_ptr).unwrap() }; + + unsafe { + Box::from_raw(stream_ptr); + } + + Ok(stream_reader) + } + + fn to_pyarrow(&self, py: Python) -> PyResult { + let stream = Box::new(FFI_ArrowArrayStream::empty()); + let stream_ptr = Box::into_raw(stream) as *mut FFI_ArrowArrayStream; + + unsafe { export_reader_into_raw(Box::new(self.clone()), stream_ptr) }; + + let module = py.import("pyarrow")?; + let class = module.getattr("RecordBatchReader")?; + let args = PyTuple::new(py, &[stream_ptr as Py_uintptr_t]); + let reader = class.call_method1("_import_from_c", args)?; + Ok(PyObject::from(reader)) + } +} + macro_rules! add_conversion { ($typ:ty) => { impl<'source> FromPyObject<'source> for $typ { @@ -219,3 +258,4 @@ add_conversion!(Field); add_conversion!(Schema); add_conversion!(ArrayData); add_conversion!(RecordBatch); +add_conversion!(ArrowArrayStreamReader); diff --git a/arrow/src/util/bit_chunk_iterator.rs b/arrow/src/util/bit_chunk_iterator.rs index db5aca2a1b3f..f0127ed2267f 100644 --- a/arrow/src/util/bit_chunk_iterator.rs +++ b/arrow/src/util/bit_chunk_iterator.rs @@ -1,5 +1,3 @@ -use std::fmt::Debug; - // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -16,7 +14,11 @@ use std::fmt::Debug; // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + +//! 
Types for iterating over bitmasks in 64-bit chunks + use crate::util::bit_util::ceil; +use std::fmt::Debug; /// Iterates over an arbitrarily aligned byte buffer /// @@ -611,6 +613,7 @@ mod tests { } #[test] + #[cfg_attr(miri, ignore)] fn fuzz_unaligned_bit_chunk_iterator() { let mut rng = thread_rng(); diff --git a/arrow/src/util/bit_iterator.rs b/arrow/src/util/bit_iterator.rs new file mode 100644 index 000000000000..bba9dac60a4b --- /dev/null +++ b/arrow/src/util/bit_iterator.rs @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::util::bit_chunk_iterator::{UnalignedBitChunk, UnalignedBitChunkIterator}; + +/// Iterator of contiguous ranges of set bits within a provided packed bitmask +/// +/// Returns `(usize, usize)` each representing an interval where the corresponding +/// bits in the provides mask are set +/// +#[derive(Debug)] +pub struct BitSliceIterator<'a> { + iter: UnalignedBitChunkIterator<'a>, + len: usize, + current_offset: i64, + current_chunk: u64, +} + +impl<'a> BitSliceIterator<'a> { + /// Create a new [`BitSliceIterator`] from the provide `buffer`, + /// and `offset` and `len` in bits + pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { + let chunk = UnalignedBitChunk::new(buffer, offset, len); + let mut iter = chunk.iter(); + + let current_offset = -(chunk.lead_padding() as i64); + let current_chunk = iter.next().unwrap_or(0); + + Self { + iter, + len, + current_offset, + current_chunk, + } + } + + /// Returns `Some((chunk_offset, bit_offset))` for the next chunk that has at + /// least one bit set, or None if there is no such chunk. 
+ /// + /// Where `chunk_offset` is the bit offset to the current `u64` chunk + /// and `bit_offset` is the offset of the first `1` bit in that chunk + fn advance_to_set_bit(&mut self) -> Option<(i64, u32)> { + loop { + if self.current_chunk != 0 { + // Find the index of the first 1 + let bit_pos = self.current_chunk.trailing_zeros(); + return Some((self.current_offset, bit_pos)); + } + + self.current_chunk = self.iter.next()?; + self.current_offset += 64; + } + } +} + +impl<'a> Iterator for BitSliceIterator<'a> { + type Item = (usize, usize); + + fn next(&mut self) -> Option { + // Used as termination condition + if self.len == 0 { + return None; + } + + let (start_chunk, start_bit) = self.advance_to_set_bit()?; + + // Set bits up to start + self.current_chunk |= (1 << start_bit) - 1; + + loop { + if self.current_chunk != u64::MAX { + // Find the index of the first 0 + let end_bit = self.current_chunk.trailing_ones(); + + // Zero out up to end_bit + self.current_chunk &= !((1 << end_bit) - 1); + + return Some(( + (start_chunk + start_bit as i64) as usize, + (self.current_offset + end_bit as i64) as usize, + )); + } + + match self.iter.next() { + Some(next) => { + self.current_chunk = next; + self.current_offset += 64; + } + None => { + return Some(( + (start_chunk + start_bit as i64) as usize, + std::mem::replace(&mut self.len, 0), + )); + } + } + } + } +} + +/// An iterator of `usize` whose index in a provided bitmask is true +/// +/// This provides the best performance on most masks, apart from those which contain +/// large runs and therefore favour [`BitSliceIterator`] +#[derive(Debug)] +pub struct BitIndexIterator<'a> { + current_chunk: u64, + chunk_offset: i64, + iter: UnalignedBitChunkIterator<'a>, +} + +impl<'a> BitIndexIterator<'a> { + /// Create a new [`BitIndexIterator`] from the provide `buffer`, + /// and `offset` and `len` in bits + pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self { + let chunks = UnalignedBitChunk::new(buffer, offset, 
len); + let mut iter = chunks.iter(); + + let current_chunk = iter.next().unwrap_or(0); + let chunk_offset = -(chunks.lead_padding() as i64); + + Self { + current_chunk, + chunk_offset, + iter, + } + } +} + +impl<'a> Iterator for BitIndexIterator<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + loop { + if self.current_chunk != 0 { + let bit_pos = self.current_chunk.trailing_zeros(); + self.current_chunk ^= 1 << bit_pos; + return Some((self.chunk_offset + bit_pos as i64) as usize); + } + + self.current_chunk = self.iter.next()?; + self.chunk_offset += 64; + } + } +} + +// Note: tests located in filter module diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs new file mode 100644 index 000000000000..b78af3acc6cd --- /dev/null +++ b/arrow/src/util/decimal.rs @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Decimal related utils + +use std::cmp::Ordering; + +/// Represents a decimal value with precision and scale. +/// The decimal value is represented by a signed 128-bit integer. 
+#[derive(Debug)] +pub struct Decimal128 { + #[allow(dead_code)] + precision: usize, + scale: usize, + value: i128, +} + +impl PartialOrd for Decimal128 { + fn partial_cmp(&self, other: &Self) -> Option { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimal128 with different scale: {}, {}", + self.scale, other.scale + ); + self.value.partial_cmp(&other.value) + } +} + +impl Ord for Decimal128 { + fn cmp(&self, other: &Self) -> Ordering { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimal128 with different scale: {}, {}", + self.scale, other.scale + ); + self.value.cmp(&other.value) + } +} + +impl PartialEq for Decimal128 { + fn eq(&self, other: &Self) -> bool { + assert_eq!( + self.scale, other.scale, + "Cannot compare two Decimal128 with different scale: {}, {}", + self.scale, other.scale + ); + self.value.eq(&other.value) + } +} + +impl Eq for Decimal128 {} + +impl Decimal128 { + pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Self { + let as_array = bytes.try_into(); + let value = match as_array { + Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v), + _ => panic!("Input to Decimal128 is not 128bit integer."), + }; + + Decimal128 { + precision, + scale, + value, + } + } + + pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { + Decimal128 { + precision, + scale, + value, + } + } + + pub fn as_i128(&self) -> i128 { + self.value + } + + pub fn as_string(&self) -> String { + let value_str = self.value.to_string(); + + if self.scale == 0 { + value_str + } else { + let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } else { 1 }); + + if rest.len() > self.scale { + // Decimal separator is in the middle of the string + let (whole, decimal) = value_str.split_at(value_str.len() - self.scale); + format!("{}.{}", whole, decimal) + } else { + // String has to be padded + format!("{}0.{:0>width$}", sign, rest, width = self.scale) + } + } + } +} + +impl From for i128 { + fn 
from(decimal: Decimal128) -> Self { + decimal.as_i128() + } +} + +#[cfg(test)] +mod tests { + use crate::util::decimal::Decimal128; + + #[test] + fn decimal_128_to_string() { + let mut value = Decimal128::new_from_i128(5, 2, 100); + assert_eq!(value.as_string(), "1.00"); + + value = Decimal128::new_from_i128(5, 3, 100); + assert_eq!(value.as_string(), "0.100"); + } + + #[test] + fn decimal_128_from_bytes() { + let bytes = 100_i128.to_le_bytes(); + let value = Decimal128::new_from_bytes(5, 2, &bytes); + assert_eq!(value.as_string(), "1.00"); + } + + fn i128_func(value: impl Into) -> i128 { + value.into() + } + + #[test] + fn decimal_128_to_i128() { + let value = Decimal128::new_from_i128(5, 2, 100); + let integer = i128_func(value); + assert_eq!(integer, 100); + } +} diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index 3b6de8a4b263..86253da8d777 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -18,6 +18,7 @@ #[cfg(feature = "test_utils")] pub mod bench_util; pub mod bit_chunk_iterator; +pub mod bit_iterator; pub(crate) mod bit_mask; pub mod bit_util; #[cfg(feature = "test_utils")] @@ -35,4 +36,5 @@ pub mod test_util; mod trusted_len; pub(crate) use trusted_len::trusted_len_unzip; +pub mod decimal; pub(crate) mod reader_parser; diff --git a/arrow/test/dependency/README.md b/arrow/test/dependency/README.md deleted file mode 100644 index b618b4636e7c..000000000000 --- a/arrow/test/dependency/README.md +++ /dev/null @@ -1,21 +0,0 @@ - - -This directory contains projects that use arrow as a dependency with -various combinations of feature flags. diff --git a/arrow/test/dependency/default-features/Cargo.toml b/arrow/test/dependency/default-features/Cargo.toml deleted file mode 100644 index c03aef9979e5..000000000000 --- a/arrow/test/dependency/default-features/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "defeault-features" -description = "Models a user application of arrow that uses default features of arrow" -version = "0.1.0" -edition = "2021" -rust-version = "1.57" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -arrow = { path = "../../../../arrow", version = "15.0.0" } - -[workspace] diff --git a/arrow/test/dependency/default-features/src/main.rs b/arrow/test/dependency/default-features/src/main.rs deleted file mode 100644 index e7a11a969c03..000000000000 --- a/arrow/test/dependency/default-features/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/arrow/test/dependency/no-default-features/Cargo.toml b/arrow/test/dependency/no-default-features/Cargo.toml deleted file mode 100644 index c637aa6e9a8b..000000000000 --- a/arrow/test/dependency/no-default-features/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "no-default-features" -description = "Models a user application of arrow that specifies no-default-features=true" -version = "0.1.0" -edition = "2021" -rust-version = "1.57" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -arrow = { path = "../../../../arrow", version = "15.0.0", default-features = false } - -[workspace] diff --git a/arrow/test/dependency/no-default-features/src/main.rs b/arrow/test/dependency/no-default-features/src/main.rs deleted file mode 100644 index e7a11a969c03..000000000000 --- a/arrow/test/dependency/no-default-features/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/arrow/test/dependency/simd/Cargo.toml b/arrow/test/dependency/simd/Cargo.toml deleted file mode 100644 index ecc6a93a8be9..000000000000 --- a/arrow/test/dependency/simd/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "defeault-features" -description = "Models a user application of arrow that uses the simd feature of arrow" -version = "0.1.0" -edition = "2021" -rust-version = "1.57" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -arrow = { path = "../../../../arrow", version = "15.0.0", features = ["simd"]} - -[workspace] diff --git a/arrow/test/dependency/simd/src/main.rs b/arrow/test/dependency/simd/src/main.rs deleted file mode 100644 index e7a11a969c03..000000000000 --- a/arrow/test/dependency/simd/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/dev/release/README.md b/dev/release/README.md index 96c730a63c6e..912b60dae6b3 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -61,7 +61,7 @@ CHANGELOG_GITHUB_TOKEN= ./dev/release/update_change_log.sh git commit -a -m 'Create changelog' # update versions -sed -i '' -e 's/14.0.0/15.0.0/g' `find . -name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG.md` +sed -i '' -e 's/14.0.0/16.0.0/g' `find . 
-name 'Cargo.toml' -or -name '*.md' | grep -v CHANGELOG.md` git commit -a -m 'Update version' ``` diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index c7996a78af86..466f6fa45267 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -20,3 +20,4 @@ conbench/.isort.cfg arrow-flight/src/arrow.flight.protocol.rs arrow-flight/src/sql/arrow.flight.protocol.sql.rs .github/* +parquet/src/bin/parquet-fromcsv-help.txt diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index 43b4ee606421..316f10c2594b 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="14.0.0" -FUTURE_RELEASE="15.0.0" +SINCE_TAG="15.0.0" +FUTURE_RELEASE="16.0.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index 7215c31d2f9a..57b5211129ff 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "arrow-integration-testing" description = "Binaries used in the Arrow integration tests" -version = "15.0.0" +version = "16.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -34,7 +34,7 @@ logging = ["tracing-subscriber"] arrow = { path = "../arrow" } arrow-flight = { path = "../arrow-flight" } async-trait = "0.1.41" -clap = { version = "3", features = ["derive", "env"] } +clap = { version = "~3.1", features = ["derive", "env"] } futures = "0.3" hex = "0.4" prost = "0.10" diff --git a/integration-testing/src/flight_client_scenarios/integration_test.rs b/integration-testing/src/flight_client_scenarios/integration_test.rs index 4158a7352140..62fe2b85d262 100644 --- a/integration-testing/src/flight_client_scenarios/integration_test.rs +++ 
b/integration-testing/src/flight_client_scenarios/integration_test.rs @@ -270,6 +270,7 @@ async fn receive_batch_flight_data( .expect("Error parsing dictionary"), &schema, dictionaries_by_id, + &message.version(), ) .expect("Error reading dictionary"); diff --git a/integration-testing/src/flight_server_scenarios/integration_test.rs b/integration-testing/src/flight_server_scenarios/integration_test.rs index 52086aade748..7ad3d18eb5ba 100644 --- a/integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/integration-testing/src/flight_server_scenarios/integration_test.rs @@ -296,6 +296,7 @@ async fn record_batch_from_message( schema_ref, dictionaries_by_id, None, + &message.version(), ); arrow_batch_result.map_err(|e| { @@ -313,8 +314,13 @@ async fn dictionary_from_message( Status::internal("Could not parse message header as dictionary batch") })?; - let dictionary_batch_result = - reader::read_dictionary(data_body, ipc_batch, &schema_ref, dictionaries_by_id); + let dictionary_batch_result = reader::read_dictionary( + data_body, + ipc_batch, + &schema_ref, + dictionaries_by_id, + &message.version(), + ); dictionary_batch_result.map_err(|e| { Status::internal(format!("Could not convert to Dictionary: {:?}", e)) }) diff --git a/integration-testing/src/lib.rs b/integration-testing/src/lib.rs index 90537242a11f..c7796ece4c73 100644 --- a/integration-testing/src/lib.rs +++ b/integration-testing/src/lib.rs @@ -593,6 +593,10 @@ fn array_from_json( } DataType::Decimal(precision, scale) => { let mut b = DecimalBuilder::new(json_col.count, *precision, *scale); + // C++ interop tests involve incompatible decimal values + unsafe { + b.disable_value_validation(); + } for (is_valid, value) in json_col .validity .as_ref() diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index bb7a8cd10583..28347bcb7dda 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet" -version = "15.0.0" +version = "16.0.0" license = 
"Apache-2.0" description = "Apache Parquet implementation in Rust" homepage = "https://github.com/apache/arrow-rs" @@ -30,47 +30,52 @@ edition = "2021" rust-version = "1.57" [dependencies] -parquet-format = "4.0.0" -bytes = "1.1" -byteorder = "1" -thrift = "0.13" -snap = { version = "1.0", optional = true } -brotli = { version = "3.3", optional = true } -flate2 = { version = "1.0", optional = true } -lz4 = { version = "1.23", optional = true } +parquet-format = { version = "4.0.0", default-features = false } +bytes = { version = "1.1", default-features = false, features = ["std"] } +byteorder = { version = "1", default-features = false } +thrift = { version = "0.13", default-features = false } +snap = { version = "1.0", default-features = false, optional = true } +brotli = { version = "3.3", default-features = false, features = ["std"], optional = true } +flate2 = { version = "1.0", default-features = false, features = ["rust_backend"], optional = true } +lz4 = { version = "1.23", default-features = false, optional = true } zstd = { version = "0.11.1", optional = true, default-features = false } -chrono = { version = "0.4", default-features = false } -num = "0.4" -num-bigint = "0.4" -arrow = { path = "../arrow", version = "15.0.0", optional = true, default-features = false, features = ["ipc"] } -base64 = { version = "0.13", optional = true } -clap = { version = "3", optional = true, features = ["derive", "env"] } -serde_json = { version = "1.0", features = ["preserve_order"], optional = true } -rand = "0.8" -futures = { version = "0.3", optional = true } +chrono = { version = "0.4", default-features = false, features = ["alloc"] } +num = { version = "0.4", default-features = false } +num-bigint = { version = "0.4", default-features = false } +arrow = { path = "../arrow", version = "16.0.0", optional = true, default-features = false, features = ["ipc"] } +base64 = { version = "0.13", default-features = false, features = ["std"], optional = true } +clap = { version = 
"~3.1", default-features = false, features = ["std", "derive", "env"], optional = true } +serde_json = { version = "1.0", default-features = false, optional = true } +rand = { version = "0.8", default-features = false } +futures = { version = "0.3", default-features = false, features = ["std" ], optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "fs", "rt", "io-util"] } [dev-dependencies] -criterion = "0.3" -rand = "0.8" -snap = "1.0" -tempfile = "3.0" -brotli = "3.3" -flate2 = "1.0" -lz4 = "1.23" -serde_json = { version = "1.0", features = ["preserve_order"] } -arrow = { path = "../arrow", version = "15.0.0", default-features = false, features = ["test_utils", "prettyprint"] } +base64 = { version = "0.13", default-features = false, features = ["std"] } +criterion = { version = "0.3", default-features = false } +snap = { version = "1.0", default-features = false } +tempfile = { version = "3.0", default-features = false } +brotli = { version = "3.3", default-features = false, features = [ "std" ] } +flate2 = { version = "1.0", default-features = false, features = [ "rust_backend" ] } +lz4 = { version = "1.23", default-features = false } +zstd = { version = "0.11", default-features = false } +serde_json = { version = "1.0", default-features = false, features = ["preserve_order"] } +arrow = { path = "../arrow", version = "16.0.0", default-features = false, features = ["ipc", "test_utils", "prettyprint"] } [package.metadata.docs.rs] all-features = true [features] default = ["arrow", "snap", "brotli", "flate2", "lz4", "zstd", "base64"] -cli = ["serde_json", "base64", "clap"] +# Enable arrow reader/writer APIs +arrow = ["dep:arrow", "base64"] +# Enable CLI tools +cli = ["serde_json", "base64", "clap","arrow/csv"] +# Enable internal testing APIs test_common = [] # Experimental, unstable functionality primarily used for testing experimental = [] -# Enable async API +# Enable async APIs async = ["futures", "tokio"] 
[[bin]] @@ -85,13 +90,18 @@ required-features = ["cli"] name = "parquet-rowcount" required-features = ["cli"] +[[bin]] +name = "parquet-fromcsv" +required-features = ["cli"] + [[bench]] name = "arrow_writer" +required-features = ["arrow"] harness = false [[bench]] name = "arrow_reader" -required-features = ["test_common", "experimental"] +required-features = ["arrow", "test_common", "experimental"] harness = false [lib] diff --git a/parquet/README.md b/parquet/README.md index ed8b5518b8f9..fbb6e3e1b5d5 100644 --- a/parquet/README.md +++ b/parquet/README.md @@ -27,7 +27,7 @@ See [crate documentation](https://docs.rs/parquet/latest/parquet/) for examples ## Rust Version Compatbility -This crate is tested with the latest stable version of Rust. We do not currrently test against other, older versions of the Rust compiler. +This crate is tested with the latest stable version of Rust. We do not currently test against other, older versions of the Rust compiler. ## Features diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index 04e48baef705..647a8dc6f393 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -355,27 +355,6 @@ fn create_string_byte_array_dictionary_reader( .unwrap() } -fn create_complex_object_byte_array_dictionary_reader( - page_iterator: impl PageIterator + 'static, - column_desc: ColumnDescPtr, -) -> Box { - use parquet::arrow::array_reader::ComplexObjectArrayReader; - use parquet::arrow::converter::{Utf8ArrayConverter, Utf8Converter}; - let arrow_type = - DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); - - let converter = Utf8Converter::new(Utf8ArrayConverter {}); - Box::new( - ComplexObjectArrayReader::::new( - Box::new(page_iterator), - column_desc, - converter, - Some(arrow_type), - ) - .unwrap(), - ) -} - fn bench_primitive( group: &mut BenchmarkGroup, schema: &SchemaDescPtr, @@ -678,18 +657,7 @@ fn add_benches(c: &mut Criterion) { let mut group = 
c.benchmark_group("arrow_array_reader/StringDictionary"); - group.bench_function("dictionary encoded, mandatory, no NULLs - old", |b| { - b.iter(|| { - let array_reader = create_complex_object_byte_array_dictionary_reader( - dictionary_string_no_null_data.clone(), - mandatory_string_column_desc.clone(), - ); - count = bench_array_reader(array_reader); - }); - assert_eq!(count, EXPECTED_VALUE_COUNT); - }); - - group.bench_function("dictionary encoded, mandatory, no NULLs - new", |b| { + group.bench_function("dictionary encoded, mandatory, no NULLs", |b| { b.iter(|| { let array_reader = create_string_byte_array_dictionary_reader( dictionary_string_no_null_data.clone(), @@ -700,18 +668,7 @@ fn add_benches(c: &mut Criterion) { assert_eq!(count, EXPECTED_VALUE_COUNT); }); - group.bench_function("dictionary encoded, optional, no NULLs - old", |b| { - b.iter(|| { - let array_reader = create_complex_object_byte_array_dictionary_reader( - dictionary_string_no_null_data.clone(), - optional_string_column_desc.clone(), - ); - count = bench_array_reader(array_reader); - }); - assert_eq!(count, EXPECTED_VALUE_COUNT); - }); - - group.bench_function("dictionary encoded, optional, no NULLs - new", |b| { + group.bench_function("dictionary encoded, optional, no NULLs", |b| { b.iter(|| { let array_reader = create_string_byte_array_dictionary_reader( dictionary_string_no_null_data.clone(), @@ -722,18 +679,7 @@ fn add_benches(c: &mut Criterion) { assert_eq!(count, EXPECTED_VALUE_COUNT); }); - group.bench_function("dictionary encoded, optional, half NULLs - old", |b| { - b.iter(|| { - let array_reader = create_complex_object_byte_array_dictionary_reader( - dictionary_string_half_null_data.clone(), - optional_string_column_desc.clone(), - ); - count = bench_array_reader(array_reader); - }); - assert_eq!(count, EXPECTED_VALUE_COUNT); - }); - - group.bench_function("dictionary encoded, optional, half NULLs - new", |b| { + group.bench_function("dictionary encoded, optional, half NULLs", |b| 
{ b.iter(|| { let array_reader = create_string_byte_array_dictionary_reader( dictionary_string_half_null_data.clone(), diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index 7b9adfc23f25..e8c22f95aa0a 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -25,7 +25,7 @@ use crate::arrow::array_reader::{ ComplexObjectArrayReader, ListArrayReader, MapArrayReader, NullArrayReader, PrimitiveArrayReader, RowGroupCollection, StructArrayReader, }; -use crate::arrow::converter::{ +use crate::arrow::buffer::converter::{ DecimalArrayConverter, DecimalConverter, FixedLenBinaryConverter, FixedSizeArrayConverter, Int96ArrayConverter, Int96Converter, IntervalDayTimeArrayConverter, IntervalDayTimeConverter, diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs index b3606a7808b0..9e0f83fa9450 100644 --- a/parquet/src/arrow/array_reader/byte_array.rs +++ b/parquet/src/arrow/array_reader/byte_array.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-use crate::arrow::array_reader::offset_buffer::OffsetBuffer; use crate::arrow::array_reader::{read_records, ArrayReader}; +use crate::arrow::buffer::offset_buffer::OffsetBuffer; use crate::arrow::record_reader::buffer::ScalarValue; use crate::arrow::record_reader::GenericRecordReader; use crate::arrow::schema::parquet_to_arrow_field; @@ -125,13 +125,13 @@ impl ArrayReader for ByteArrayReader { fn get_def_levels(&self) -> Option<&[i16]> { self.def_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } fn get_rep_levels(&self) -> Option<&[i16]> { self.rep_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } } diff --git a/parquet/src/arrow/array_reader/byte_array_dictionary.rs b/parquet/src/arrow/array_reader/byte_array_dictionary.rs index fe8448ffb31e..0cd67206f000 100644 --- a/parquet/src/arrow/array_reader/byte_array_dictionary.rs +++ b/parquet/src/arrow/array_reader/byte_array_dictionary.rs @@ -24,12 +24,11 @@ use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; use arrow::buffer::Buffer; use arrow::datatypes::{ArrowNativeType, DataType as ArrowType}; -use crate::arrow::array_reader::dictionary_buffer::DictionaryBuffer; -use crate::arrow::array_reader::{ - byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain}, - offset_buffer::OffsetBuffer, -}; +use crate::arrow::array_reader::byte_array::{ByteArrayDecoder, ByteArrayDecoderPlain}; use crate::arrow::array_reader::{read_records, ArrayReader}; +use crate::arrow::buffer::{ + dictionary_buffer::DictionaryBuffer, offset_buffer::OffsetBuffer, +}; use crate::arrow::record_reader::buffer::{BufferQueue, ScalarValue}; use crate::arrow::record_reader::GenericRecordReader; use crate::arrow::schema::parquet_to_arrow_field; @@ -188,13 +187,13 @@ where fn get_def_levels(&self) -> Option<&[i16]> { self.def_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } fn get_rep_levels(&self) -> 
Option<&[i16]> { self.rep_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } } @@ -236,13 +235,13 @@ where fn new(col: &ColumnDescPtr) -> Self { let validate_utf8 = col.converted_type() == ConvertedType::UTF8; - let value_type = - match (V::IS_LARGE, col.converted_type() == ConvertedType::UTF8) { - (true, true) => ArrowType::LargeUtf8, - (true, false) => ArrowType::LargeBinary, - (false, true) => ArrowType::Utf8, - (false, false) => ArrowType::Binary, - }; + let value_type = match (V::IS_LARGE, col.converted_type() == ConvertedType::UTF8) + { + (true, true) => ArrowType::LargeUtf8, + (true, false) => ArrowType::LargeBinary, + (false, true) => ArrowType::Utf8, + (false, false) => ArrowType::Binary, + }; Self { dict: None, @@ -357,7 +356,7 @@ where assert_eq!(dict.data_type(), &self.value_type); let dict_buffers = dict.data().buffers(); - let dict_offsets = unsafe { dict_buffers[0].typed_data::() }; + let dict_offsets = dict_buffers[0].typed_data::(); let dict_values = dict_buffers[1].as_slice(); values.extend_from_dictionary( diff --git a/parquet/src/arrow/array_reader.rs b/parquet/src/arrow/array_reader/mod.rs similarity index 99% rename from parquet/src/arrow/array_reader.rs rename to parquet/src/arrow/array_reader/mod.rs index c70071dacf3f..6207b377d137 100644 --- a/parquet/src/arrow/array_reader.rs +++ b/parquet/src/arrow/array_reader/mod.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! 
Logic for reading into arrow arrays + use std::any::Any; use std::cmp::max; use std::marker::PhantomData; @@ -34,7 +36,7 @@ use arrow::datatypes::{ UInt32Type as ArrowUInt32Type, UInt64Type as ArrowUInt64Type, }; -use crate::arrow::converter::Converter; +use crate::arrow::buffer::converter::Converter; use crate::arrow::record_reader::buffer::{ScalarValue, ValuesBuffer}; use crate::arrow::record_reader::{GenericRecordReader, RecordReader}; use crate::arrow::schema::parquet_to_arrow_field; @@ -50,11 +52,9 @@ use crate::schema::types::{ColumnDescPtr, SchemaDescPtr}; mod builder; mod byte_array; mod byte_array_dictionary; -mod dictionary_buffer; mod empty_array; mod list_array; mod map_array; -mod offset_buffer; #[cfg(test)] mod test_util; @@ -226,13 +226,13 @@ where fn get_def_levels(&self) -> Option<&[i16]> { self.def_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } fn get_rep_levels(&self) -> Option<&[i16]> { self.rep_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } } @@ -447,13 +447,13 @@ where fn get_def_levels(&self) -> Option<&[i16]> { self.def_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } fn get_rep_levels(&self) -> Option<&[i16]> { self.rep_levels_buffer .as_ref() - .map(|buf| unsafe { buf.typed_data() }) + .map(|buf| buf.typed_data()) } } @@ -811,7 +811,7 @@ mod tests { TimestampMillisecondType as ArrowTimestampMillisecondType, }; - use crate::arrow::converter::{Utf8ArrayConverter, Utf8Converter}; + use crate::arrow::buffer::converter::{Utf8ArrayConverter, Utf8Converter}; use crate::basic::{Encoding, Type as PhysicalType}; use crate::column::page::Page; use crate::data_type::{ByteArray, ByteArrayType, DataType, Int32Type, Int64Type}; @@ -1384,8 +1384,7 @@ mod tests { let mut all_values = Vec::with_capacity(num_pages * values_per_page); for i in 0..num_pages { - let mut dict_encoder = - 
DictEncoder::::new(column_desc.clone()); + let mut dict_encoder = DictEncoder::::new(column_desc.clone()); // add data page let mut values = Vec::with_capacity(values_per_page); diff --git a/parquet/src/arrow/arrow_reader.rs b/parquet/src/arrow/arrow_reader.rs index 34a14f3725f7..89406cd616a4 100644 --- a/parquet/src/arrow/arrow_reader.rs +++ b/parquet/src/arrow/arrow_reader.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Contains reader which reads parquet data into arrow array. +//! Contains reader which reads parquet data into arrow [`RecordBatch`] use std::sync::Arc; @@ -31,7 +31,8 @@ use crate::arrow::schema::parquet_to_arrow_schema_by_columns; use crate::arrow::ProjectionMask; use crate::errors::Result; use crate::file::metadata::{KeyValue, ParquetMetaData}; -use crate::file::reader::FileReader; +use crate::file::reader::{ChunkReader, FileReader, SerializedFileReader}; +use crate::schema::types::SchemaDescriptor; /// Arrow reader api. 
/// With this api, user can get arrow schema from parquet file, and read parquet data @@ -144,15 +145,40 @@ impl ArrowReader for ParquetFileArrowReader { } impl ParquetFileArrowReader { - /// Create a new [`ParquetFileArrowReader`] + /// Create a new [`ParquetFileArrowReader`] with the provided [`ChunkReader`] + /// + /// ```no_run + /// # use std::fs::File; + /// # use bytes::Bytes; + /// # use parquet::arrow::ParquetFileArrowReader; + /// + /// let file = File::open("file.parquet").unwrap(); + /// let reader = ParquetFileArrowReader::try_new(file).unwrap(); + /// + /// let bytes = Bytes::from(vec![]); + /// let reader = ParquetFileArrowReader::try_new(bytes).unwrap(); + /// ``` + pub fn try_new(chunk_reader: R) -> Result { + Self::try_new_with_options(chunk_reader, Default::default()) + } + + /// Create a new [`ParquetFileArrowReader`] with the provided [`ChunkReader`] + /// and [`ArrowReaderOptions`] + pub fn try_new_with_options( + chunk_reader: R, + options: ArrowReaderOptions, + ) -> Result { + let file_reader = Arc::new(SerializedFileReader::new(chunk_reader)?); + Ok(Self::new_with_options(file_reader, options)) + } + + /// Create a new [`ParquetFileArrowReader`] with the provided [`Arc`] pub fn new(file_reader: Arc) -> Self { - Self { - file_reader, - options: Default::default(), - } + Self::new_with_options(file_reader, Default::default()) } - /// Create a new [`ParquetFileArrowReader`] with the provided [`ArrowReaderOptions`] + /// Create a new [`ParquetFileArrowReader`] with the provided [`Arc`] + /// and [`ArrowReaderOptions`] pub fn new_with_options( file_reader: Arc, options: ArrowReaderOptions, @@ -164,10 +190,21 @@ impl ParquetFileArrowReader { } /// Expose the reader metadata + #[deprecated = "use metadata() instead"] pub fn get_metadata(&mut self) -> ParquetMetaData { self.file_reader.metadata().clone() } + /// Returns the parquet metadata + pub fn metadata(&self) -> &ParquetMetaData { + self.file_reader.metadata() + } + + /// Returns the parquet 
schema + pub fn parquet_schema(&self) -> &SchemaDescriptor { + self.file_reader.metadata().file_metadata().schema_descr() + } + /// Returns the key value metadata, returns `None` if [`ArrowReaderOptions::skip_arrow_metadata`] fn get_kv_metadata(&self) -> Option<&Vec> { if self.options.skip_arrow_metadata { @@ -236,6 +273,7 @@ impl ParquetRecordBatchReader { #[cfg(test)] mod tests { + use bytes::Bytes; use std::cmp::min; use std::convert::TryFrom; use std::fs::File; @@ -256,7 +294,7 @@ mod tests { use crate::arrow::arrow_reader::{ ArrowReader, ArrowReaderOptions, ParquetFileArrowReader, }; - use crate::arrow::converter::{ + use crate::arrow::buffer::converter::{ BinaryArrayConverter, Converter, FixedSizeArrayConverter, FromConverter, IntervalDayTimeArrayConverter, LargeUtf8ArrayConverter, Utf8ArrayConverter, }; @@ -273,7 +311,6 @@ mod tests { use crate::file::writer::SerializedFileWriter; use crate::schema::parser::parse_message_type; use crate::schema::types::{Type, TypePtr}; - use crate::util::cursor::SliceableCursor; use crate::util::test_common::RandGen; #[test] @@ -357,8 +394,7 @@ mod tests { file.rewind().unwrap(); - let parquet_reader = SerializedFileReader::try_from(file).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let record_reader = arrow_reader.get_record_reader(2).unwrap(); let batches = record_reader.collect::>>().unwrap(); @@ -589,9 +625,8 @@ mod tests { let file_variants = vec![("fixed_length", 25), ("int32", 4), ("int64", 10)]; for (prefix, target_precision) in file_variants { let path = format!("{}/{}_decimal.parquet", testdata, prefix); - let parquet_reader = - SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + let file = File::open(&path).unwrap(); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let mut 
record_reader = arrow_reader.get_record_reader(32).unwrap(); @@ -609,7 +644,7 @@ mod tests { assert_eq!(col.scale(), 2); for (i, v) in expected.enumerate() { - assert_eq!(col.value(i), v * 100_i128); + assert_eq!(col.value(i).as_i128(), v * 100_i128); } } } @@ -859,9 +894,7 @@ mod tests { file.rewind().unwrap(); - let parquet_reader = SerializedFileReader::try_from(file).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); - + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let mut record_reader = arrow_reader .get_record_reader(opts.record_batch_size) .unwrap(); @@ -1010,11 +1043,7 @@ mod tests { let testdata = arrow::util::test_util::parquet_test_data(); let path = format!("{}/nested_structs.rust.parquet", testdata); let file = File::open(&path).unwrap(); - let parquet_file_reader = SerializedFileReader::try_from(file).unwrap(); - let file_metadata = parquet_file_reader.metadata().file_metadata(); - let schema = file_metadata.schema_descr_ptr(); - - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_file_reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let record_batch_reader = arrow_reader .get_record_reader(60) .expect("Failed to read into array!"); @@ -1023,7 +1052,7 @@ mod tests { batch.unwrap(); } - let mask = ProjectionMask::leaves(&schema, [3, 8, 10]); + let mask = ProjectionMask::leaves(arrow_reader.parquet_schema(), [3, 8, 10]); let projected_reader = arrow_reader .get_record_reader_by_columns(mask.clone(), 60) .unwrap(); @@ -1063,9 +1092,8 @@ mod tests { fn test_read_maps() { let testdata = arrow::util::test_util::parquet_test_data(); let path = format!("{}/nested_maps.snappy.parquet", testdata); - let parquet_file_reader = - SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_file_reader)); + let file = File::open(&path).unwrap(); + let mut arrow_reader = 
ParquetFileArrowReader::try_new(file).unwrap(); let record_batch_reader = arrow_reader .get_record_reader(60) .expect("Failed to read into array!"); @@ -1112,14 +1140,12 @@ mod tests { writer.close().unwrap(); } - let file_reader = Arc::new(SerializedFileReader::new(file).unwrap()); - let file_metadata = file_reader.metadata().file_metadata(); - let mask = ProjectionMask::leaves(file_metadata.schema_descr(), [0]); + let mut reader = ParquetFileArrowReader::try_new(file).unwrap(); + let mask = ProjectionMask::leaves(reader.parquet_schema(), [0]); - let mut batch = ParquetFileArrowReader::new(file_reader); - let reader = batch.get_record_reader_by_columns(mask, 1024).unwrap(); + let reader = reader.get_record_reader_by_columns(mask, 1024).unwrap(); - let expected_schema = arrow::datatypes::Schema::new(vec![Field::new( + let expected_schema = Schema::new(vec![Field::new( "group", ArrowDataType::Struct(vec![Field::new("leaf", ArrowDataType::Int32, false)]), true, @@ -1150,10 +1176,8 @@ mod tests { 114, 111, 119, 0, 130, 0, 0, 0, 80, 65, 82, 49, ]; - let file = SliceableCursor::new(data); - let file_reader = SerializedFileReader::new(file).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader)); - + let file = Bytes::from(data); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let mut record_batch_reader = arrow_reader .get_record_reader_by_columns(ProjectionMask::all(), 10) .unwrap(); @@ -1229,8 +1253,7 @@ mod tests { file.rewind().unwrap(); - let parquet_reader = SerializedFileReader::try_from(file).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let record_reader = arrow_reader.get_record_reader(3).unwrap(); @@ -1268,9 +1291,8 @@ mod tests { fn test_read_null_list() { let testdata = arrow::util::test_util::parquet_test_data(); let path = format!("{}/null_list.parquet", testdata); - let 
parquet_file_reader = - SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_file_reader)); + let file = File::open(&path).unwrap(); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let mut record_batch_reader = arrow_reader .get_record_reader(60) .expect("Failed to read into array!"); @@ -1390,12 +1412,12 @@ mod tests { let testdata = arrow::util::test_util::parquet_test_data(); let path = format!("{}/alltypes_plain.parquet", testdata); let file = File::open(&path).unwrap(); - let reader = SerializedFileReader::try_from(file).unwrap(); - let file_metadata = reader.metadata().file_metadata(); + + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); + let file_metadata = arrow_reader.metadata().file_metadata(); let expected_rows = file_metadata.num_rows() as usize; let schema = file_metadata.schema_descr_ptr(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); let mask = ProjectionMask::leaves(&schema, []); let batch_reader = arrow_reader.get_record_reader_by_columns(mask, 2).unwrap(); diff --git a/parquet/src/arrow/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs similarity index 100% rename from parquet/src/arrow/levels.rs rename to parquet/src/arrow/arrow_writer/levels.rs diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer/mod.rs similarity index 98% rename from parquet/src/arrow/arrow_writer.rs rename to parquet/src/arrow/arrow_writer/mod.rs index 530dfe2ad090..83f1bc70b525 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -27,18 +27,20 @@ use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::Array; -use super::levels::LevelInfo; use super::schema::{ add_encoded_arrow_schema_to_metadata, arrow_to_parquet_schema, decimal_length_from_precision, }; -use 
crate::arrow::levels::calculate_array_levels; use crate::column::writer::ColumnWriter; use crate::errors::{ParquetError, Result}; +use crate::file::metadata::RowGroupMetaDataPtr; use crate::file::properties::WriterProperties; use crate::file::writer::{SerializedColumnWriter, SerializedRowGroupWriter}; use crate::{data_type::*, file::writer::SerializedFileWriter}; +use levels::{calculate_array_levels, LevelInfo}; + +mod levels; /// Arrow writer /// @@ -95,6 +97,11 @@ impl ArrowWriter { }) } + /// Returns metadata for any flushed row groups + pub fn flushed_row_groups(&self) -> &[RowGroupMetaDataPtr] { + self.writer.flushed_row_groups() + } + /// Enqueues the provided `RecordBatch` to be written /// /// If following this there are more than `max_row_group_size` rows buffered, @@ -569,7 +576,7 @@ macro_rules! def_get_binary_array_fn { fn $name(array: &$ty) -> Vec { let mut byte_array = ByteArray::new(); let ptr = crate::util::memory::ByteBufferPtr::new( - unsafe { array.value_data().typed_data::() }.to_vec(), + array.value_data().as_slice().to_vec(), ); byte_array.set_data(ptr); array @@ -666,7 +673,7 @@ fn get_decimal_array_slice( let mut values = Vec::with_capacity(indices.len()); let size = decimal_length_from_precision(array.precision()); for i in indices { - let as_be_bytes = array.value(*i).to_be_bytes(); + let as_be_bytes = array.value(*i).as_i128().to_be_bytes(); let resized_value = as_be_bytes[(16 - size)..].to_vec(); values.push(FixedLenByteArray::from(ByteArray::from(resized_value))); } @@ -689,6 +696,7 @@ fn get_fsb_array_slice( mod tests { use super::*; + use bytes::Bytes; use std::fs::File; use std::sync::Arc; @@ -750,9 +758,8 @@ mod tests { writer.close().unwrap(); } - let cursor = crate::file::serialized_reader::SliceableCursor::new(buffer); - let reader = SerializedFileReader::new(cursor).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); + let cursor = Bytes::from(buffer); + let mut arrow_reader = 
ParquetFileArrowReader::try_new(cursor).unwrap(); let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap(); let actual_batch = record_batch_reader @@ -1187,8 +1194,8 @@ mod tests { writer.write(&expected_batch).unwrap(); writer.close().unwrap(); - let reader = SerializedFileReader::new(file.try_clone().unwrap()).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); + let mut arrow_reader = + ParquetFileArrowReader::try_new(file.try_clone().unwrap()).unwrap(); let mut record_batch_reader = arrow_reader.get_record_reader(1024).unwrap(); let actual_batch = record_batch_reader @@ -1917,10 +1924,9 @@ mod tests { writer.close().unwrap(); - let reader = SerializedFileReader::new(file).unwrap(); - assert_eq!(&row_group_sizes(reader.metadata()), &[200, 200, 50]); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); + assert_eq!(&row_group_sizes(arrow_reader.metadata()), &[200, 200, 50]); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); let batches = arrow_reader .get_record_reader(100) .unwrap() @@ -2060,13 +2066,12 @@ mod tests { writer.close().unwrap(); // Read Data - let reader = SerializedFileReader::new(file).unwrap(); - // Should have written entire first batch and first row of second to the first row group // leaving a single row in the second row group - assert_eq!(&row_group_sizes(reader.metadata()), &[6, 1]); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); + assert_eq!(&row_group_sizes(arrow_reader.metadata()), &[6, 1]); + let batches = arrow_reader .get_record_reader(2) .unwrap() diff --git a/parquet/src/arrow/async_reader.rs b/parquet/src/arrow/async_reader.rs index 5cd091184bfa..3f14114e3c60 100644 --- a/parquet/src/arrow/async_reader.rs +++ b/parquet/src/arrow/async_reader.rs @@ -78,13 +78,15 @@ use std::collections::VecDeque; use std::fmt::Formatter; use std::io::{Cursor, 
SeekFrom}; +use std::ops::Range; use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use byteorder::{ByteOrder, LittleEndian}; +use bytes::{Buf, Bytes}; use futures::future::{BoxFuture, FutureExt}; use futures::stream::Stream; +use parquet_format::PageType; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; use arrow::datatypes::SchemaRef; @@ -95,14 +97,64 @@ use crate::arrow::arrow_reader::ParquetRecordBatchReader; use crate::arrow::schema::parquet_to_arrow_schema; use crate::arrow::ProjectionMask; use crate::basic::Compression; -use crate::column::page::{PageIterator, PageReader}; +use crate::column::page::{Page, PageIterator, PageReader}; +use crate::compression::{create_codec, Codec}; use crate::errors::{ParquetError, Result}; -use crate::file::footer::parse_metadata_buffer; +use crate::file::footer::{decode_footer, decode_metadata}; use crate::file::metadata::ParquetMetaData; use crate::file::reader::SerializedPageReader; -use crate::file::PARQUET_MAGIC; -use crate::schema::types::{ColumnDescPtr, SchemaDescPtr}; -use crate::util::memory::ByteBufferPtr; +use crate::file::serialized_reader::{decode_page, read_page_header}; +use crate::file::FOOTER_SIZE; +use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, SchemaDescriptor}; + +/// The asynchronous interface used by [`ParquetRecordBatchStream`] to read parquet files +pub trait AsyncFileReader { + /// Retrieve the bytes in `range` + fn get_bytes(&mut self, range: Range) -> BoxFuture<'_, Result>; + + /// Provides asynchronous access to the [`ParquetMetaData`] of a parquet file, + /// allowing fine-grained control over how metadata is sourced, in particular allowing + /// for caching, pre-fetching, catalog metadata, etc... 
+ fn get_metadata(&mut self) -> BoxFuture<'_, Result>>; +} + +impl AsyncFileReader for T { + fn get_bytes(&mut self, range: Range) -> BoxFuture<'_, Result> { + async move { + self.seek(SeekFrom::Start(range.start as u64)).await?; + + let to_read = range.end - range.start; + let mut buffer = Vec::with_capacity(to_read); + let read = self.take(to_read as u64).read_to_end(&mut buffer).await?; + if read != to_read { + eof_err!("expected to read {} bytes, got {}", to_read, read); + } + + Ok(buffer.into()) + } + .boxed() + } + + fn get_metadata(&mut self) -> BoxFuture<'_, Result>> { + const FOOTER_SIZE_I64: i64 = FOOTER_SIZE as i64; + async move { + self.seek(SeekFrom::End(-FOOTER_SIZE_I64)).await?; + + let mut buf = [0_u8; FOOTER_SIZE]; + self.read_exact(&mut buf).await?; + + let metadata_len = decode_footer(&buf)?; + self.seek(SeekFrom::End(-FOOTER_SIZE_I64 - metadata_len as i64)) + .await?; + + let mut buf = Vec::with_capacity(metadata_len); + self.read_to_end(&mut buf).await?; + + Ok(Arc::new(decode_metadata(&buf)?)) + } + .boxed() + } +} /// A builder used to construct a [`ParquetRecordBatchStream`] for a parquet file /// @@ -124,10 +176,10 @@ pub struct ParquetRecordBatchStreamBuilder { projection: ProjectionMask, } -impl ParquetRecordBatchStreamBuilder { +impl ParquetRecordBatchStreamBuilder { /// Create a new [`ParquetRecordBatchStreamBuilder`] with the provided parquet file pub async fn new(mut input: T) -> Result { - let metadata = Arc::new(read_footer(&mut input).await?); + let metadata = input.get_metadata().await?; let schema = Arc::new(parquet_to_arrow_schema( metadata.file_metadata().schema_descr(), @@ -149,6 +201,11 @@ impl ParquetRecordBatchStreamBuilder { &self.metadata } + /// Returns the parquet [`SchemaDescriptor`] for this parquet file + pub fn parquet_schema(&self) -> &SchemaDescriptor { + self.metadata.file_metadata().schema_descr() + } + /// Returns the arrow [`SchemaRef`] for this parquet file pub fn schema(&self) -> &SchemaRef { &self.schema @@ 
-264,8 +321,9 @@ impl ParquetRecordBatchStream { } } -impl Stream - for ParquetRecordBatchStream +impl Stream for ParquetRecordBatchStream +where + T: AsyncFileReader + Unpin + Send + 'static, { type Item = Result; @@ -309,6 +367,7 @@ impl Stream let mut column_chunks = vec![None; row_group_metadata.columns().len()]; + // TODO: Combine consecutive ranges for (idx, chunk) in column_chunks.iter_mut().enumerate() { if !projection.leaf_included(idx) { continue; @@ -316,18 +375,16 @@ impl Stream let column = row_group_metadata.column(idx); let (start, length) = column.byte_range(); - let end = start + length; - - input.seek(SeekFrom::Start(start)).await?; - let mut buffer = vec![0_u8; (end - start) as usize]; - input.read_exact(buffer.as_mut_slice()).await?; + let data = input + .get_bytes(start as usize..(start + length) as usize) + .await?; *chunk = Some(InMemoryColumnChunk { num_values: column.num_values(), compression: column.compression(), physical_type: column.column_type(), - data: ByteBufferPtr::new(buffer), + data, }); } @@ -379,34 +436,7 @@ impl Stream } } -async fn read_footer( - input: &mut T, -) -> Result { - input.seek(SeekFrom::End(-8)).await?; - - let mut buf = [0_u8; 8]; - input.read_exact(&mut buf).await?; - - if buf[4..] != PARQUET_MAGIC { - return Err(general_err!("Invalid Parquet file. Corrupt footer")); - } - - let metadata_len = LittleEndian::read_i32(&buf[..4]) as i64; - if metadata_len < 0 { - return Err(general_err!( - "Invalid Parquet file. 
Metadata length is less than zero ({})", - metadata_len - )); - } - - input.seek(SeekFrom::End(-8 - metadata_len)).await?; - - let mut buf = Vec::with_capacity(metadata_len as usize + 8); - input.read_to_end(&mut buf).await?; - - parse_metadata_buffer(&mut Cursor::new(buf)) -} - +/// An in-memory collection of column chunks struct InMemoryRowGroup { schema: SchemaDescPtr, column_chunks: Vec>, @@ -433,18 +463,19 @@ impl RowGroupCollection for InMemoryRowGroup { } } +/// Data for a single column chunk #[derive(Clone)] struct InMemoryColumnChunk { num_values: i64, compression: Compression, physical_type: crate::basic::Type, - data: ByteBufferPtr, + data: Bytes, } impl InMemoryColumnChunk { fn pages(&self) -> Result> { let page_reader = SerializedPageReader::new( - Cursor::new(self.data.clone()), + self.data.clone().reader(), self.num_values, self.compression, self.physical_type, @@ -454,6 +485,82 @@ impl InMemoryColumnChunk { } } +// A serialized implementation for Parquet [`PageReader`]. +struct InMemoryColumnChunkReader { + chunk: InMemoryColumnChunk, + decompressor: Option>, + offset: usize, + seen_num_values: i64, +} + +impl InMemoryColumnChunkReader { + /// Creates a new serialized page reader from file source. 
+ pub fn new(chunk: InMemoryColumnChunk) -> Result { + let decompressor = create_codec(chunk.compression)?; + let result = Self { + chunk, + decompressor, + offset: 0, + seen_num_values: 0, + }; + Ok(result) + } +} + +impl Iterator for InMemoryColumnChunkReader { + type Item = Result; + + fn next(&mut self) -> Option { + self.get_next_page().transpose() + } +} + +impl PageReader for InMemoryColumnChunkReader { + fn get_next_page(&mut self) -> Result> { + while self.seen_num_values < self.chunk.num_values { + let mut cursor = Cursor::new(&self.chunk.data.as_ref()[self.offset..]); + let page_header = read_page_header(&mut cursor)?; + let compressed_size = page_header.compressed_page_size as usize; + + self.offset += cursor.position() as usize; + let start_offset = self.offset; + let end_offset = self.offset + compressed_size; + self.offset = end_offset; + + let buffer = self.chunk.data.slice(start_offset..end_offset); + + let result = match page_header.type_ { + PageType::DataPage | PageType::DataPageV2 => { + let decoded = decode_page( + page_header, + buffer.into(), + self.chunk.physical_type, + self.decompressor.as_mut(), + )?; + self.seen_num_values += decoded.num_values() as i64; + decoded + } + PageType::DictionaryPage => decode_page( + page_header, + buffer.into(), + self.chunk.physical_type, + self.decompressor.as_mut(), + )?, + _ => { + // For unknown page type (e.g., INDEX_PAGE), skip and read next. + continue; + } + }; + + return Ok(Some(result)); + } + + // We are at the end of this column chunk and no more page left. Return None. 
+ Ok(None) + } +} + +/// Implements [`PageIterator`] for a single column chunk, yielding a single [`PageReader`] struct ColumnChunkIterator { schema: SchemaDescPtr, column_schema: ColumnDescPtr, @@ -477,3 +584,82 @@ impl PageIterator for ColumnChunkIterator { Ok(self.column_schema.clone()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::arrow::{ArrowReader, ParquetFileArrowReader}; + use arrow::error::Result as ArrowResult; + use futures::TryStreamExt; + use std::sync::Mutex; + + struct TestReader { + data: Bytes, + metadata: Arc, + requests: Arc>>>, + } + + impl AsyncFileReader for TestReader { + fn get_bytes(&mut self, range: Range) -> BoxFuture<'_, Result> { + self.requests.lock().unwrap().push(range.clone()); + futures::future::ready(Ok(self.data.slice(range))).boxed() + } + + fn get_metadata(&mut self) -> BoxFuture<'_, Result>> { + futures::future::ready(Ok(self.metadata.clone())).boxed() + } + } + + #[tokio::test] + async fn test_async_reader() { + let testdata = arrow::util::test_util::parquet_test_data(); + let path = format!("{}/alltypes_plain.parquet", testdata); + let data = Bytes::from(std::fs::read(path).unwrap()); + + let metadata = crate::file::footer::parse_metadata(&data).unwrap(); + let metadata = Arc::new(metadata); + + assert_eq!(metadata.num_row_groups(), 1); + + let async_reader = TestReader { + data: data.clone(), + metadata: metadata.clone(), + requests: Default::default(), + }; + + let requests = async_reader.requests.clone(); + let builder = ParquetRecordBatchStreamBuilder::new(async_reader) + .await + .unwrap(); + + let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![1, 2]); + let stream = builder + .with_projection(mask.clone()) + .with_batch_size(1024) + .build() + .unwrap(); + + let async_batches: Vec<_> = stream.try_collect().await.unwrap(); + + let mut sync_reader = ParquetFileArrowReader::try_new(data).unwrap(); + let sync_batches = sync_reader + .get_record_reader_by_columns(mask, 1024) + .unwrap() + 
.collect::>>() + .unwrap(); + + assert_eq!(async_batches, sync_batches); + + let requests = requests.lock().unwrap(); + let (offset_1, length_1) = metadata.row_group(0).column(1).byte_range(); + let (offset_2, length_2) = metadata.row_group(0).column(2).byte_range(); + + assert_eq!( + &requests[..], + &[ + offset_1 as usize..(offset_1 + length_1) as usize, + offset_2 as usize..(offset_2 + length_2) as usize + ] + ); + } +} diff --git a/parquet/src/arrow/bit_util.rs b/parquet/src/arrow/buffer/bit_util.rs similarity index 100% rename from parquet/src/arrow/bit_util.rs rename to parquet/src/arrow/buffer/bit_util.rs diff --git a/parquet/src/arrow/converter.rs b/parquet/src/arrow/buffer/converter.rs similarity index 100% rename from parquet/src/arrow/converter.rs rename to parquet/src/arrow/buffer/converter.rs diff --git a/parquet/src/arrow/array_reader/dictionary_buffer.rs b/parquet/src/arrow/buffer/dictionary_buffer.rs similarity index 98% rename from parquet/src/arrow/array_reader/dictionary_buffer.rs rename to parquet/src/arrow/buffer/dictionary_buffer.rs index 6dc9cc80f398..ffa3a4843c50 100644 --- a/parquet/src/arrow/array_reader/dictionary_buffer.rs +++ b/parquet/src/arrow/buffer/dictionary_buffer.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::arrow::array_reader::offset_buffer::OffsetBuffer; +use crate::arrow::buffer::offset_buffer::OffsetBuffer; use crate::arrow::record_reader::buffer::{ BufferQueue, ScalarBuffer, ScalarValue, ValuesBuffer, }; @@ -106,7 +106,7 @@ impl Self::Dict { keys, values } => { let mut spilled = OffsetBuffer::default(); let dict_buffers = values.data().buffers(); - let dict_offsets = unsafe { dict_buffers[0].typed_data::() }; + let dict_offsets = dict_buffers[0].typed_data::(); let dict_values = dict_buffers[1].as_slice(); if values.is_empty() { diff --git a/arrow/src/arch/mod.rs b/parquet/src/arrow/buffer/mod.rs similarity index 79% rename from arrow/src/arch/mod.rs rename to parquet/src/arrow/buffer/mod.rs index 56d8f4c0e2cf..5ee89aa1a782 100644 --- a/arrow/src/arch/mod.rs +++ b/parquet/src/arrow/buffer/mod.rs @@ -15,8 +15,9 @@ // specific language governing permissions and limitations // under the License. -/// -/// Arch module contains architecture specific code. -/// Be aware that not all machines have these specific operations available. -#[cfg(all(target_arch = "x86_64", feature = "avx512"))] -pub(crate) mod avx512; +//! Logic for reading data into arrow buffers + +pub mod bit_util; +pub mod converter; +pub mod dictionary_buffer; +pub mod offset_buffer; diff --git a/parquet/src/arrow/array_reader/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs similarity index 98% rename from parquet/src/arrow/array_reader/offset_buffer.rs rename to parquet/src/arrow/buffer/offset_buffer.rs index 23e7af7595c4..2d73e3f146b6 100644 --- a/parquet/src/arrow/array_reader/offset_buffer.rs +++ b/parquet/src/arrow/buffer/offset_buffer.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use crate::arrow::bit_util::iter_set_bits_rev; +use crate::arrow::buffer::bit_util::iter_set_bits_rev; use crate::arrow::record_reader::buffer::{ BufferQueue, ScalarBuffer, ScalarValue, ValuesBuffer, }; @@ -58,7 +58,7 @@ impl OffsetBuffer { /// the start of a UTF-8 codepoint /// /// Note: This does not verify that the entirety of `data` is valid - /// UTF-8. This should be done by calling [`Self::values_as_str`] after + /// UTF-8. This should be done by calling [`Self::check_valid_utf8`] after /// all data has been written pub fn try_push(&mut self, data: &[u8], validate_utf8: bool) -> Result<()> { if validate_utf8 { diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index 5a5135cd34d7..3aee7cf42cbc 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -96,12 +96,9 @@ //! # writer.close().unwrap(); //! //! let file = File::open("data.parquet").unwrap(); -//! let file_reader = SerializedFileReader::new(file).unwrap(); //! -//! let file_metadata = file_reader.metadata().file_metadata(); -//! let mask = ProjectionMask::leaves(file_metadata.schema_descr(), [0]); -//! -//! let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader)); +//! let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); +//! let mask = ProjectionMask::leaves(arrow_reader.parquet_schema(), [0]); //! //! println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap()); //! 
println!("Arrow schema after projection is: {}", @@ -125,14 +122,12 @@ experimental_mod!(array_reader); pub mod arrow_reader; pub mod arrow_writer; -mod bit_util; +mod buffer; #[cfg(feature = "async")] pub mod async_reader; -experimental_mod!(converter); -pub(in crate::arrow) mod levels; -pub(in crate::arrow) mod record_reader; +mod record_reader; experimental_mod!(schema); pub use self::arrow_reader::ArrowReader; diff --git a/parquet/src/arrow/record_reader/buffer.rs b/parquet/src/arrow/record_reader/buffer.rs index 4fa8213dedcc..7101eaa9ccc9 100644 --- a/parquet/src/arrow/record_reader/buffer.rs +++ b/parquet/src/arrow/record_reader/buffer.rs @@ -17,9 +17,9 @@ use std::marker::PhantomData; -use crate::arrow::bit_util::iter_set_bits_rev; +use crate::arrow::buffer::bit_util::iter_set_bits_rev; use arrow::buffer::{Buffer, MutableBuffer}; -use arrow::datatypes::ToByteSlice; +use arrow::datatypes::ArrowNativeType; /// A buffer that supports writing new data to the end, and removing data from the front /// @@ -172,7 +172,7 @@ impl ScalarBuffer { } } -impl ScalarBuffer { +impl ScalarBuffer { pub fn push(&mut self, v: T) { self.buffer.push(v); self.len += 1; diff --git a/parquet/src/arrow/record_reader/definition_levels.rs b/parquet/src/arrow/record_reader/definition_levels.rs index 93de4006c10d..9cca25c8ae5c 100644 --- a/parquet/src/arrow/record_reader/definition_levels.rs +++ b/parquet/src/arrow/record_reader/definition_levels.rs @@ -21,7 +21,7 @@ use arrow::array::BooleanBufferBuilder; use arrow::bitmap::Bitmap; use arrow::buffer::Buffer; -use crate::arrow::bit_util::count_set_bits; +use crate::arrow::buffer::bit_util::count_set_bits; use crate::arrow::record_reader::buffer::BufferQueue; use crate::basic::Encoding; use crate::column::reader::decoder::{ diff --git a/parquet/src/arrow/record_reader.rs b/parquet/src/arrow/record_reader/mod.rs similarity index 99% rename from parquet/src/arrow/record_reader.rs rename to parquet/src/arrow/record_reader/mod.rs index 
89d782b1aca8..023a538a2741 100644 --- a/parquet/src/arrow/record_reader.rs +++ b/parquet/src/arrow/record_reader/mod.rs @@ -573,7 +573,7 @@ mod tests { // Verify result record data let actual = record_reader.consume_record_data().unwrap(); - let actual_values = unsafe { actual.typed_data::() }; + let actual_values = actual.typed_data::(); let expected = &[0, 7, 0, 6, 3, 0, 8]; assert_eq!(actual_values.len(), expected.len()); @@ -687,7 +687,7 @@ mod tests { // Verify result record data let actual = record_reader.consume_record_data().unwrap(); - let actual_values = unsafe { actual.typed_data::() }; + let actual_values = actual.typed_data::(); let expected = &[4, 0, 0, 7, 6, 3, 2, 8, 9]; assert_eq!(actual_values.len(), expected.len()); diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs index 5416e4078538..f3d0a3d9b36b 100644 --- a/parquet/src/arrow/schema.rs +++ b/parquet/src/arrow/schema.rs @@ -478,11 +478,11 @@ fn arrow_to_parquet_type(field: &Field) -> Result { mod tests { use super::*; - use std::{collections::HashMap, convert::TryFrom, sync::Arc}; + use std::{collections::HashMap, sync::Arc}; use arrow::datatypes::{DataType, Field, IntervalUnit, TimeUnit}; - use crate::file::{metadata::KeyValue, reader::SerializedFileReader}; + use crate::file::metadata::KeyValue; use crate::{ arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader}, schema::{parser::parse_message_type, types::SchemaDescriptor}, @@ -571,9 +571,12 @@ mod tests { ]; assert_eq!(&arrow_fields, converted_arrow_schema.fields()); - let converted_arrow_schema = - parquet_to_arrow_schema_by_columns(&parquet_schema, ProjectionMask::all(), None) - .unwrap(); + let converted_arrow_schema = parquet_to_arrow_schema_by_columns( + &parquet_schema, + ProjectionMask::all(), + None, + ) + .unwrap(); assert_eq!(&arrow_fields, converted_arrow_schema.fields()); } @@ -1599,13 +1602,13 @@ mod tests { writer.close()?; // read file back - let parquet_reader = SerializedFileReader::try_from(file)?; - 
let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let read_schema = arrow_reader.get_schema()?; assert_eq!(schema, read_schema); // read all fields by columns - let partial_read_schema = arrow_reader.get_schema_by_columns(ProjectionMask::all())?; + let partial_read_schema = + arrow_reader.get_schema_by_columns(ProjectionMask::all())?; assert_eq!(schema, partial_read_schema); Ok(()) @@ -1668,13 +1671,13 @@ mod tests { writer.close()?; // read file back - let parquet_reader = SerializedFileReader::try_from(file)?; - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap(); let read_schema = arrow_reader.get_schema()?; assert_eq!(schema, read_schema); // read all fields by columns - let partial_read_schema = arrow_reader.get_schema_by_columns(ProjectionMask::all())?; + let partial_read_schema = + arrow_reader.get_schema_by_columns(ProjectionMask::all())?; assert_eq!(schema, partial_read_schema); Ok(()) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 7eff2156fd9d..59a0fe07b7de 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -41,7 +41,7 @@ pub use parquet_format::{ /// control the on disk storage format. /// For example INT16 is not included as a type since a good encoding of INT32 /// would handle this. 
-#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Type { BOOLEAN, INT32, diff --git a/parquet/src/bin/parquet-fromcsv-help.txt b/parquet/src/bin/parquet-fromcsv-help.txt new file mode 100644 index 000000000000..f4fe704ab267 --- /dev/null +++ b/parquet/src/bin/parquet-fromcsv-help.txt @@ -0,0 +1,66 @@ +Apache Arrow +Binary to convert csv to Parquet + +USAGE: + parquet [OPTIONS] --schema --input-file --output-file + +OPTIONS: + -b, --batch-size + batch size + + [env: PARQUET_FROM_CSV_BATCHSIZE=] + [default: 1000] + + -c, --parquet-compression + compression mode + + [default: SNAPPY] + + -d, --delimiter + field delimiter + + default value: when input_format==CSV: ',' when input_format==TSV: 'TAB' + + -D, --double-quote + double quote + + -e, --escape-char + escape charactor + + -f, --input-format + input file format + + [default: csv] + [possible values: csv, tsv] + + -h, --has-header + has header + + --help + Print help information + + -i, --input-file + input CSV file + + -m, --max-row-group-size + max row group size + + -o, --output-file + output Parquet file + + -q, --quote-char + quate charactor + + -r, --record-terminator + record terminator + + [possible values: lf, crlf, cr] + + -s, --schema + message schema for output Parquet + + -V, --version + Print version information + + -w, --writer-version + writer version diff --git a/parquet/src/bin/parquet-fromcsv.rs b/parquet/src/bin/parquet-fromcsv.rs new file mode 100644 index 000000000000..aa1d50563cd9 --- /dev/null +++ b/parquet/src/bin/parquet-fromcsv.rs @@ -0,0 +1,636 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Binary file to converts csv to Parquet file +//! +//! # Install +//! +//! `parquet-fromcsv` can be installed using `cargo`: +//! +//! ```text +//! cargo install parquet --features=cli +//! ``` +//! +//! After this `parquet-fromcsv` shoud be available: +//! +//! ```text +//! parquet-fromcsv --schema message_schema_for_parquet.txt input.csv output.parquet +//! ``` +//! +//! The binary can also be built from the source code and run as follows: +//! +//! ```text +//! cargo run --features=cli --bin parquet-fromcsv --schema message_schema_for_parquet.txt \ +//! \ input.csv output.parquet +//! ``` +//! +//! # Options +//! +//! ```text +#![doc = include_str!("./parquet-fromcsv-help.txt")] // Update for this file : Run test test_command_help +//! ``` +//! +//! ## Parquet file options +//! +//! - `-b`, `--batch-size` : Batch size for Parquet +//! - `-c`, `--parquet-compression` : Compression option for Parquet, default is SNAPPY +//! - `-s`, `--schema` : Path to message schema for generated Parquet file +//! - `-o`, `--output-file` : Path to output Parquet file +//! - `-w`, `--writer-version` : Writer version +//! - `-m`, `--max-row-group-size` : Max row group size +//! +//! ## Input file options +//! +//! - `-i`, `--input-file` : Path to input CSV file +//! - `-f`, `--input-format` : Dialect for input file, `csv` or `tsv`. +//! - `-d`, `--delimiter : Field delimitor for CSV file, default depends `--input-format` +//! - `-e`, `--escape` : Escape charactor for input file +//! - `-h`, `--has-header` : Input has header +//! 
- `-r`, `--record-terminator` : Record terminator charactor for input. default is CRLF +//! - `-q`, `--quote-char` : Input quoting charactor +//! + +use std::{ + fmt::Display, + fs::{read_to_string, File}, + path::{Path, PathBuf}, + sync::Arc, +}; + +use arrow::{csv::ReaderBuilder, datatypes::Schema, error::ArrowError}; +use clap::{ArgEnum, Parser}; +use parquet::{ + arrow::{parquet_to_arrow_schema, ArrowWriter}, + basic::Compression, + errors::ParquetError, + file::properties::{WriterProperties, WriterVersion}, + schema::{parser::parse_message_type, types::SchemaDescriptor}, +}; + +#[derive(Debug)] +enum ParquetFromCsvError { + CommandLineParseError(clap::Error), + IoError(std::io::Error), + ArrowError(ArrowError), + ParquetError(ParquetError), + WithContext(String, Box), +} + +impl From for ParquetFromCsvError { + fn from(e: std::io::Error) -> Self { + Self::IoError(e) + } +} + +impl From for ParquetFromCsvError { + fn from(e: ArrowError) -> Self { + Self::ArrowError(e) + } +} + +impl From for ParquetFromCsvError { + fn from(e: ParquetError) -> Self { + Self::ParquetError(e) + } +} + +impl From for ParquetFromCsvError { + fn from(e: clap::Error) -> Self { + Self::CommandLineParseError(e) + } +} + +impl ParquetFromCsvError { + pub fn with_context>( + inner_error: E, + context: &str, + ) -> ParquetFromCsvError { + let inner = inner_error.into(); + ParquetFromCsvError::WithContext(context.to_string(), Box::new(inner)) + } +} + +impl Display for ParquetFromCsvError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParquetFromCsvError::CommandLineParseError(e) => write!(f, "{}", e), + ParquetFromCsvError::IoError(e) => write!(f, "{}", e), + ParquetFromCsvError::ArrowError(e) => write!(f, "{}", e), + ParquetFromCsvError::ParquetError(e) => write!(f, "{}", e), + ParquetFromCsvError::WithContext(c, e) => { + writeln!(f, "{}", e)?; + write!(f, "context: {}", c) + } + } + } +} + +#[derive(Debug, Parser)] +#[clap(author, version, 
about("Binary to convert csv to Parquet"), long_about=None)] +struct Args { + /// Path to a text file containing a parquet schema definition + #[clap(short, long, help("message schema for output Parquet"))] + schema: PathBuf, + /// input CSV file path + #[clap(short, long, help("input CSV file"))] + input_file: PathBuf, + /// output Parquet file path + #[clap(short, long, help("output Parquet file"))] + output_file: PathBuf, + /// input file format + #[clap( + arg_enum, + short('f'), + long, + help("input file format"), + default_value_t=CsvDialect::Csv + )] + input_format: CsvDialect, + /// batch size + #[clap( + short, + long, + help("batch size"), + default_value_t = 1000, + env = "PARQUET_FROM_CSV_BATCHSIZE" + )] + batch_size: usize, + /// has header line + #[clap(short, long, help("has header"))] + has_header: bool, + /// field delimiter + /// + /// default value: + /// when input_format==CSV: ',' + /// when input_format==TSV: 'TAB' + #[clap(short, long, help("field delimiter"))] + delimiter: Option, + #[clap(arg_enum, short, long, help("record terminator"))] + record_terminator: Option, + #[clap(short, long, help("escape charactor"))] + escape_char: Option, + #[clap(short, long, help("quate charactor"))] + quote_char: Option, + #[clap(short('D'), long, help("double quote"))] + double_quote: Option, + #[clap(short('c'), long, help("compression mode"), default_value_t=Compression::SNAPPY)] + #[clap(parse(try_from_str =compression_from_str))] + parquet_compression: Compression, + + #[clap(short, long, help("writer version"))] + #[clap(parse(try_from_str =writer_version_from_str))] + writer_version: Option, + #[clap(short, long, help("max row group size"))] + max_row_group_size: Option, +} + +fn compression_from_str(cmp: &str) -> Result { + match cmp.to_uppercase().as_str() { + "UNCOMPRESSED" => Ok(Compression::UNCOMPRESSED), + "SNAPPY" => Ok(Compression::SNAPPY), + "GZIP" => Ok(Compression::GZIP), + "LZO" => Ok(Compression::LZO), + "BROTLI" => 
Ok(Compression::BROTLI), + "LZ4" => Ok(Compression::LZ4), + "ZSTD" => Ok(Compression::ZSTD), + v => Err( + format!("Unknown compression {0} : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD ",v) + ) + } +} + +fn writer_version_from_str(cmp: &str) -> Result { + match cmp.to_uppercase().as_str() { + "1" => Ok(WriterVersion::PARQUET_1_0), + "2" => Ok(WriterVersion::PARQUET_2_0), + v => Err(format!( + "Unknown writer version {0} : possible values 1, 2", + v + )), + } +} + +impl Args { + fn schema_path(&self) -> &Path { + self.schema.as_path() + } + fn get_delimiter(&self) -> u8 { + match self.delimiter { + Some(ch) => ch as u8, + None => match self.input_format { + CsvDialect::Csv => b',', + CsvDialect::Tsv => b'\t', + }, + } + } + fn get_terminator(&self) -> Option { + match self.record_terminator { + Some(RecordTerminator::LF) => Some(0x0a), + Some(RecordTerminator::CR) => Some(0x0d), + Some(RecordTerminator::Crlf) => None, + None => match self.input_format { + CsvDialect::Csv => None, + CsvDialect::Tsv => Some(0x0a), + }, + } + } + fn get_escape(&self) -> Option { + self.escape_char.map(|ch| ch as u8) + } + fn get_quote(&self) -> Option { + if self.quote_char.is_none() { + match self.input_format { + CsvDialect::Csv => Some(b'\"'), + CsvDialect::Tsv => None, + } + } else { + self.quote_char.map(|c| c as u8) + } + } +} + +#[derive(Debug, Clone, Copy, ArgEnum, PartialEq)] +enum CsvDialect { + Csv, + Tsv, +} + +#[derive(Debug, Clone, Copy, ArgEnum, PartialEq)] +enum RecordTerminator { + LF, + Crlf, + CR, +} + +fn configure_writer_properties(args: &Args) -> WriterProperties { + let mut properties_builder = + WriterProperties::builder().set_compression(args.parquet_compression); + if let Some(writer_version) = args.writer_version { + properties_builder = properties_builder.set_writer_version(writer_version); + } + if let Some(max_row_group_size) = args.max_row_group_size { + properties_builder = + 
properties_builder.set_max_row_group_size(max_row_group_size); + } + properties_builder.build() +} + +fn configure_reader_builder(args: &Args, arrow_schema: Arc) -> ReaderBuilder { + fn configure_reader ReaderBuilder>( + builder: ReaderBuilder, + value: Option, + fun: F, + ) -> ReaderBuilder { + if let Some(val) = value { + fun(builder, val) + } else { + builder + } + } + + let mut builder = ReaderBuilder::new() + .with_schema(arrow_schema) + .with_batch_size(args.batch_size) + .has_header(args.has_header) + .with_delimiter(args.get_delimiter()); + + builder = configure_reader( + builder, + args.get_terminator(), + ReaderBuilder::with_terminator, + ); + builder = configure_reader(builder, args.get_escape(), ReaderBuilder::with_escape); + builder = configure_reader(builder, args.get_quote(), ReaderBuilder::with_quote); + + builder +} + +fn arrow_schema_from_string(schema: &str) -> Result, ParquetFromCsvError> { + let schema = Arc::new(parse_message_type(schema)?); + let desc = SchemaDescriptor::new(schema); + let arrow_schema = Arc::new(parquet_to_arrow_schema(&desc, None)?); + Ok(arrow_schema) +} + +fn convert_csv_to_parquet(args: &Args) -> Result<(), ParquetFromCsvError> { + let schema = read_to_string(args.schema_path()).map_err(|e| { + ParquetFromCsvError::with_context( + e, + &format!("Failed to open schema file {:#?}", args.schema_path()), + ) + })?; + let arrow_schema = arrow_schema_from_string(&schema)?; + + // create output parquet writer + let parquet_file = File::create(&args.output_file).map_err(|e| { + ParquetFromCsvError::with_context( + e, + &format!("Failed to create output file {:#?}", &args.output_file), + ) + })?; + + let writer_properties = Some(configure_writer_properties(args)); + let mut arrow_writer = + ArrowWriter::try_new(parquet_file, arrow_schema.clone(), writer_properties) + .map_err(|e| { + ParquetFromCsvError::with_context(e, "Failed to create ArrowWriter") + })?; + + // open input file + let input_file = 
File::open(&args.input_file).map_err(|e| { + ParquetFromCsvError::with_context( + e, + &format!("Failed to open input file {:#?}", &args.input_file), + ) + })?; + // create input csv reader + let builder = configure_reader_builder(args, arrow_schema); + let reader = builder.build(input_file)?; + for batch_result in reader { + let batch = batch_result.map_err(|e| { + ParquetFromCsvError::with_context(e, "Failed to read RecordBatch from CSV") + })?; + arrow_writer.write(&batch).map_err(|e| { + ParquetFromCsvError::with_context(e, "Failed to write RecordBatch to parquet") + })?; + } + arrow_writer + .close() + .map_err(|e| ParquetFromCsvError::with_context(e, "Failed to close parquet"))?; + Ok(()) +} + +fn main() -> Result<(), ParquetFromCsvError> { + let args = Args::parse(); + convert_csv_to_parquet(&args) +} + +#[cfg(test)] +mod tests { + use std::{ + io::{Seek, SeekFrom, Write}, + path::{Path, PathBuf}, + }; + + use super::*; + use arrow::datatypes::{DataType, Field}; + use clap::{CommandFactory, Parser}; + use tempfile::NamedTempFile; + + #[test] + fn test_command_help() { + let mut cmd = Args::command(); + let dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let mut path_buf = PathBuf::from(dir); + path_buf.push("src"); + path_buf.push("bin"); + path_buf.push("parquet-fromcsv-help.txt"); + let expected = std::fs::read_to_string(path_buf).unwrap(); + let mut buffer_vec = Vec::new(); + let mut buffer = std::io::Cursor::new(&mut buffer_vec); + cmd.write_long_help(&mut buffer).unwrap(); + // Remove Parquet version string from the help text + let mut actual = String::from_utf8(buffer_vec).unwrap(); + let pos = actual.find('\n').unwrap() + 1; + actual = actual[pos..].to_string(); + assert_eq!( + expected, actual, + "help text not match. 
please update to \n---\n{}\n---\n", + actual + ) + } + + fn parse_args(mut extra_args: Vec<&str>) -> Result { + let mut args = vec![ + "test", + "--schema", + "test.schema", + "--input-file", + "infile.csv", + "--output-file", + "out.parquet", + ]; + args.append(&mut extra_args); + let args = Args::try_parse_from(args.iter())?; + Ok(args) + } + + #[test] + fn test_parse_arg_minimum() -> Result<(), ParquetFromCsvError> { + let args = parse_args(vec![])?; + + assert_eq!(args.schema, PathBuf::from(Path::new("test.schema"))); + assert_eq!(args.input_file, PathBuf::from(Path::new("infile.csv"))); + assert_eq!(args.output_file, PathBuf::from(Path::new("out.parquet"))); + // test default values + assert_eq!(args.input_format, CsvDialect::Csv); + assert_eq!(args.batch_size, 1000); + assert_eq!(args.has_header, false); + assert_eq!(args.delimiter, None); + assert_eq!(args.get_delimiter(), b','); + assert_eq!(args.record_terminator, None); + assert_eq!(args.get_terminator(), None); // CRLF + assert_eq!(args.quote_char, None); + assert_eq!(args.get_quote(), Some(b'\"')); + assert_eq!(args.double_quote, None); + assert_eq!(args.parquet_compression, Compression::SNAPPY); + Ok(()) + } + + #[test] + fn test_parse_arg_format_variants() -> Result<(), ParquetFromCsvError> { + let args = parse_args(vec!["--input-format", "csv"])?; + assert_eq!(args.input_format, CsvDialect::Csv); + assert_eq!(args.get_delimiter(), b','); + assert_eq!(args.get_terminator(), None); // CRLF + assert_eq!(args.get_quote(), Some(b'\"')); + assert_eq!(args.get_escape(), None); + let args = parse_args(vec!["--input-format", "tsv"])?; + assert_eq!(args.input_format, CsvDialect::Tsv); + assert_eq!(args.get_delimiter(), b'\t'); + assert_eq!(args.get_terminator(), Some(b'\x0a')); // LF + assert_eq!(args.get_quote(), None); // quote none + assert_eq!(args.get_escape(), None); + + let args = parse_args(vec!["--input-format", "csv", "--escape-char", "\\"])?; + assert_eq!(args.input_format, CsvDialect::Csv); + 
assert_eq!(args.get_delimiter(), b','); + assert_eq!(args.get_terminator(), None); // CRLF + assert_eq!(args.get_quote(), Some(b'\"')); + assert_eq!(args.get_escape(), Some(b'\\')); + + let args = parse_args(vec!["--input-format", "tsv", "--delimiter", ":"])?; + assert_eq!(args.input_format, CsvDialect::Tsv); + assert_eq!(args.get_delimiter(), b':'); + assert_eq!(args.get_terminator(), Some(b'\x0a')); // LF + assert_eq!(args.get_quote(), None); // quote none + assert_eq!(args.get_escape(), None); + + Ok(()) + } + + #[test] + #[should_panic] + fn test_parse_arg_format_error() { + parse_args(vec!["--input-format", "excel"]).unwrap(); + } + + #[test] + fn test_parse_arg_compression_format() { + let args = parse_args(vec!["--parquet-compression", "uncompressed"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::UNCOMPRESSED); + let args = parse_args(vec!["--parquet-compression", "snappy"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::SNAPPY); + let args = parse_args(vec!["--parquet-compression", "gzip"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::GZIP); + let args = parse_args(vec!["--parquet-compression", "lzo"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::LZO); + let args = parse_args(vec!["--parquet-compression", "lz4"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::LZ4); + let args = parse_args(vec!["--parquet-compression", "brotli"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::BROTLI); + let args = parse_args(vec!["--parquet-compression", "zstd"]).unwrap(); + assert_eq!(args.parquet_compression, Compression::ZSTD); + } + + #[test] + fn test_parse_arg_compression_format_fail() { + match parse_args(vec!["--parquet-compression", "zip"]) { + Ok(_) => panic!("unexpected success"), + Err(e) => assert_eq!( + format!("{}", e), + "error: Invalid value \"zip\" for '--parquet-compression ': Unknown compression ZIP : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, 
LZ4, ZSTD \n\nFor more information try --help\n"), + } + } + + fn assert_debug_text(debug_text: &str, name: &str, value: &str) { + let pattern = format!(" {}: {}", name, value); + assert!( + debug_text.contains(&pattern), + "\"{}\" not contains \"{}\"", + debug_text, + pattern + ) + } + + #[test] + fn test_configure_reader_builder() { + let args = Args { + schema: PathBuf::from(Path::new("schema.arvo")), + input_file: PathBuf::from(Path::new("test.csv")), + output_file: PathBuf::from(Path::new("out.parquet")), + batch_size: 1000, + input_format: CsvDialect::Csv, + has_header: false, + delimiter: None, + record_terminator: None, + escape_char: None, + quote_char: None, + double_quote: None, + parquet_compression: Compression::SNAPPY, + writer_version: None, + max_row_group_size: None, + }; + let arrow_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Utf8, false), + Field::new("field2", DataType::Utf8, false), + Field::new("field3", DataType::Utf8, false), + Field::new("field4", DataType::Utf8, false), + Field::new("field5", DataType::Utf8, false), + ])); + + let reader_builder = configure_reader_builder(&args, arrow_schema.clone()); + let builder_debug = format!("{:?}", reader_builder); + assert_debug_text(&builder_debug, "has_header", "false"); + assert_debug_text(&builder_debug, "delimiter", "Some(44)"); + assert_debug_text(&builder_debug, "quote", "Some(34)"); + assert_debug_text(&builder_debug, "terminator", "None"); + assert_debug_text(&builder_debug, "batch_size", "1000"); + assert_debug_text(&builder_debug, "escape", "None"); + + let args = Args { + schema: PathBuf::from(Path::new("schema.arvo")), + input_file: PathBuf::from(Path::new("test.csv")), + output_file: PathBuf::from(Path::new("out.parquet")), + batch_size: 2000, + input_format: CsvDialect::Tsv, + has_header: true, + delimiter: None, + record_terminator: None, + escape_char: Some('\\'), + quote_char: None, + double_quote: None, + parquet_compression: Compression::SNAPPY, + 
writer_version: None, + max_row_group_size: None, + }; + let arrow_schema = Arc::new(Schema::new(vec![ + Field::new("field1", DataType::Utf8, false), + Field::new("field2", DataType::Utf8, false), + Field::new("field3", DataType::Utf8, false), + Field::new("field4", DataType::Utf8, false), + Field::new("field5", DataType::Utf8, false), + ])); + let reader_builder = configure_reader_builder(&args, arrow_schema.clone()); + let builder_debug = format!("{:?}", reader_builder); + assert_debug_text(&builder_debug, "has_header", "true"); + assert_debug_text(&builder_debug, "delimiter", "Some(9)"); + assert_debug_text(&builder_debug, "quote", "None"); + assert_debug_text(&builder_debug, "terminator", "Some(10)"); + assert_debug_text(&builder_debug, "batch_size", "2000"); + assert_debug_text(&builder_debug, "escape", "Some(92)"); + } + + #[test] + fn test_convert_csv_to_parquet() { + let schema = NamedTempFile::new().unwrap(); + let schema_text = r"message schema { + optional int32 id; + optional binary name (STRING); + }"; + schema.as_file().write_all(schema_text.as_bytes()).unwrap(); + + let mut input_file = NamedTempFile::new().unwrap(); + { + let csv = input_file.as_file_mut(); + for index in 1..2000 { + write!(csv, "{},\"name_{}\"\r\n", index, index).unwrap(); + } + csv.flush().unwrap(); + csv.seek(SeekFrom::Start(0)).unwrap(); + } + let output_parquet = NamedTempFile::new().unwrap(); + + let args = Args { + schema: PathBuf::from(schema.path()), + input_file: PathBuf::from(input_file.path()), + output_file: PathBuf::from(output_parquet.path()), + batch_size: 1000, + input_format: CsvDialect::Csv, + has_header: false, + delimiter: None, + record_terminator: None, + escape_char: None, + quote_char: None, + double_quote: None, + parquet_compression: Compression::SNAPPY, + writer_version: None, + max_row_group_size: None, + }; + convert_csv_to_parquet(&args).unwrap(); + } +} diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index 
8c3a31d2f356..9364bd30fffd 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -219,7 +219,7 @@ pub trait PageWriter { fn close(&mut self) -> Result<()>; } -/// An iterator over pages of some specific column in a parquet file. +/// An iterator over pages of one specific column in a parquet file. pub trait PageIterator: Iterator>> + Send { /// Get schema of parquet file. fn schema(&mut self) -> Result; diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs index f4aecbf4e86f..a5e49360a28a 100644 --- a/parquet/src/compression.rs +++ b/parquet/src/compression.rs @@ -49,13 +49,14 @@ use crate::errors::{ParquetError, Result}; /// Parquet compression codec interface. pub trait Codec: Send { - /// Compresses data stored in slice `input_buf` and writes the compressed result + /// Compresses data stored in slice `input_buf` and appends the compressed result /// to `output_buf`. + /// /// Note that you'll need to call `clear()` before reusing the same `output_buf` /// across different `compress` calls. fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec) -> Result<()>; - /// Decompresses data stored in slice `input_buf` and writes output to `output_buf`. + /// Decompresses data stored in slice `input_buf` and appends output to `output_buf`. /// Returns the total number of bytes written. 
fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec) -> Result; @@ -111,9 +112,10 @@ mod snappy_codec { output_buf: &mut Vec, ) -> Result { let len = decompress_len(input_buf)?; - output_buf.resize(len, 0); + let offset = output_buf.len(); + output_buf.resize(offset + len, 0); self.decoder - .decompress(input_buf, output_buf) + .decompress(input_buf, &mut output_buf[offset..]) .map_err(|e| e.into()) } @@ -340,13 +342,13 @@ mod tests { .expect("Error when compressing"); // Decompress with c2 - let mut decompressed_size = c2 + let decompressed_size = c2 .decompress(compressed.as_slice(), &mut decompressed) .expect("Error when decompressing"); assert_eq!(data.len(), decompressed_size); - decompressed.truncate(decompressed_size); assert_eq!(data, decompressed.as_slice()); + decompressed.clear(); compressed.clear(); // Compress with c2 @@ -354,12 +356,32 @@ mod tests { .expect("Error when compressing"); // Decompress with c1 - decompressed_size = c1 + let decompressed_size = c1 .decompress(compressed.as_slice(), &mut decompressed) .expect("Error when decompressing"); assert_eq!(data.len(), decompressed_size); - decompressed.truncate(decompressed_size); assert_eq!(data, decompressed.as_slice()); + + decompressed.clear(); + compressed.clear(); + + // Test does not trample existing data in output buffers + let prefix = &[0xDE, 0xAD, 0xBE, 0xEF]; + decompressed.extend_from_slice(prefix); + compressed.extend_from_slice(prefix); + + c2.compress(data, &mut compressed) + .expect("Error when compressing"); + + assert_eq!(&compressed[..4], prefix); + + let decompressed_size = c2 + .decompress(&compressed[4..], &mut decompressed) + .expect("Error when decompressing"); + + assert_eq!(data.len(), decompressed_size); + assert_eq!(data, &decompressed[4..]); + assert_eq!(&decompressed[..4], prefix); } fn test_codec(c: CodecType) { diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index c01fb153089d..86ccefbd85eb 100644 --- a/parquet/src/data_type.rs +++ 
b/parquet/src/data_type.rs @@ -30,7 +30,7 @@ use crate::column::reader::{ColumnReader, ColumnReaderImpl}; use crate::column::writer::{ColumnWriter, ColumnWriterImpl}; use crate::errors::{ParquetError, Result}; use crate::util::{ - bit_util::{from_ne_slice, FromBytes}, + bit_util::{from_le_slice, from_ne_slice, FromBytes}, memory::ByteBufferPtr, }; @@ -1194,8 +1194,14 @@ make_type!( impl FromBytes for Int96 { type Buffer = [u8; 12]; - fn from_le_bytes(_bs: Self::Buffer) -> Self { - unimplemented!() + fn from_le_bytes(bs: Self::Buffer) -> Self { + let mut i = Int96::new(); + i.set_data( + from_le_slice(&bs[0..4]), + from_le_slice(&bs[4..8]), + from_le_slice(&bs[8..12]), + ); + i } fn from_be_bytes(_bs: Self::Buffer) -> Self { unimplemented!() @@ -1215,8 +1221,8 @@ impl FromBytes for Int96 { // appear to actual be converted directly from bytes impl FromBytes for ByteArray { type Buffer = [u8; 8]; - fn from_le_bytes(_bs: Self::Buffer) -> Self { - unreachable!() + fn from_le_bytes(bs: Self::Buffer) -> Self { + ByteArray::from(bs.to_vec()) } fn from_be_bytes(_bs: Self::Buffer) -> Self { unreachable!() @@ -1229,8 +1235,8 @@ impl FromBytes for ByteArray { impl FromBytes for FixedLenByteArray { type Buffer = [u8; 8]; - fn from_le_bytes(_bs: Self::Buffer) -> Self { - unreachable!() + fn from_le_bytes(bs: Self::Buffer) -> Self { + Self(ByteArray::from(bs.to_vec())) } fn from_be_bytes(_bs: Self::Buffer) -> Self { unreachable!() diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs index fcbb846f110f..c2fb5bd66cf9 100644 --- a/parquet/src/errors.rs +++ b/parquet/src/errors.rs @@ -17,7 +17,7 @@ //! Common Parquet errors and macros. 
-use std::{cell, convert, io, result, str}; +use std::{cell, io, result, str}; #[cfg(any(feature = "arrow", test))] use arrow::error::ArrowError; @@ -108,7 +108,7 @@ pub type Result = result::Result; // ---------------------------------------------------------------------- // Conversion from `ParquetError` to other types of `Error`s -impl convert::From for io::Error { +impl From for io::Error { fn from(e: ParquetError) -> Self { io::Error::new(io::ErrorKind::Other, e) } @@ -135,6 +135,7 @@ macro_rules! eof_err { ($fmt:expr, $($args:expr),*) => (ParquetError::EOF(format!($fmt, $($args),*))); } +#[cfg(any(feature = "arrow", test))] macro_rules! arrow_err { ($fmt:expr) => (ParquetError::ArrowError($fmt.to_owned())); ($fmt:expr, $($args:expr),*) => (ParquetError::ArrowError(format!($fmt, $($args),*))); diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs index db8a23d8ebca..dc1d66d0fa44 100644 --- a/parquet/src/file/footer.rs +++ b/parquet/src/file/footer.rs @@ -15,11 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::{ - cmp::min, - io::{Cursor, Read, Seek, SeekFrom}, - sync::Arc, -}; +use std::{io::Read, sync::Arc}; use byteorder::{ByteOrder, LittleEndian}; use parquet_format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData}; @@ -28,10 +24,7 @@ use thrift::protocol::TCompactInputProtocol; use crate::basic::ColumnOrder; use crate::errors::{ParquetError, Result}; -use crate::file::{ - metadata::*, reader::ChunkReader, DEFAULT_FOOTER_READ_SIZE, FOOTER_SIZE, - PARQUET_MAGIC, -}; +use crate::file::{metadata::*, reader::ChunkReader, FOOTER_SIZE, PARQUET_MAGIC}; use crate::schema::types::{self, SchemaDescriptor}; @@ -52,55 +45,42 @@ pub fn parse_metadata(chunk_reader: &R) -> Result file_size as usize { return Err(general_err!( - "Invalid Parquet file. Metadata length is less than zero ({})", - metadata_len + "Invalid Parquet file. 
Reported metadata length of {} + {} byte footer, but file is only {} bytes", + metadata_len, + FOOTER_SIZE, + file_size )); } - let footer_metadata_len = FOOTER_SIZE + metadata_len as usize; - // build up the reader covering the entire metadata - let mut default_end_cursor = Cursor::new(default_len_end_buf); - if footer_metadata_len > file_size as usize { - return Err(general_err!( - "Invalid Parquet file. Metadata start is less than zero ({})", - file_size as i64 - footer_metadata_len as i64 + let mut metadata = Vec::with_capacity(metadata_len); + + let read = chunk_reader + .get_read(file_size - footer_metadata_len as u64, metadata_len)? + .read_to_end(&mut metadata)?; + + if read != metadata_len { + return Err(eof_err!( + "Expected to read {} bytes of metadata, got {}", + metadata_len, + read )); - } else if footer_metadata_len < DEFAULT_FOOTER_READ_SIZE { - // the whole metadata is in the bytes we already read - default_end_cursor.seek(SeekFrom::End(-(footer_metadata_len as i64)))?; - parse_metadata_buffer(&mut default_end_cursor) - } else { - // the end of file read by default is not long enough, read missing bytes - let complementary_end_read = chunk_reader.get_read( - file_size - footer_metadata_len as u64, - FOOTER_SIZE + metadata_len as usize - default_end_len, - )?; - parse_metadata_buffer(&mut complementary_end_read.chain(default_end_cursor)) } + + decode_metadata(&metadata) } -/// Reads [`ParquetMetaData`] from the provided [`Read`] starting at the readers current position -pub(crate) fn parse_metadata_buffer( - metadata_read: &mut T, -) -> Result { +/// Decodes [`ParquetMetaData`] from the provided bytes +pub fn decode_metadata(metadata_read: &[u8]) -> Result { // TODO: row group filtering let mut prot = TCompactInputProtocol::new(metadata_read); let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot) @@ -124,6 +104,23 @@ pub(crate) fn parse_metadata_buffer( Ok(ParquetMetaData::new(file_metadata, row_groups)) } +/// Decodes 
the footer returning the metadata length in bytes +pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result { + // check this is indeed a parquet file + if slice[4..] != PARQUET_MAGIC { + return Err(general_err!("Invalid Parquet file. Corrupt footer")); + } + + // get the metadata length from the footer + let metadata_len = LittleEndian::read_i32(&slice[..4]); + metadata_len.try_into().map_err(|_| { + general_err!( + "Invalid Parquet file. Metadata length is less than zero ({})", + metadata_len + ) + }) +} + /// Parses column orders from Thrift definition. /// If no column orders are defined, returns `None`. fn parse_column_orders( @@ -160,11 +157,11 @@ fn parse_column_orders( #[cfg(test)] mod tests { use super::*; + use bytes::Bytes; use crate::basic::SortOrder; use crate::basic::Type; use crate::schema::types::Type as SchemaType; - use crate::util::cursor::SliceableCursor; use parquet_format::TypeDefinedOrder; #[test] @@ -180,7 +177,7 @@ mod tests { #[test] fn test_parse_metadata_corrupt_footer() { - let data = SliceableCursor::new(Arc::new(vec![1, 2, 3, 4, 5, 6, 7, 8])); + let data = Bytes::from(vec![1, 2, 3, 4, 5, 6, 7, 8]); let reader_result = parse_metadata(&data); assert!(reader_result.is_err()); assert_eq!( @@ -191,8 +188,7 @@ mod tests { #[test] fn test_parse_metadata_invalid_length() { - let test_file = - SliceableCursor::new(Arc::new(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1'])); + let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']); let reader_result = parse_metadata(&test_file); assert!(reader_result.is_err()); assert_eq!( @@ -205,13 +201,14 @@ mod tests { #[test] fn test_parse_metadata_invalid_start() { - let test_file = - SliceableCursor::new(Arc::new(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1'])); + let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']); let reader_result = parse_metadata(&test_file); assert!(reader_result.is_err()); assert_eq!( reader_result.err().unwrap(), - general_err!("Invalid Parquet file. 
Metadata start is less than zero (-255)") + general_err!( + "Invalid Parquet file. Reported metadata length of 255 + 8 byte footer, but file is only 8 bytes" + ) ); } diff --git a/parquet/src/file/metadata.rs b/parquet/src/file/metadata.rs index 1d35d196322f..a3477dd75779 100644 --- a/parquet/src/file/metadata.rs +++ b/parquet/src/file/metadata.rs @@ -35,11 +35,12 @@ use std::sync::Arc; -use parquet_format::{ColumnChunk, ColumnMetaData, RowGroup}; +use parquet_format::{ColumnChunk, ColumnMetaData, PageLocation, RowGroup}; use crate::basic::{ColumnOrder, Compression, Encoding, Type}; use crate::errors::{ParquetError, Result}; use crate::file::page_encoding_stats::{self, PageEncodingStats}; +use crate::file::page_index::index::Index; use crate::file::statistics::{self, Statistics}; use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, @@ -51,6 +52,8 @@ use crate::schema::types::{ pub struct ParquetMetaData { file_metadata: FileMetaData, row_groups: Vec, + page_indexes: Option>, + offset_indexes: Option>>, } impl ParquetMetaData { @@ -60,6 +63,22 @@ impl ParquetMetaData { ParquetMetaData { file_metadata, row_groups, + page_indexes: None, + offset_indexes: None, + } + } + + pub fn new_with_page_index( + file_metadata: FileMetaData, + row_groups: Vec, + page_indexes: Option>, + offset_indexes: Option>>, + ) -> Self { + ParquetMetaData { + file_metadata, + row_groups, + page_indexes, + offset_indexes, } } @@ -83,6 +102,16 @@ impl ParquetMetaData { pub fn row_groups(&self) -> &[RowGroupMetaData] { &self.row_groups } + + /// Returns page indexes in this file. + pub fn page_indexes(&self) -> Option<&Vec> { + self.page_indexes.as_ref() + } + + /// Returns offset indexes in this file. + pub fn offset_indexes(&self) -> Option<&Vec>> { + self.offset_indexes.as_ref() + } } pub type KeyValue = parquet_format::KeyValue; @@ -188,12 +217,13 @@ impl FileMetaData { pub type RowGroupMetaDataPtr = Arc; /// Metadata for a row group. 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct RowGroupMetaData { columns: Vec, num_rows: i64, total_byte_size: i64, schema_descr: SchemaDescPtr, + // Todo add filter result -> row range } impl RowGroupMetaData { diff --git a/parquet/src/file/mod.rs b/parquet/src/file/mod.rs index d293dc7731ad..66d8ce48e0a7 100644 --- a/parquet/src/file/mod.rs +++ b/parquet/src/file/mod.rs @@ -98,14 +98,13 @@ pub mod footer; pub mod metadata; pub mod page_encoding_stats; +pub mod page_index; pub mod properties; pub mod reader; pub mod serialized_reader; pub mod statistics; pub mod writer; -const FOOTER_SIZE: usize = 8; -pub(crate) const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1']; - -/// The number of bytes read at the end of the parquet file on first read -const DEFAULT_FOOTER_READ_SIZE: usize = 64 * 1024; +/// The length of the parquet footer in bytes +pub const FOOTER_SIZE: usize = 8; +const PARQUET_MAGIC: [u8; 4] = [b'P', b'A', b'R', b'1']; diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs new file mode 100644 index 000000000000..e97826c63b41 --- /dev/null +++ b/parquet/src/file/page_index/index.rs @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::basic::Type; +use crate::data_type::private::ParquetValueType; +use crate::data_type::Int96; +use crate::errors::ParquetError; +use crate::util::bit_util::from_le_slice; +use parquet_format::{BoundaryOrder, ColumnIndex}; +use std::fmt::Debug; + +/// The statistics in one page +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PageIndex { + /// The minimum value, It is None when all values are null + pub min: Option, + /// The maximum value, It is None when all values are null + pub max: Option, + /// Null values in the page + pub null_count: Option, +} + +impl PageIndex { + pub fn min(&self) -> Option<&T> { + self.min.as_ref() + } + pub fn max(&self) -> Option<&T> { + self.max.as_ref() + } + pub fn null_count(&self) -> Option { + self.null_count + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Index { + BOOLEAN(BooleanIndex), + INT32(NativeIndex), + INT64(NativeIndex), + INT96(NativeIndex), + FLOAT(NativeIndex), + DOUBLE(NativeIndex), + BYTE_ARRAY(ByteArrayIndex), + FIXED_LEN_BYTE_ARRAY(ByteArrayIndex), +} + +/// An index of a column of [`Type`] physical representation +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct NativeIndex { + /// The physical type + pub physical_type: Type, + /// The indexes, one item per page + pub indexes: Vec>, + /// the order + pub boundary_order: BoundaryOrder, +} + +impl NativeIndex { + /// Creates a new [`NativeIndex`] + pub(crate) fn try_new( + index: ColumnIndex, + physical_type: Type, + ) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null 
{ + (None, None) + } else { + let min = min.as_slice(); + let max = max.as_slice(); + (Some(from_le_slice::(min)), Some(from_le_slice::(max))) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, ParquetError>>()?; + + Ok(Self { + physical_type, + indexes, + boundary_order: index.boundary_order, + }) + } +} + +/// An index of a column of bytes type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ByteArrayIndex { + /// The physical type + pub physical_type: Type, + /// The indexes, one item per page + pub indexes: Vec>>, + pub boundary_order: BoundaryOrder, +} + +impl ByteArrayIndex { + pub(crate) fn try_new( + index: ColumnIndex, + physical_type: Type, + ) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .into_iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + (Some(min), Some(max)) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, ParquetError>>()?; + + Ok(Self { + physical_type, + indexes, + boundary_order: index.boundary_order, + }) + } +} + +/// An index of a column of boolean physical type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct BooleanIndex { + /// The indexes, one item per page + pub indexes: Vec>, + pub boundary_order: BoundaryOrder, +} + +impl BooleanIndex { + pub(crate) fn try_new(index: ColumnIndex) -> Result { + let len = index.min_values.len(); + + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + let indexes = index + .min_values + .into_iter() + .zip(index.max_values.into_iter()) + .zip(index.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .map(|(((min, 
max), is_null), null_count)| { + let (min, max) = if is_null { + (None, None) + } else { + let min = min[0] != 0; + let max = max[0] == 1; + (Some(min), Some(max)) + }; + Ok(PageIndex { + min, + max, + null_count, + }) + }) + .collect::, ParquetError>>()?; + + Ok(Self { + indexes, + boundary_order: index.boundary_order, + }) + } +} diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs new file mode 100644 index 000000000000..8414480903fd --- /dev/null +++ b/parquet/src/file/page_index/index_reader.rs @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::basic::Type; +use crate::data_type::Int96; +use crate::errors::ParquetError; +use crate::file::metadata::ColumnChunkMetaData; +use crate::file::page_index::index::{BooleanIndex, ByteArrayIndex, Index, NativeIndex}; +use crate::file::reader::ChunkReader; +use parquet_format::{ColumnIndex, OffsetIndex, PageLocation}; +use std::io::{Cursor, Read}; +use thrift::protocol::TCompactInputProtocol; + +/// Read on row group's all columns indexes and change into [`Index`] +/// If not the format not available return an empty vector. 
+pub fn read_columns_indexes( + reader: &R, + chunks: &[ColumnChunkMetaData], +) -> Result, ParquetError> { + let (offset, lengths) = get_index_offset_and_lengths(chunks)?; + let length = lengths.iter().sum::(); + + //read all need data into buffer + let mut reader = reader.get_read(offset, reader.len() as usize)?; + let mut data = vec![0; length]; + reader.read_exact(&mut data)?; + + let mut start = 0; + let data = lengths.into_iter().map(|length| { + let r = &data[start..start + length]; + start += length; + r + }); + + chunks + .iter() + .zip(data) + .map(|(chunk, data)| { + let column_type = chunk.column_type(); + deserialize_column_index(data, column_type) + }) + .collect() +} + +/// Read on row group's all indexes and change into [`Index`] +/// If not the format not available return an empty vector. +pub fn read_pages_locations( + reader: &R, + chunks: &[ColumnChunkMetaData], +) -> Result>, ParquetError> { + let (offset, total_length) = get_location_offset_and_total_length(chunks)?; + + //read all need data into buffer + let mut reader = reader.get_read(offset, reader.len() as usize)?; + let mut data = vec![0; total_length]; + reader.read_exact(&mut data)?; + + let mut d = Cursor::new(data); + let mut result = vec![]; + + for _ in 0..chunks.len() { + let mut prot = TCompactInputProtocol::new(&mut d); + let offset = OffsetIndex::read_from_in_protocol(&mut prot)?; + result.push(offset.page_locations); + } + Ok(result) +} + +//Get File offsets of every ColumnChunk's page_index +//If there are invalid offset return a zero offset with empty lengths. 
+fn get_index_offset_and_lengths( + chunks: &[ColumnChunkMetaData], +) -> Result<(u64, Vec), ParquetError> { + let first_col_metadata = if let Some(chunk) = chunks.first() { + chunk + } else { + return Ok((0, vec![])); + }; + + let offset: u64 = if let Some(offset) = first_col_metadata.column_index_offset() { + offset.try_into().unwrap() + } else { + return Ok((0, vec![])); + }; + + let lengths = chunks + .iter() + .map(|x| x.column_index_length()) + .map(|maybe_length| { + let index_length = maybe_length.ok_or_else(|| { + ParquetError::General( + "The column_index_length must exist if offset_index_offset exists" + .to_string(), + ) + })?; + + Ok(index_length.try_into().unwrap()) + }) + .collect::, ParquetError>>()?; + + Ok((offset, lengths)) +} + +//Get File offset of ColumnChunk's pages_locations +//If there are invalid offset return a zero offset with zero length. +fn get_location_offset_and_total_length( + chunks: &[ColumnChunkMetaData], +) -> Result<(u64, usize), ParquetError> { + let metadata = if let Some(chunk) = chunks.first() { + chunk + } else { + return Ok((0, 0)); + }; + + let offset: u64 = if let Some(offset) = metadata.offset_index_offset() { + offset.try_into().unwrap() + } else { + return Ok((0, 0)); + }; + + let total_length = chunks + .iter() + .map(|x| x.offset_index_length().unwrap()) + .sum::() as usize; + Ok((offset, total_length)) +} + +fn deserialize_column_index( + data: &[u8], + column_type: Type, +) -> Result { + let mut d = Cursor::new(data); + let mut prot = TCompactInputProtocol::new(&mut d); + + let index = ColumnIndex::read_from_in_protocol(&mut prot)?; + + let index = match column_type { + Type::BOOLEAN => Index::BOOLEAN(BooleanIndex::try_new(index)?), + Type::INT32 => Index::INT32(NativeIndex::::try_new(index, column_type)?), + Type::INT64 => Index::INT64(NativeIndex::::try_new(index, column_type)?), + Type::INT96 => Index::INT96(NativeIndex::::try_new(index, column_type)?), + Type::FLOAT => 
Index::FLOAT(NativeIndex::::try_new(index, column_type)?), + Type::DOUBLE => Index::DOUBLE(NativeIndex::::try_new(index, column_type)?), + Type::BYTE_ARRAY => { + Index::BYTE_ARRAY(ByteArrayIndex::try_new(index, column_type)?) + } + Type::FIXED_LEN_BYTE_ARRAY => { + Index::FIXED_LEN_BYTE_ARRAY(ByteArrayIndex::try_new(index, column_type)?) + } + }; + + Ok(index) +} diff --git a/parquet/src/file/page_index/mod.rs b/parquet/src/file/page_index/mod.rs new file mode 100644 index 000000000000..fc87ef20448f --- /dev/null +++ b/parquet/src/file/page_index/mod.rs @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod index; +pub mod index_reader; +pub(crate) mod range; diff --git a/parquet/src/file/page_index/range.rs b/parquet/src/file/page_index/range.rs new file mode 100644 index 000000000000..06c06553ccd5 --- /dev/null +++ b/parquet/src/file/page_index/range.rs @@ -0,0 +1,474 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use crate::errors::ParquetError; +use parquet_format::PageLocation; +use std::cmp::Ordering; +use std::collections::VecDeque; +use std::ops::RangeInclusive; + +type Range = RangeInclusive; + +pub trait RangeOps { + fn is_before(&self, other: &Self) -> bool; + + fn is_after(&self, other: &Self) -> bool; + + fn count(&self) -> usize; + + fn union(left: &Range, right: &Range) -> Option; + + fn intersection(left: &Range, right: &Range) -> Option; +} + +impl RangeOps for Range { + fn is_before(&self, other: &Range) -> bool { + self.end() < other.start() + } + + fn is_after(&self, other: &Range) -> bool { + self.start() > other.end() + } + + fn count(&self) -> usize { + self.end() + 1 - self.start() + } + + /// Return the union of the two ranges, + /// Return `None` if there are hole between them. + fn union(left: &Range, right: &Range) -> Option { + if left.start() <= right.start() { + if left.end() + 1 >= *right.start() { + return Some(Range::new( + *left.start(), + std::cmp::max(*left.end(), *right.end()), + )); + } + } else if right.end() + 1 >= *left.start() { + return Some(Range::new( + *right.start(), + std::cmp::max(*left.end(), *right.end()), + )); + } + None + } + + /// Returns the intersection of the two ranges, + /// return null if they are not overlapped. 
+ fn intersection(left: &Range, right: &Range) -> Option { + if left.start() <= right.start() { + if left.end() >= right.start() { + return Some(Range::new( + *right.start(), + std::cmp::min(*left.end(), *right.end()), + )); + } + } else if right.end() >= left.start() { + return Some(Range::new( + *left.start(), + std::cmp::min(*left.end(), *right.end()), + )); + } + None + } +} + +/// Struct representing row ranges in a row-group. These row ranges are calculated as a result of using +/// the column index on the filtering. +#[derive(Debug, Clone)] +pub struct RowRanges { + pub ranges: VecDeque, +} + +impl RowRanges { + //create an empty RowRanges + pub fn new_empty() -> Self { + RowRanges { + ranges: VecDeque::new(), + } + } + + pub fn count(&self) -> usize { + self.ranges.len() + } + + pub fn filter_with_mask(&self, mask: &[bool]) -> Result { + if self.ranges.len() != mask.len() { + return Err(ParquetError::General(format!( + "Mask size {} is not equal to number of pages {}", + mask.len(), + self.count() + ))); + } + let vec_range = mask + .iter() + .zip(self.ranges.clone()) + .filter_map(|(&f, r)| if f { Some(r) } else { None }) + .collect(); + Ok(RowRanges { ranges: vec_range }) + } + + /// Add a range to the end of the list of ranges. It maintains the disjunctive ascending order of the ranges by + /// trying to union the specified range to the last ranges in the list. The specified range shall be larger than + /// the last one or might be overlapped with some of the last ones. 
+ /// [a, b] < [c, d] if b < c + pub fn add(&mut self, mut range: Range) { + let count = self.count(); + if count > 0 { + for i in 1..(count + 1) { + let index = count - i; + let last = self.ranges.get(index).unwrap(); + assert!(!last.is_after(&range), "Must add range in ascending!"); + // try to merge range + match Range::union(last, &range) { + None => { + break; + } + Some(r) => { + range = r; + self.ranges.remove(index); + } + } + } + } + self.ranges.push_back(range); + } + + /// Calculates the union of the two specified RowRanges object. The union of two range is calculated if there are no + /// elements between them. Otherwise, the two disjunctive ranges are stored separately. + /// For example: + /// [113, 241] ∪ [221, 340] = [113, 330] + /// [113, 230] ∪ [231, 340] = [113, 340] + /// while + /// [113, 230] ∪ [232, 340] = [113, 230], [232, 340] + /// + /// The result RowRanges object will contain all the row indexes that were contained in one of the specified objects. + pub fn union(mut left: RowRanges, mut right: RowRanges) -> RowRanges { + let v1 = &mut left.ranges; + let v2 = &mut right.ranges; + let mut result = RowRanges::new_empty(); + if v2.is_empty() { + left.clone() + } else { + let mut range2 = v2.pop_front().unwrap(); + while !v1.is_empty() { + let range1 = v1.pop_front().unwrap(); + if range1.is_after(&range2) { + result.add(range2); + range2 = range1; + std::mem::swap(v1, v2); + } else { + result.add(range1); + } + } + + result.add(range2); + while !v2.is_empty() { + result.add(v2.pop_front().unwrap()) + } + + result + } + } + + /// Calculates the intersection of the two specified RowRanges object. Two ranges intersect if they have common + /// elements otherwise the result is empty. 
+ /// For example: + /// [113, 241] ∩ [221, 340] = [221, 241] + /// while + /// [113, 230] ∩ [231, 340] = + /// + /// The result RowRanges object will contain all the row indexes there were contained in both of the specified objects + #[allow(clippy::mut_range_bound)] + pub fn intersection(left: RowRanges, right: RowRanges) -> RowRanges { + let mut result = RowRanges::new_empty(); + let mut right_index = 0; + for l in left.ranges.iter() { + for i in right_index..right.ranges.len() { + let r = right.ranges.get(i).unwrap(); + if l.is_before(r) { + break; + } else if l.is_after(r) { + right_index = i + 1; + continue; + } + if let Some(ra) = Range::intersection(l, r) { + result.add(ra); + } + } + } + result + } + + pub fn row_count(&self) -> usize { + self.ranges.iter().map(|x| x.count()).sum() + } + + pub fn is_overlapping(&self, x: &Range) -> bool { + self.ranges + .binary_search_by(|y| -> Ordering { + if y.is_before(x) { + Ordering::Less + } else if y.is_after(x) { + Ordering::Greater + } else { + Ordering::Equal + } + }) + .is_ok() + } +} + +/// Takes an array of [`PageLocation`], and a total number of rows, and based on the provided `page_mask` +/// returns the corresponding [`RowRanges`] to scan +pub fn compute_row_ranges( + page_mask: &[bool], + locations: &[PageLocation], + total_rows: usize, +) -> Result { + if page_mask.len() != locations.len() { + return Err(ParquetError::General(format!( + "Page_mask size {} is not equal to number of locations {}", + page_mask.len(), + locations.len(), + ))); + } + let row_ranges = page_locations_to_row_ranges(locations, total_rows)?; + row_ranges.filter_with_mask(page_mask) +} + +fn page_locations_to_row_ranges( + locations: &[PageLocation], + total_rows: usize, +) -> Result { + if locations.is_empty() || total_rows == 0 { + return Ok(RowRanges::new_empty()); + } + + // If we read directly from parquet pageIndex to construct locations, + // the location index should be continuous + let mut vec_range: VecDeque = locations + 
.windows(2) + .map(|x| { + let start = x[0].first_row_index as usize; + let end = (x[1].first_row_index - 1) as usize; + Range::new(start, end) + }) + .collect(); + + let last = Range::new( + locations.last().unwrap().first_row_index as usize, + total_rows - 1, + ); + vec_range.push_back(last); + + Ok(RowRanges { ranges: vec_range }) +} + +#[cfg(test)] +mod tests { + use crate::basic::Type::INT32; + use crate::file::page_index::index::{NativeIndex, PageIndex}; + use crate::file::page_index::range::{compute_row_ranges, Range, RowRanges}; + use parquet_format::{BoundaryOrder, PageLocation}; + + #[test] + fn test_binary_search_overlap() { + let mut ranges = RowRanges::new_empty(); + ranges.add(Range::new(1, 3)); + ranges.add(Range::new(6, 7)); + + assert!(ranges.is_overlapping(&Range::new(1, 2))); + // include both [start, end] + assert!(ranges.is_overlapping(&Range::new(0, 1))); + assert!(ranges.is_overlapping(&Range::new(0, 3))); + + assert!(ranges.is_overlapping(&Range::new(0, 7))); + assert!(ranges.is_overlapping(&Range::new(2, 7))); + + assert!(!ranges.is_overlapping(&Range::new(4, 5))); + } + + #[test] + fn test_add_func_ascending_disjunctive() { + let mut ranges_1 = RowRanges::new_empty(); + ranges_1.add(Range::new(1, 3)); + ranges_1.add(Range::new(5, 6)); + ranges_1.add(Range::new(8, 9)); + assert_eq!(ranges_1.count(), 3); + } + + #[test] + fn test_add_func_ascending_merge() { + let mut ranges_1 = RowRanges::new_empty(); + ranges_1.add(Range::new(1, 3)); + ranges_1.add(Range::new(4, 5)); + ranges_1.add(Range::new(6, 7)); + assert_eq!(ranges_1.count(), 1); + } + + #[test] + #[should_panic(expected = "Must add range in ascending!")] + fn test_add_func_not_ascending() { + let mut ranges_1 = RowRanges::new_empty(); + ranges_1.add(Range::new(6, 7)); + ranges_1.add(Range::new(1, 3)); + ranges_1.add(Range::new(4, 5)); + assert_eq!(ranges_1.count(), 1); + } + + #[test] + fn test_union_func() { + let mut ranges_1 = RowRanges::new_empty(); + ranges_1.add(Range::new(1, 
2)); + ranges_1.add(Range::new(3, 4)); + ranges_1.add(Range::new(5, 6)); + + let mut ranges_2 = RowRanges::new_empty(); + ranges_2.add(Range::new(2, 3)); + ranges_2.add(Range::new(4, 5)); + ranges_2.add(Range::new(6, 7)); + + let ranges = RowRanges::union(ranges_1, ranges_2); + assert_eq!(ranges.count(), 1); + let range = ranges.ranges.get(0).unwrap(); + assert_eq!(*range.start(), 1); + assert_eq!(*range.end(), 7); + + let mut ranges_a = RowRanges::new_empty(); + ranges_a.add(Range::new(1, 3)); + ranges_a.add(Range::new(5, 8)); + ranges_a.add(Range::new(11, 12)); + + let mut ranges_b = RowRanges::new_empty(); + ranges_b.add(Range::new(0, 2)); + ranges_b.add(Range::new(6, 7)); + ranges_b.add(Range::new(10, 11)); + + let ranges = RowRanges::union(ranges_a, ranges_b); + assert_eq!(ranges.count(), 3); + + let range_1 = ranges.ranges.get(0).unwrap(); + assert_eq!(*range_1.start(), 0); + assert_eq!(*range_1.end(), 3); + let range_2 = ranges.ranges.get(1).unwrap(); + assert_eq!(*range_2.start(), 5); + assert_eq!(*range_2.end(), 8); + let range_3 = ranges.ranges.get(2).unwrap(); + assert_eq!(*range_3.start(), 10); + assert_eq!(*range_3.end(), 12); + } + + #[test] + fn test_intersection_func() { + let mut ranges_1 = RowRanges::new_empty(); + ranges_1.add(Range::new(1, 2)); + ranges_1.add(Range::new(3, 4)); + ranges_1.add(Range::new(5, 6)); + + let mut ranges_2 = RowRanges::new_empty(); + ranges_2.add(Range::new(2, 3)); + ranges_2.add(Range::new(4, 5)); + ranges_2.add(Range::new(6, 7)); + + let ranges = RowRanges::intersection(ranges_1, ranges_2); + assert_eq!(ranges.count(), 1); + let range = ranges.ranges.get(0).unwrap(); + assert_eq!(*range.start(), 2); + assert_eq!(*range.end(), 6); + + let mut ranges_a = RowRanges::new_empty(); + ranges_a.add(Range::new(1, 3)); + ranges_a.add(Range::new(5, 8)); + ranges_a.add(Range::new(11, 12)); + + let mut ranges_b = RowRanges::new_empty(); + ranges_b.add(Range::new(0, 2)); + ranges_b.add(Range::new(6, 7)); + 
ranges_b.add(Range::new(10, 11)); + + let ranges = RowRanges::intersection(ranges_a, ranges_b); + assert_eq!(ranges.count(), 3); + + let range_1 = ranges.ranges.get(0).unwrap(); + assert_eq!(*range_1.start(), 1); + assert_eq!(*range_1.end(), 2); + let range_2 = ranges.ranges.get(1).unwrap(); + assert_eq!(*range_2.start(), 6); + assert_eq!(*range_2.end(), 7); + let range_3 = ranges.ranges.get(2).unwrap(); + assert_eq!(*range_3.start(), 11); + assert_eq!(*range_3.end(), 11); + } + + #[test] + fn test_compute_one() { + let locations = &[PageLocation { + offset: 50, + compressed_page_size: 10, + first_row_index: 0, + }]; + let total_rows = 10; + + let row_ranges = compute_row_ranges(&[true], locations, total_rows).unwrap(); + assert_eq!(row_ranges.count(), 1); + assert_eq!(row_ranges.ranges.get(0).unwrap(), &Range::new(0, 9)); + } + + #[test] + fn test_compute_multi() { + let index: NativeIndex = NativeIndex { + physical_type: INT32, + indexes: vec![ + PageIndex { + min: Some(0), + max: Some(10), + null_count: Some(0), + }, + PageIndex { + min: Some(15), + max: Some(20), + null_count: Some(0), + }, + ], + boundary_order: BoundaryOrder::Ascending, + }; + let locations = &[ + PageLocation { + offset: 100, + compressed_page_size: 10, + first_row_index: 0, + }, + PageLocation { + offset: 200, + compressed_page_size: 20, + first_row_index: 11, + }, + ]; + let total_rows = 20; + + //filter `x < 11` + let filter = + |page: &PageIndex| page.max.as_ref().map(|&x| x < 11).unwrap_or(false); + + let mask = index.indexes.iter().map(filter).collect::>(); + + let row_ranges = compute_row_ranges(&mask, locations, total_rows).unwrap(); + + assert_eq!(row_ranges.count(), 1); + assert_eq!(row_ranges.ranges.get(0).unwrap(), &Range::new(0, 10)); + } +} diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 8059157aabf2..6ff73e041e88 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -18,6 +18,7 @@ //! 
Contains implementations of the reader traits FileReader, RowGroupReader and PageReader //! Also contains implementations of the ChunkReader for files (with buffering) and byte arrays (RAM) +use bytes::{Buf, Bytes}; use std::{convert::TryFrom, fs::File, io::Read, path::Path, sync::Arc}; use parquet_format::{PageHeader, PageType}; @@ -27,6 +28,7 @@ use crate::basic::{Compression, Encoding, Type}; use crate::column::page::{Page, PageReader}; use crate::compression::{create_codec, Codec}; use crate::errors::{ParquetError, Result}; +use crate::file::page_index::index_reader; use crate::file::{footer, metadata::*, reader::*, statistics}; use crate::record::reader::RowIter; use crate::record::Row; @@ -35,6 +37,7 @@ use crate::util::{io::TryClone, memory::ByteBufferPtr}; // export `SliceableCursor` and `FileSource` publically so clients can // re-use the logic in their own ParquetFileWriter wrappers +#[allow(deprecated)] pub use crate::util::{cursor::SliceableCursor, io::FileSource}; // ---------------------------------------------------------------------- @@ -60,12 +63,35 @@ impl ChunkReader for File { } } +impl Length for Bytes { + fn len(&self) -> u64 { + self.len() as u64 + } +} + +impl TryClone for Bytes { + fn try_clone(&self) -> std::io::Result { + Ok(self.clone()) + } +} + +impl ChunkReader for Bytes { + type T = bytes::buf::Reader; + + fn get_read(&self, start: u64, length: usize) -> Result { + let start = start as usize; + Ok(self.slice(start..start + length).reader()) + } +} + +#[allow(deprecated)] impl Length for SliceableCursor { fn len(&self) -> u64 { SliceableCursor::len(self) } } +#[allow(deprecated)] impl ChunkReader for SliceableCursor { type T = SliceableCursor; @@ -132,12 +158,16 @@ pub struct SerializedFileReader { /// they will be chained using 'AND' to filter the row groups. 
pub struct ReadOptionsBuilder { predicates: Vec bool>>, + enable_page_index: bool, } impl ReadOptionsBuilder { /// New builder pub fn new() -> Self { - ReadOptionsBuilder { predicates: vec![] } + ReadOptionsBuilder { + predicates: vec![], + enable_page_index: false, + } } /// Add a predicate on row group metadata to the reading option, @@ -162,10 +192,17 @@ impl ReadOptionsBuilder { self } + /// Enable page index in the reading option, + pub fn with_page_index(mut self) -> Self { + self.enable_page_index = true; + self + } + /// Seal the builder and return the read options pub fn build(self) -> ReadOptions { ReadOptions { predicates: self.predicates, + enable_page_index: self.enable_page_index, } } } @@ -176,6 +213,7 @@ impl ReadOptionsBuilder { /// All predicates will be chained using 'AND' to filter the row groups. pub struct ReadOptions { predicates: Vec bool>>, + enable_page_index: bool, } impl SerializedFileReader { @@ -209,13 +247,33 @@ impl SerializedFileReader { } } - Ok(Self { - chunk_reader: Arc::new(chunk_reader), - metadata: ParquetMetaData::new( - metadata.file_metadata().clone(), - filtered_row_groups, - ), - }) + if options.enable_page_index { + //Todo for now test data `data_index_bloom_encoding_stats.parquet` only have one rowgroup + //support multi after create multi-RG test data. 
+ let cols = metadata.row_group(0); + let columns_indexes = + index_reader::read_columns_indexes(&chunk_reader, cols.columns())?; + let pages_locations = + index_reader::read_pages_locations(&chunk_reader, cols.columns())?; + + Ok(Self { + chunk_reader: Arc::new(chunk_reader), + metadata: ParquetMetaData::new_with_page_index( + metadata.file_metadata().clone(), + filtered_row_groups, + Some(columns_indexes), + Some(pages_locations), + ), + }) + } else { + Ok(Self { + chunk_reader: Arc::new(chunk_reader), + metadata: ParquetMetaData::new( + metadata.file_metadata().clone(), + filtered_row_groups, + ), + }) + } } } @@ -284,6 +342,7 @@ impl<'a, R: 'static + ChunkReader> RowGroupReader for SerializedRowGroupReader<' fn get_column_page_reader(&self, i: usize) -> Result> { let col = self.metadata.column(i); let (col_start, col_length) = col.byte_range(); + //Todo filter with multi row range let file_chunk = self.chunk_reader.get_read(col_start, col_length as usize)?; let page_reader = SerializedPageReader::new( file_chunk, @@ -299,6 +358,108 @@ impl<'a, R: 'static + ChunkReader> RowGroupReader for SerializedRowGroupReader<' } } +/// Reads a [`PageHeader`] from the provided [`Read`] +pub(crate) fn read_page_header(input: &mut T) -> Result { + let mut prot = TCompactInputProtocol::new(input); + let page_header = PageHeader::read_from_in_protocol(&mut prot)?; + Ok(page_header) +} + +/// Decodes a [`Page`] from the provided `buffer` +pub(crate) fn decode_page( + page_header: PageHeader, + buffer: ByteBufferPtr, + physical_type: Type, + decompressor: Option<&mut Box>, +) -> Result { + // When processing data page v2, depending on enabled compression for the + // page, we should account for uncompressed data ('offset') of + // repetition and definition levels. 
+ // + // We always use 0 offset for other pages other than v2, `true` flag means + // that compression will be applied if decompressor is defined + let mut offset: usize = 0; + let mut can_decompress = true; + + if let Some(ref header_v2) = page_header.data_page_header_v2 { + offset = (header_v2.definition_levels_byte_length + + header_v2.repetition_levels_byte_length) as usize; + // When is_compressed flag is missing the page is considered compressed + can_decompress = header_v2.is_compressed.unwrap_or(true); + } + + // TODO: page header could be huge because of statistics. We should set a + // maximum page header size and abort if that is exceeded. + let buffer = match decompressor { + Some(decompressor) if can_decompress => { + let uncompressed_size = page_header.uncompressed_page_size as usize; + let mut decompressed = Vec::with_capacity(uncompressed_size); + let compressed = &buffer.as_ref()[offset..]; + decompressed.extend_from_slice(&buffer.as_ref()[..offset]); + decompressor.decompress(compressed, &mut decompressed)?; + + if decompressed.len() != uncompressed_size { + return Err(general_err!( + "Actual decompressed size doesn't match the expected one ({} vs {})", + decompressed.len(), + uncompressed_size + )); + } + + ByteBufferPtr::new(decompressed) + } + _ => buffer, + }; + + let result = match page_header.type_ { + PageType::DictionaryPage => { + assert!(page_header.dictionary_page_header.is_some()); + let dict_header = page_header.dictionary_page_header.as_ref().unwrap(); + let is_sorted = dict_header.is_sorted.unwrap_or(false); + Page::DictionaryPage { + buf: buffer, + num_values: dict_header.num_values as u32, + encoding: Encoding::from(dict_header.encoding), + is_sorted, + } + } + PageType::DataPage => { + assert!(page_header.data_page_header.is_some()); + let header = page_header.data_page_header.unwrap(); + Page::DataPage { + buf: buffer, + num_values: header.num_values as u32, + encoding: Encoding::from(header.encoding), + def_level_encoding: 
Encoding::from(header.definition_level_encoding), + rep_level_encoding: Encoding::from(header.repetition_level_encoding), + statistics: statistics::from_thrift(physical_type, header.statistics), + } + } + PageType::DataPageV2 => { + assert!(page_header.data_page_header_v2.is_some()); + let header = page_header.data_page_header_v2.unwrap(); + let is_compressed = header.is_compressed.unwrap_or(true); + Page::DataPageV2 { + buf: buffer, + num_values: header.num_values as u32, + encoding: Encoding::from(header.encoding), + num_nulls: header.num_nulls as u32, + num_rows: header.num_rows as u32, + def_levels_byte_len: header.definition_levels_byte_length as u32, + rep_levels_byte_len: header.repetition_levels_byte_length as u32, + is_compressed, + statistics: statistics::from_thrift(physical_type, header.statistics), + } + } + _ => { + // For unknown page type (e.g., INDEX_PAGE), skip and read next. + unimplemented!("Page type {:?} is not supported", page_header.type_) + } + }; + + Ok(result) +} + /// A serialized implementation for Parquet [`PageReader`]. pub struct SerializedPageReader { // The file source buffer which references exactly the bytes for the column trunk @@ -336,13 +497,6 @@ impl SerializedPageReader { }; Ok(result) } - - /// Reads Page header from Thrift. - fn read_page_header(&mut self) -> Result { - let mut prot = TCompactInputProtocol::new(&mut self.buf); - let page_header = PageHeader::read_from_in_protocol(&mut prot)?; - Ok(page_header) - } } impl Iterator for SerializedPageReader { @@ -356,108 +510,40 @@ impl Iterator for SerializedPageReader { impl PageReader for SerializedPageReader { fn get_next_page(&mut self) -> Result> { while self.seen_num_values < self.total_num_values { - let page_header = self.read_page_header()?; - - // When processing data page v2, depending on enabled compression for the - // page, we should account for uncompressed data ('offset') of - // repetition and definition levels. 
- // - // We always use 0 offset for other pages other than v2, `true` flag means - // that compression will be applied if decompressor is defined - let mut offset: usize = 0; - let mut can_decompress = true; - - if let Some(ref header_v2) = page_header.data_page_header_v2 { - offset = (header_v2.definition_levels_byte_length - + header_v2.repetition_levels_byte_length) - as usize; - // When is_compressed flag is missing the page is considered compressed - can_decompress = header_v2.is_compressed.unwrap_or(true); - } - - let compressed_len = page_header.compressed_page_size as usize - offset; - let uncompressed_len = page_header.uncompressed_page_size as usize - offset; - // We still need to read all bytes from buffered stream - let mut buffer = vec![0; offset + compressed_len]; - self.buf.read_exact(&mut buffer)?; - - // TODO: page header could be huge because of statistics. We should set a - // maximum page header size and abort if that is exceeded. - if let Some(decompressor) = self.decompressor.as_mut() { - if can_decompress { - let mut decompressed_buffer = Vec::with_capacity(uncompressed_len); - let decompressed_size = decompressor - .decompress(&buffer[offset..], &mut decompressed_buffer)?; - if decompressed_size != uncompressed_len { - return Err(general_err!( - "Actual decompressed size doesn't match the expected one ({} vs {})", - decompressed_size, - uncompressed_len - )); - } - if offset == 0 { - buffer = decompressed_buffer; - } else { - // Prepend saved offsets to the buffer - buffer.truncate(offset); - buffer.append(&mut decompressed_buffer); - } - } + let page_header = read_page_header(&mut self.buf)?; + + let to_read = page_header.compressed_page_size as usize; + let mut buffer = Vec::with_capacity(to_read); + let read = (&mut self.buf) + .take(to_read as u64) + .read_to_end(&mut buffer)?; + + if read != to_read { + return Err(eof_err!( + "Expected to read {} bytes of page, read only {}", + to_read, + read + )); } + let buffer = 
ByteBufferPtr::new(buffer); let result = match page_header.type_ { - PageType::DictionaryPage => { - assert!(page_header.dictionary_page_header.is_some()); - let dict_header = - page_header.dictionary_page_header.as_ref().unwrap(); - let is_sorted = dict_header.is_sorted.unwrap_or(false); - Page::DictionaryPage { - buf: ByteBufferPtr::new(buffer), - num_values: dict_header.num_values as u32, - encoding: Encoding::from(dict_header.encoding), - is_sorted, - } - } - PageType::DataPage => { - assert!(page_header.data_page_header.is_some()); - let header = page_header.data_page_header.unwrap(); - self.seen_num_values += header.num_values as i64; - Page::DataPage { - buf: ByteBufferPtr::new(buffer), - num_values: header.num_values as u32, - encoding: Encoding::from(header.encoding), - def_level_encoding: Encoding::from( - header.definition_level_encoding, - ), - rep_level_encoding: Encoding::from( - header.repetition_level_encoding, - ), - statistics: statistics::from_thrift( - self.physical_type, - header.statistics, - ), - } - } - PageType::DataPageV2 => { - assert!(page_header.data_page_header_v2.is_some()); - let header = page_header.data_page_header_v2.unwrap(); - let is_compressed = header.is_compressed.unwrap_or(true); - self.seen_num_values += header.num_values as i64; - Page::DataPageV2 { - buf: ByteBufferPtr::new(buffer), - num_values: header.num_values as u32, - encoding: Encoding::from(header.encoding), - num_nulls: header.num_nulls as u32, - num_rows: header.num_rows as u32, - def_levels_byte_len: header.definition_levels_byte_length as u32, - rep_levels_byte_len: header.repetition_levels_byte_length as u32, - is_compressed, - statistics: statistics::from_thrift( - self.physical_type, - header.statistics, - ), - } + PageType::DataPage | PageType::DataPageV2 => { + let decoded = decode_page( + page_header, + buffer, + self.physical_type, + self.decompressor.as_mut(), + )?; + self.seen_num_values += decoded.num_values() as i64; + decoded } + 
PageType::DictionaryPage => decode_page( + page_header, + buffer, + self.physical_type, + self.decompressor.as_mut(), + )?, _ => { // For unknown page type (e.g., INDEX_PAGE), skip and read next. continue; @@ -475,9 +561,11 @@ impl PageReader for SerializedPageReader { mod tests { use super::*; use crate::basic::{self, ColumnOrder}; + use crate::file::page_index::index::Index; use crate::record::RowAccessor; use crate::schema::parser::parse_message_type; use crate::util::test_common::{get_test_file, get_test_path}; + use parquet_format::BoundaryOrder; use std::sync::Arc; #[test] @@ -486,7 +574,7 @@ mod tests { get_test_file("alltypes_plain.parquet") .read_to_end(&mut buf) .unwrap(); - let cursor = SliceableCursor::new(buf); + let cursor = Bytes::from(buf); let read_from_cursor = SerializedFileReader::new(cursor).unwrap(); let test_file = get_test_file("alltypes_plain.parquet"); @@ -605,9 +693,9 @@ mod tests { let file_metadata = metadata.file_metadata(); assert!(file_metadata.created_by().is_some()); assert_eq!( - file_metadata.created_by().unwrap(), - "impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9)" - ); + file_metadata.created_by().unwrap(), + "impala version 1.3.0-INTERNAL (build 8a48ddb1eff84592b3fc06bc6f51ec120e1fffc9)" + ); assert!(file_metadata.key_value_metadata().is_none()); assert_eq!(file_metadata.num_rows(), 8); assert_eq!(file_metadata.version(), 1); @@ -955,4 +1043,66 @@ mod tests { assert_eq!(metadata.num_row_groups(), 0); Ok(()) } + + #[test] + // Use java parquet-tools get below pageIndex info + // !``` + // parquet-tools column-index ./data_index_bloom_encoding_stats.parquet + // row group 0: + // column index for column String: + // Boudary order: ASCENDING + // page-0 : + // null count min max + // 0 Hello today + // + // offset index for column String: + // page-0 : + // offset compressed size first row index + // 4 152 0 + ///``` + // + fn test_page_index_reader() { + let test_file = 
get_test_file("data_index_bloom_encoding_stats.parquet"); + let builder = ReadOptionsBuilder::new(); + //enable read page index + let options = builder.with_page_index().build(); + let reader_result = SerializedFileReader::new_with_options(test_file, options); + let reader = reader_result.unwrap(); + + // Test contents in Parquet metadata + let metadata = reader.metadata(); + assert_eq!(metadata.num_row_groups(), 1); + + let page_indexes = metadata.page_indexes().unwrap(); + + // only one row group + assert_eq!(page_indexes.len(), 1); + let index = if let Index::BYTE_ARRAY(index) = page_indexes.get(0).unwrap() { + index + } else { + unreachable!() + }; + + assert_eq!(index.boundary_order, BoundaryOrder::Ascending); + let index_in_pages = &index.indexes; + + //only one page group + assert_eq!(index_in_pages.len(), 1); + + let page0 = index_in_pages.get(0).unwrap(); + let min = page0.min.as_ref().unwrap(); + let max = page0.max.as_ref().unwrap(); + assert_eq!("Hello", std::str::from_utf8(min.as_slice()).unwrap()); + assert_eq!("today", std::str::from_utf8(max.as_slice()).unwrap()); + + let offset_indexes = metadata.offset_indexes().unwrap(); + // only one row group + assert_eq!(offset_indexes.len(), 1); + let offset_index = offset_indexes.get(0).unwrap(); + let page_offset = offset_index.get(0).unwrap(); + + assert_eq!(4, page_offset.offset); + assert_eq!(152, page_offset.compressed_page_size); + assert_eq!(0, page_offset.first_row_index); + } } diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 646550dcb6be..0a8fc331e7e1 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -153,6 +153,11 @@ impl SerializedFileWriter { Ok(row_group_writer) } + /// Returns metadata for any flushed row groups + pub fn flushed_row_groups(&self) -> &[RowGroupMetaDataPtr] { + &self.row_groups + } + /// Closes and finalises file writer, returning the file metadata. /// /// All row groups must be appended before this method is called. 
@@ -541,6 +546,7 @@ impl<'a, W: Write> PageWriter for SerializedPageWriter<'a, W> { mod tests { use super::*; + use bytes::Bytes; use std::{fs::File, io::Cursor}; use crate::basic::{Compression, Encoding, LogicalType, Repetition, Type}; @@ -999,7 +1005,7 @@ mod tests { ); let mut rows: i64 = 0; - for subset in &data { + for (idx, subset) in data.iter().enumerate() { let mut row_group_writer = file_writer.next_row_group().unwrap(); if let Some(mut writer) = row_group_writer.next_column().unwrap() { rows += writer @@ -1008,7 +1014,10 @@ mod tests { .unwrap() as i64; writer.close().unwrap(); } - row_group_writer.close().unwrap(); + let last_group = row_group_writer.close().unwrap(); + let flushed = file_writer.flushed_row_groups(); + assert_eq!(flushed.len(), idx + 1); + assert_eq!(flushed[idx].as_ref(), last_group.as_ref()); } file_writer.close().unwrap(); @@ -1054,7 +1063,7 @@ mod tests { } fn test_bytes_roundtrip(data: Vec>) { - let mut cursor = Cursor::new(vec![]); + let mut buffer = vec![]; let schema = Arc::new( types::Type::group_type_builder("schema") @@ -1072,7 +1081,7 @@ mod tests { { let props = Arc::new(WriterProperties::builder().build()); let mut writer = - SerializedFileWriter::new(&mut cursor, schema, props).unwrap(); + SerializedFileWriter::new(&mut buffer, schema, props).unwrap(); for subset in &data { let mut row_group_writer = writer.next_row_group().unwrap(); @@ -1089,9 +1098,7 @@ mod tests { writer.close().unwrap(); } - let buffer = cursor.into_inner(); - - let reading_cursor = crate::file::serialized_reader::SliceableCursor::new(buffer); + let reading_cursor = Bytes::from(buffer); let reader = SerializedFileReader::new(reading_cursor).unwrap(); assert_eq!(reader.num_row_groups(), data.len()); diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs index 95b97bc9546e..5df21e4b0d00 100644 --- a/parquet/src/record/api.rs +++ b/parquet/src/record/api.rs @@ -27,7 +27,7 @@ use crate::data_type::{ByteArray, Decimal, Int96}; use 
crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; -#[cfg(feature = "cli")] +#[cfg(any(feature = "cli", test))] use serde_json::Value; /// Macro as a shortcut to generate 'not yet implemented' panic error. @@ -79,7 +79,7 @@ impl Row { } } - #[cfg(feature = "cli")] + #[cfg(any(feature = "cli", test))] pub fn to_json_value(&self) -> Value { Value::Object( self.fields @@ -650,7 +650,7 @@ impl Field { } } - #[cfg(feature = "cli")] + #[cfg(any(feature = "cli", test))] pub fn to_json_value(&self) -> Value { match &self { Field::Null => Value::Null, @@ -669,7 +669,7 @@ impl Field { Field::Double(n) => serde_json::Number::from_f64(*n) .map(Value::Number) .unwrap_or(Value::Null), - Field::Decimal(n) => Value::String(convert_decimal_to_string(&n)), + Field::Decimal(n) => Value::String(convert_decimal_to_string(n)), Field::Str(s) => Value::String(s.to_owned()), Field::Bytes(b) => Value::String(base64::encode(b.data())), Field::Date(d) => Value::String(convert_date_to_string(*d)), @@ -1668,7 +1668,7 @@ mod tests { } #[test] - #[cfg(feature = "cli")] + #[cfg(any(feature = "cli", test))] fn test_to_json_value() { assert_eq!(Field::Null.to_json_value(), Value::Null); assert_eq!(Field::Bool(true).to_json_value(), Value::Bool(true)); @@ -1707,21 +1707,19 @@ mod tests { ); assert_eq!( Field::Float(5.0).to_json_value(), - Value::Number(serde_json::Number::from_f64(f64::from(5.0 as f32)).unwrap()) + Value::Number(serde_json::Number::from_f64(5.0).unwrap()) ); assert_eq!( Field::Float(5.1234).to_json_value(), - Value::Number( - serde_json::Number::from_f64(f64::from(5.1234 as f32)).unwrap() - ) + Value::Number(serde_json::Number::from_f64(5.1234_f32 as f64).unwrap()) ); assert_eq!( Field::Double(6.0).to_json_value(), - Value::Number(serde_json::Number::from_f64(6.0 as f64).unwrap()) + Value::Number(serde_json::Number::from_f64(6.0).unwrap()) ); assert_eq!( Field::Double(6.1234).to_json_value(), - Value::Number(serde_json::Number::from_f64(6.1234 as 
f64).unwrap()) + Value::Number(serde_json::Number::from_f64(6.1234).unwrap()) ); assert_eq!( Field::Str("abc".to_string()).to_json_value(), diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 9cef93a69b29..8d624fe3d185 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -838,6 +838,7 @@ impl ColumnDescriptor { /// A schema descriptor. This encapsulates the top-level schemas for all the columns, /// as well as all descriptors for all the primitive columns. +#[derive(PartialEq)] pub struct SchemaDescriptor { // The top-level schema (the "message" type). // This must be a `GroupType` where each field is a root column type in the schema. diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index 288c771b097b..b535ee02a0ef 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -32,6 +32,17 @@ pub fn from_ne_slice(bs: &[u8]) -> T { T::from_ne_bytes(b) } +#[inline] +pub fn from_le_slice(bs: &[u8]) -> T { + let mut b = T::Buffer::default(); + { + let b = b.as_mut(); + let bs = &bs[..b.len()]; + b.copy_from_slice(bs); + } + T::from_le_bytes(b) +} + pub trait FromBytes: Sized { type Buffer: AsMut<[u8]> + Default; fn from_le_bytes(bs: Self::Buffer) -> Self; diff --git a/parquet/src/util/cursor.rs b/parquet/src/util/cursor.rs index ff7067fcbcad..706724dbf52a 100644 --- a/parquet/src/util/cursor.rs +++ b/parquet/src/util/cursor.rs @@ -26,6 +26,7 @@ use std::{cmp, fmt}; /// because the lack of Generic Associated Type implies that you would require complex lifetime propagation when /// returning such a cursor. 
#[allow(clippy::rc_buffer)] +#[deprecated = "use bytes::Bytes instead"] pub struct SliceableCursor { inner: Arc>, start: u64, @@ -33,6 +34,7 @@ pub struct SliceableCursor { pos: u64, } +#[allow(deprecated)] impl fmt::Debug for SliceableCursor { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SliceableCursor") @@ -44,6 +46,7 @@ impl fmt::Debug for SliceableCursor { } } +#[allow(deprecated)] impl SliceableCursor { pub fn new(content: impl Into>>) -> Self { let inner = content.into(); @@ -90,6 +93,7 @@ impl SliceableCursor { } /// Implementation inspired by std::io::Cursor +#[allow(deprecated)] impl Read for SliceableCursor { fn read(&mut self, buf: &mut [u8]) -> io::Result { let n = Read::read(&mut self.remaining_slice(), buf)?; @@ -98,6 +102,7 @@ impl Read for SliceableCursor { } } +#[allow(deprecated)] impl Seek for SliceableCursor { fn seek(&mut self, pos: SeekFrom) -> io::Result { let new_pos = match pos { @@ -204,12 +209,14 @@ mod tests { use super::*; /// Create a SliceableCursor of all u8 values in ascending order + #[allow(deprecated)] fn get_u8_range() -> SliceableCursor { let data: Vec = (0u8..=255).collect(); SliceableCursor::new(data) } /// Reads all the bytes in the slice and checks that it matches the u8 range from start to end_included + #[allow(deprecated)] fn check_read_all(mut cursor: SliceableCursor, start: u8, end_included: u8) { let mut target = vec![]; let cursor_res = cursor.read_to_end(&mut target); diff --git a/parquet/src/util/memory.rs b/parquet/src/util/memory.rs index 0b0c707ff34f..909878a6d538 100644 --- a/parquet/src/util/memory.rs +++ b/parquet/src/util/memory.rs @@ -31,7 +31,6 @@ use std::{ /// when all slices are dropped. 
/// /// TODO: Remove and replace with [`bytes::Bytes`] -#[allow(clippy::rc_buffer)] #[derive(Clone, Debug)] pub struct ByteBufferPtr { data: Bytes, @@ -109,6 +108,12 @@ impl From> for ByteBufferPtr { } } +impl From for ByteBufferPtr { + fn from(data: Bytes) -> Self { + Self { data } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/parquet_derive/Cargo.toml b/parquet_derive/Cargo.toml index bab5d230a3c2..680074d08705 100644 --- a/parquet_derive/Cargo.toml +++ b/parquet_derive/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive" -version = "15.0.0" +version = "16.0.0" license = "Apache-2.0" description = "Derive macros for the Rust implementation of Apache Parquet" homepage = "https://github.com/apache/arrow-rs" @@ -35,4 +35,4 @@ proc-macro = true proc-macro2 = "1.0" quote = "1.0" syn = { version = "1.0", features = ["full", "extra-traits"] } -parquet = { path = "../parquet", version = "15.0.0" } +parquet = { path = "../parquet", version = "16.0.0" } diff --git a/parquet_derive/README.md b/parquet_derive/README.md index 61ccf3093001..4f390b0cd911 100644 --- a/parquet_derive/README.md +++ b/parquet_derive/README.md @@ -32,8 +32,8 @@ Add this to your Cargo.toml: ```toml [dependencies] -parquet = "15.0.0" -parquet_derive = "15.0.0" +parquet = "16.0.0" +parquet_derive = "16.0.0" ``` and this to your crate root: diff --git a/parquet_derive/test/dependency/README.md b/parquet_derive/test/dependency/README.md deleted file mode 100644 index b618b4636e7c..000000000000 --- a/parquet_derive/test/dependency/README.md +++ /dev/null @@ -1,21 +0,0 @@ - - -This directory contains projects that use arrow as a dependency with -various combinations of feature flags. 
diff --git a/parquet_derive/test/dependency/default-features/Cargo.toml b/parquet_derive/test/dependency/default-features/Cargo.toml deleted file mode 100644 index 7434552aaec4..000000000000 --- a/parquet_derive/test/dependency/default-features/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "defeault-features" -description = "Models a user application of parquet_derive that uses no additional features of arrow" -version = "0.1.0" -edition = "2021" -rust-version = "1.57" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -parquet_derive = { path = "../../../../parquet_derive", version = "15.0.0" } - -# Keep this out of the default workspace -[workspace] diff --git a/parquet_derive/test/dependency/default-features/src/main.rs b/parquet_derive/test/dependency/default-features/src/main.rs deleted file mode 100644 index e7a11a969c03..000000000000 --- a/parquet_derive/test/dependency/default-features/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/parquet_derive_test/Cargo.toml b/parquet_derive_test/Cargo.toml index 53daf2977620..7bf6db6730e6 100644 --- a/parquet_derive_test/Cargo.toml +++ b/parquet_derive_test/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet_derive_test" -version = "15.0.0" +version = "16.0.0" license = "Apache-2.0" description = "Integration test package for parquet-derive" homepage = "https://github.com/apache/arrow-rs" @@ -29,6 +29,6 @@ publish = false rust-version = "1.57" [dependencies] -parquet = { path = "../parquet", version = "15.0.0" } -parquet_derive = { path = "../parquet_derive", version = "15.0.0" } +parquet = { path = "../parquet", version = "16.0.0" } +parquet_derive = { path = "../parquet_derive", version = "16.0.0" } chrono = "0.4.19"