From 3a5fafb9729c2a57a901213c529c0a5736426ad7 Mon Sep 17 00:00:00 2001
From: Simon Lin
Date: Thu, 5 Sep 2024 21:01:47 +1000
Subject: [PATCH 1/4] refactor(rust): Rename `data_type` -> `dtype`

---
 crates/polars-arrow/src/array/binary/data.rs | 10 +-
 crates/polars-arrow/src/array/binary/ffi.rs | 6 +-
 crates/polars-arrow/src/array/binary/mod.rs | 58 ++++-----
 .../polars-arrow/src/array/binary/mutable.rs | 22 ++--
 .../src/array/binary/mutable_values.rs | 40 +++---
 crates/polars-arrow/src/array/binview/ffi.rs | 8 +-
 crates/polars-arrow/src/array/binview/mod.rs | 32 ++---
 .../polars-arrow/src/array/binview/mutable.rs | 4 +-
 crates/polars-arrow/src/array/boolean/data.rs | 4 +-
 crates/polars-arrow/src/array/boolean/ffi.rs | 6 +-
 crates/polars-arrow/src/array/boolean/mod.rs | 50 ++++----
 .../polars-arrow/src/array/boolean/mutable.rs | 18 +--
 .../polars-arrow/src/array/dictionary/data.rs | 10 +-
 .../polars-arrow/src/array/dictionary/ffi.rs | 6 +-
 .../polars-arrow/src/array/dictionary/mod.rs | 68 +++++-----
 .../src/array/dictionary/mutable.rs | 16 +--
 .../src/array/dictionary/value_map.rs | 4 +-
 crates/polars-arrow/src/array/equal/binary.rs | 2 +-
 .../src/array/equal/binary_view.rs | 2 +-
 .../src/array/equal/dictionary.rs | 2 +-
 .../src/array/equal/fixed_size_binary.rs | 2 +-
 .../src/array/equal/fixed_size_list.rs | 2 +-
 crates/polars-arrow/src/array/equal/list.rs | 2 +-
 crates/polars-arrow/src/array/equal/map.rs | 2 +-
 crates/polars-arrow/src/array/equal/mod.rs | 4 +-
 .../polars-arrow/src/array/equal/primitive.rs | 2 +-
 .../polars-arrow/src/array/equal/struct_.rs | 2 +-
 crates/polars-arrow/src/array/equal/union.rs | 2 +-
 crates/polars-arrow/src/array/equal/utf8.rs | 2 +-
 crates/polars-arrow/src/array/ffi.rs | 2 +-
 .../src/array/fixed_size_binary/data.rs | 10 +-
 .../src/array/fixed_size_binary/ffi.rs | 6 +-
 .../src/array/fixed_size_binary/fmt.rs | 2 +-
 .../src/array/fixed_size_binary/mod.rs | 54 ++++----
 .../src/array/fixed_size_binary/mutable.rs | 18 +--
 .../src/array/fixed_size_list/data.rs | 10 +-
 .../src/array/fixed_size_list/ffi.rs | 4 +-
 .../src/array/fixed_size_list/mod.rs | 58 ++++-----
 .../src/array/fixed_size_list/mutable.rs | 30 ++---
 crates/polars-arrow/src/array/fmt.rs | 2 +-
 .../polars-arrow/src/array/growable/binary.rs | 12 +-
 .../src/array/growable/binview.rs | 10 +-
 .../src/array/growable/boolean.rs | 10 +-
 .../src/array/growable/dictionary.rs | 8 +-
 .../src/array/growable/fixed_binary.rs | 6 +-
 .../src/array/growable/fixed_size_list.rs | 6 +-
 .../polars-arrow/src/array/growable/list.rs | 2 +-
 crates/polars-arrow/src/array/growable/mod.rs | 6 +-
 .../polars-arrow/src/array/growable/null.rs | 12 +-
 .../src/array/growable/primitive.rs | 10 +-
 .../src/array/growable/structure.rs | 4 +-
 .../polars-arrow/src/array/growable/utf8.rs | 2 +-
 crates/polars-arrow/src/array/list/data.rs | 10 +-
 crates/polars-arrow/src/array/list/ffi.rs | 6 +-
 crates/polars-arrow/src/array/list/mod.rs | 60 ++++-----
 crates/polars-arrow/src/array/list/mutable.rs | 40 +++---
 crates/polars-arrow/src/array/map/data.rs | 10 +-
 crates/polars-arrow/src/array/map/ffi.rs | 6 +-
 crates/polars-arrow/src/array/map/mod.rs | 50 ++++----
 crates/polars-arrow/src/array/mod.rs | 108 ++++++++--------
 crates/polars-arrow/src/array/null.rs | 40 +++---
 .../polars-arrow/src/array/primitive/data.rs | 8 +-
 .../polars-arrow/src/array/primitive/ffi.rs | 6 +-
 .../polars-arrow/src/array/primitive/fmt.rs | 4 +-
 .../polars-arrow/src/array/primitive/mod.rs | 72 +++++------
 .../src/array/primitive/mutable.rs | 48 +++----
crates/polars-arrow/src/array/struct_/data.rs | 8 +- crates/polars-arrow/src/array/struct_/ffi.rs | 6 +- crates/polars-arrow/src/array/struct_/mod.rs | 66 +++++----- .../polars-arrow/src/array/struct_/mutable.rs | 42 +++--- crates/polars-arrow/src/array/union/data.rs | 12 +- crates/polars-arrow/src/array/union/ffi.rs | 8 +- crates/polars-arrow/src/array/union/mod.rs | 70 +++++----- crates/polars-arrow/src/array/utf8/data.rs | 10 +- crates/polars-arrow/src/array/utf8/ffi.rs | 6 +- crates/polars-arrow/src/array/utf8/mod.rs | 70 +++++----- crates/polars-arrow/src/array/utf8/mutable.rs | 24 ++-- .../src/array/utf8/mutable_values.rs | 40 +++--- crates/polars-arrow/src/array/values.rs | 2 +- .../src/compute/aggregate/memory.rs | 2 +- .../polars-arrow/src/compute/aggregate/sum.rs | 12 +- crates/polars-arrow/src/compute/arity.rs | 32 ++--- crates/polars-arrow/src/compute/bitwise.rs | 14 +- .../src/compute/cast/binary_to.rs | 24 ++-- .../src/compute/cast/binview_to.rs | 2 +- .../src/compute/cast/decimal_to.rs | 8 +- .../src/compute/cast/dictionary_to.rs | 16 +-- crates/polars-arrow/src/compute/cast/mod.rs | 14 +- .../src/compute/cast/primitive_to.rs | 4 +- .../polars-arrow/src/compute/cast/utf8_to.rs | 12 +- .../polars-arrow/src/compute/concatenate.rs | 2 +- .../polars-arrow/src/compute/take/binary.rs | 4 +- .../polars-arrow/src/compute/take/binview.rs | 2 +- .../polars-arrow/src/compute/take/boolean.rs | 4 +- crates/polars-arrow/src/compute/take/list.rs | 2 +- crates/polars-arrow/src/compute/take/mod.rs | 6 +- .../src/compute/take/primitive.rs | 2 +- .../src/compute/take/structure.rs | 2 +- crates/polars-arrow/src/compute/temporal.rs | 80 ++++++------ crates/polars-arrow/src/datatypes/field.rs | 18 +-- crates/polars-arrow/src/datatypes/mod.rs | 14 +- crates/polars-arrow/src/ffi/array.rs | 108 ++++++++-------- crates/polars-arrow/src/ffi/bridge.rs | 2 +- crates/polars-arrow/src/ffi/mod.rs | 4 +- crates/polars-arrow/src/ffi/schema.rs | 54 ++++---- crates/polars-arrow/src/ffi/stream.rs | 8 +- .../src/io/avro/read/deserialize.rs | 31 +++-- .../polars-arrow/src/io/avro/read/nested.rs | 48 ++++--- .../polars-arrow/src/io/avro/read/schema.rs | 6 +- .../polars-arrow/src/io/avro/write/schema.rs | 14 +- .../src/io/avro/write/serialize.rs | 20 +-- .../src/io/ipc/read/array/binary.rs | 6 +- .../src/io/ipc/read/array/binview.rs | 6 +- .../src/io/ipc/read/array/boolean.rs | 6 +- .../src/io/ipc/read/array/dictionary.rs | 4 +- .../io/ipc/read/array/fixed_size_binary.rs | 8 +- .../src/io/ipc/read/array/fixed_size_list.rs | 19 +-- .../src/io/ipc/read/array/list.rs | 14 +- .../polars-arrow/src/io/ipc/read/array/map.rs | 14 +- .../polars-arrow/src/io/ipc/read/array/mod.rs | 4 +- .../src/io/ipc/read/array/null.rs | 6 +- .../src/io/ipc/read/array/primitive.rs | 6 +- .../src/io/ipc/read/array/struct_.rs | 23 ++-- .../src/io/ipc/read/array/union.rs | 27 ++-- .../src/io/ipc/read/array/utf8.rs | 6 +- crates/polars-arrow/src/io/ipc/read/common.rs | 21 ++- .../src/io/ipc/read/deserialize.rs | 56 ++++---- crates/polars-arrow/src/io/ipc/read/schema.rs | 40 +++--- .../polars-arrow/src/io/ipc/write/common.rs | 8 +- crates/polars-arrow/src/io/ipc/write/mod.rs | 14 +- .../polars-arrow/src/io/ipc/write/schema.rs | 52 ++++---- .../src/io/ipc/write/serialize/mod.rs | 2 +- .../src/legacy/array/fixed_size_list.rs | 14 +- crates/polars-arrow/src/legacy/array/list.rs | 8 +- crates/polars-arrow/src/legacy/array/mod.rs | 26 ++-- crates/polars-arrow/src/legacy/array/null.rs | 2 +- .../polars-arrow/src/legacy/kernels/atan2.rs | 2 +- 
.../polars-arrow/src/legacy/kernels/list.rs | 2 +- crates/polars-arrow/src/legacy/kernels/pow.rs | 2 +- crates/polars-arrow/src/legacy/kernels/set.rs | 10 +- crates/polars-arrow/src/mmap/array.rs | 42 +++--- crates/polars-arrow/src/mmap/mod.rs | 8 +- crates/polars-arrow/src/scalar/README.md | 2 +- crates/polars-arrow/src/scalar/binary.rs | 2 +- crates/polars-arrow/src/scalar/binview.rs | 2 +- crates/polars-arrow/src/scalar/boolean.rs | 2 +- crates/polars-arrow/src/scalar/dictionary.rs | 16 +-- crates/polars-arrow/src/scalar/equal.rs | 4 +- .../src/scalar/fixed_size_binary.rs | 16 +-- .../src/scalar/fixed_size_list.rs | 22 ++-- crates/polars-arrow/src/scalar/list.rs | 22 ++-- crates/polars-arrow/src/scalar/map.rs | 24 ++-- crates/polars-arrow/src/scalar/mod.rs | 22 ++-- crates/polars-arrow/src/scalar/null.rs | 2 +- crates/polars-arrow/src/scalar/primitive.rs | 20 +-- crates/polars-arrow/src/scalar/struct_.rs | 12 +- crates/polars-arrow/src/scalar/union.rs | 10 +- crates/polars-arrow/src/scalar/utf8.rs | 2 +- .../polars-compute/src/arithmetic/signed.rs | 6 +- .../polars-compute/src/arithmetic/unsigned.rs | 4 +- .../polars-compute/src/comparisons/array.rs | 12 +- .../src/comparisons/dyn_array.rs | 4 +- crates/polars-compute/src/filter/mod.rs | 8 +- .../polars-compute/src/if_then_else/array.rs | 4 +- .../polars-compute/src/if_then_else/list.rs | 4 +- .../polars-compute/src/if_then_else/view.rs | 6 +- .../polars-compute/src/min_max/dyn_array.rs | 10 +- crates/polars-compute/src/unique/boolean.rs | 14 +- crates/polars-compute/src/unique/primitive.rs | 8 +- .../src/chunked_array/array/mod.rs | 2 +- .../chunked_array/builder/list/anonymous.rs | 4 +- crates/polars-core/src/chunked_array/cast.rs | 112 ++++++++-------- crates/polars-core/src/chunked_array/from.rs | 8 +- .../polars-core/src/chunked_array/list/mod.rs | 2 +- .../logical/categorical/merge.rs | 2 +- .../src/chunked_array/logical/decimal.rs | 2 +- crates/polars-core/src/chunked_array/mod.rs | 6 +- .../chunked_array/object/extension/drop.rs | 4 +- .../chunked_array/object/extension/list.rs | 6 +- .../object/extension/polars_extension.rs | 2 +- .../src/chunked_array/object/mod.rs | 2 +- .../src/chunked_array/ops/any_value.rs | 6 +- .../polars-core/src/chunked_array/ops/mod.rs | 12 +- .../src/chunked_array/ops/reverse.rs | 2 +- .../ops/sort/arg_sort_multiple.rs | 4 +- .../src/chunked_array/ops/unique/mod.rs | 12 +- .../polars-core/src/chunked_array/ops/zip.rs | 2 +- crates/polars-core/src/datatypes/_serde.rs | 2 +- crates/polars-core/src/datatypes/any_value.rs | 2 +- crates/polars-core/src/datatypes/dtype.rs | 4 +- crates/polars-core/src/datatypes/field.rs | 10 +- crates/polars-core/src/fmt.rs | 14 +- crates/polars-core/src/frame/from.rs | 2 +- .../frame/group_by/aggregations/agg_list.rs | 18 +-- crates/polars-core/src/frame/mod.rs | 2 +- crates/polars-core/src/frame/row/av_buffer.rs | 2 +- crates/polars-core/src/frame/row/mod.rs | 12 +- crates/polars-core/src/schema.rs | 4 +- crates/polars-core/src/series/from.rs | 28 ++-- .../src/series/implementations/array.rs | 6 +- .../src/series/implementations/binary.rs | 6 +- .../series/implementations/binary_offset.rs | 6 +- .../src/series/implementations/boolean.rs | 6 +- .../src/series/implementations/categorical.rs | 4 +- .../src/series/implementations/date.rs | 8 +- .../src/series/implementations/datetime.rs | 6 +- .../src/series/implementations/decimal.rs | 8 +- .../src/series/implementations/duration.rs | 4 +- .../src/series/implementations/floats.rs | 6 +- 
.../src/series/implementations/list.rs | 6 +- .../src/series/implementations/mod.rs | 6 +- .../src/series/implementations/null.rs | 4 +- .../src/series/implementations/object.rs | 4 +- .../src/series/implementations/string.rs | 6 +- .../src/series/implementations/time.rs | 6 +- crates/polars-core/src/series/into.rs | 4 +- crates/polars-core/src/series/mod.rs | 4 +- crates/polars-core/src/series/ops/null.rs | 2 +- crates/polars-core/src/series/ops/reshape.rs | 4 +- crates/polars-core/src/series/series_trait.rs | 2 +- crates/polars-core/src/utils/mod.rs | 2 +- .../src/expressions/aggregation.rs | 4 +- crates/polars-expr/src/expressions/alias.rs | 2 +- crates/polars-expr/src/expressions/apply.rs | 2 +- crates/polars-expr/src/expressions/binary.rs | 2 +- crates/polars-expr/src/expressions/cast.rs | 6 +- crates/polars-expr/src/expressions/literal.rs | 4 +- crates/polars-expr/src/expressions/ternary.rs | 4 +- crates/polars-expr/src/expressions/window.rs | 2 +- crates/polars-expr/src/planner.rs | 4 +- crates/polars-ffi/src/lib.rs | 2 +- crates/polars-io/src/csv/read/read_impl.rs | 2 +- crates/polars-io/src/csv/read/reader.rs | 2 +- crates/polars-io/src/json/infer.rs | 2 +- crates/polars-io/src/ndjson/mod.rs | 8 +- .../polars-io/src/parquet/read/read_impl.rs | 20 +-- crates/polars-io/src/parquet/write/writer.rs | 6 +- crates/polars-io/src/predicates.rs | 2 +- crates/polars-io/src/shared.rs | 6 +- crates/polars-json/src/json/deserialize.rs | 62 ++++----- crates/polars-json/src/json/infer_schema.rs | 22 ++-- .../polars-json/src/json/write/serialize.rs | 2 +- crates/polars-json/src/ndjson/deserialize.rs | 12 +- crates/polars-json/src/ndjson/file.rs | 20 +-- crates/polars-lazy/src/dsl/eval.rs | 8 +- crates/polars-lazy/src/dsl/list.rs | 10 +- .../physical_plan/streaming/convert_alp.rs | 2 +- .../src/chunked_array/array/any_all.rs | 2 +- .../src/chunked_array/list/any_all.rs | 2 +- .../polars-ops/src/chunked_array/list/sets.rs | 10 +- .../src/chunked_array/strings/extract.rs | 8 +- crates/polars-ops/src/series/ops/is_in.rs | 4 +- .../src/arrow/read/deserialize/binview.rs | 16 +-- .../src/arrow/read/deserialize/boolean.rs | 4 +- .../src/arrow/read/deserialize/dictionary.rs | 4 +- .../read/deserialize/fixed_size_binary.rs | 8 +- .../src/arrow/read/deserialize/mod.rs | 46 +++---- .../src/arrow/read/deserialize/nested.rs | 86 ++++++------- .../arrow/read/deserialize/nested_utils.rs | 8 +- .../src/arrow/read/deserialize/null.rs | 8 +- .../arrow/read/deserialize/primitive/float.rs | 10 +- .../read/deserialize/primitive/integer.rs | 10 +- .../src/arrow/read/deserialize/simple.rs | 120 +++++++++--------- .../src/arrow/read/deserialize/utils/mod.rs | 12 +- .../src/arrow/read/schema/convert.rs | 6 +- .../src/arrow/read/schema/metadata.rs | 24 ++-- .../src/arrow/read/statistics/dictionary.rs | 16 +-- .../src/arrow/read/statistics/list.rs | 20 +-- .../src/arrow/read/statistics/map.rs | 16 +-- .../src/arrow/read/statistics/mod.rs | 42 +++--- .../src/arrow/read/statistics/struct_.rs | 16 +-- .../src/arrow/write/binary/basic.rs | 2 +- .../src/arrow/write/binview/basic.rs | 2 +- .../src/arrow/write/dictionary.rs | 6 +- crates/polars-parquet/src/arrow/write/mod.rs | 38 +++--- .../polars-parquet/src/arrow/write/pages.rs | 10 +- .../polars-parquet/src/arrow/write/schema.rs | 20 +-- .../polars-parquet/src/arrow/write/utils.rs | 4 +- .../src/parquet/write/statistics.rs | 2 +- .../src/executors/sinks/sort/sink_multiple.rs | 2 +- crates/polars-plan/src/dsl/expr.rs | 6 +- crates/polars-plan/src/dsl/expr_dyn_fn.rs | 4 +- 
.../src/dsl/function_expr/schema.rs | 20 +-- .../src/dsl/function_expr/struct_.rs | 14 +- .../src/dsl/functions/syntactic_sugar.rs | 4 +- crates/polars-plan/src/dsl/mod.rs | 14 +- crates/polars-plan/src/dsl/name.rs | 2 +- crates/polars-plan/src/plans/aexpr/mod.rs | 4 +- crates/polars-plan/src/plans/aexpr/schema.rs | 28 ++-- crates/polars-plan/src/plans/builder_ir.rs | 2 +- .../src/plans/conversion/dsl_to_ir.rs | 2 +- .../src/plans/conversion/expr_expansion.rs | 4 +- .../src/plans/conversion/expr_to_ir.rs | 4 +- .../src/plans/conversion/ir_to_dsl.rs | 4 +- .../plans/conversion/type_coercion/binary.rs | 12 +- .../plans/conversion/type_coercion/is_in.rs | 4 +- .../src/plans/conversion/type_coercion/mod.rs | 14 +- crates/polars-plan/src/plans/format.rs | 6 +- .../polars-plan/src/plans/functions/schema.rs | 2 +- crates/polars-plan/src/plans/ir/format.rs | 6 +- .../polars-plan/src/plans/ir/tree_format.rs | 6 +- crates/polars-plan/src/plans/lit.rs | 12 +- crates/polars-plan/src/plans/schema.rs | 2 +- crates/polars-plan/src/plans/visitor/expr.rs | 6 +- crates/polars-python/src/conversion/mod.rs | 4 +- crates/polars-python/src/datatypes.rs | 2 +- crates/polars-python/src/expr/general.rs | 4 +- crates/polars-python/src/functions/io.rs | 6 +- .../polars-python/src/interop/arrow/to_py.rs | 12 +- .../src/interop/arrow/to_rust.rs | 4 +- crates/polars-python/src/lazyframe/general.rs | 2 +- .../src/lazyframe/visitor/expr_nodes.rs | 4 +- .../polars-python/src/series/c_interface.rs | 2 +- .../polars-python/src/series/construction.rs | 2 +- crates/polars-python/src/series/import.rs | 4 +- crates/polars-row/src/decode.rs | 20 +-- crates/polars-row/src/encode.rs | 30 ++--- crates/polars-row/src/fixed.rs | 4 +- crates/polars-sql/src/sql_expr.rs | 28 ++-- .../polars-stream/src/nodes/parquet_source.rs | 4 +- .../src/physical_plan/lower_expr.rs | 8 +- .../chunkedarray/rolling_window/dispatch.rs | 2 +- crates/polars-time/src/group_by/dynamic.rs | 10 +- crates/polars-time/src/upsample.rs | 4 +- crates/polars-time/src/windows/duration.rs | 8 +- .../polars/tests/it/arrow/array/binary/mod.rs | 2 +- .../it/arrow/array/binary/mutable_values.rs | 2 +- .../tests/it/arrow/array/boolean/mod.rs | 2 +- .../tests/it/arrow/array/dictionary/mod.rs | 32 ++--- .../polars/tests/it/arrow/array/equal/list.rs | 8 +- .../it/arrow/array/fixed_size_binary/mod.rs | 2 +- .../arrow/array/fixed_size_binary/mutable.rs | 2 +- .../it/arrow/array/fixed_size_list/mod.rs | 4 +- .../it/arrow/array/fixed_size_list/mutable.rs | 2 +- .../tests/it/arrow/array/growable/list.rs | 8 +- .../tests/it/arrow/array/growable/mod.rs | 12 +- .../polars/tests/it/arrow/array/list/mod.rs | 22 ++-- .../tests/it/arrow/array/list/mutable.rs | 4 +- crates/polars/tests/it/arrow/array/map/mod.rs | 4 +- crates/polars/tests/it/arrow/array/mod.rs | 2 +- .../tests/it/arrow/array/primitive/mod.rs | 2 +- .../tests/it/arrow/array/primitive/mutable.rs | 4 +- .../tests/it/arrow/array/struct_/mutable.rs | 4 +- crates/polars/tests/it/arrow/array/union.rs | 58 ++++----- .../polars/tests/it/arrow/array/utf8/mod.rs | 2 +- .../tests/it/arrow/array/utf8/mutable.rs | 2 +- .../it/arrow/array/utf8/mutable_values.rs | 2 +- .../it/arrow/compute/aggregate/memory.rs | 4 +- crates/polars/tests/it/arrow/ffi/data.rs | 6 +- crates/polars/tests/it/arrow/ffi/stream.rs | 2 +- crates/polars/tests/it/arrow/io/ipc/mod.rs | 2 +- crates/polars/tests/it/arrow/scalar/binary.rs | 4 +- .../polars/tests/it/arrow/scalar/boolean.rs | 2 +- .../it/arrow/scalar/fixed_size_binary.rs | 2 +- 
.../tests/it/arrow/scalar/fixed_size_list.rs | 2 +- crates/polars/tests/it/arrow/scalar/list.rs | 2 +- crates/polars/tests/it/arrow/scalar/map.rs | 2 +- crates/polars/tests/it/arrow/scalar/null.rs | 2 +- .../polars/tests/it/arrow/scalar/primitive.rs | 8 +- .../polars/tests/it/arrow/scalar/struct_.rs | 2 +- crates/polars/tests/it/arrow/scalar/utf8.rs | 4 +- .../polars/tests/it/io/parquet/arrow/mod.rs | 66 +++++----- .../polars/tests/it/io/parquet/arrow/write.rs | 2 +- .../polars/tests/it/io/parquet/roundtrip.rs | 2 +- docs/development/contributing/code-style.md | 4 +- .../python_rust_compiled_function/src/ffi.rs | 4 +- py-polars/docs/source/reference/config.rst | 4 +- 367 files changed, 2313 insertions(+), 2336 deletions(-) diff --git a/crates/polars-arrow/src/array/binary/data.rs b/crates/polars-arrow/src/array/binary/data.rs index a45ebcca0621..ea3dec1f6227 100644 --- a/crates/polars-arrow/src/array/binary/data.rs +++ b/crates/polars-arrow/src/array/binary/data.rs @@ -6,8 +6,8 @@ use crate::offset::{Offset, OffsetsBuffer}; impl Arrow2Arrow for BinaryArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let dtype = self.dtype.clone().into(); + let builder = ArrayDataBuilder::new(dtype) .len(self.offsets().len_proxy()) .buffers(vec![ self.offsets.clone().into_inner().into(), @@ -20,11 +20,11 @@ impl Arrow2Arrow for BinaryArray { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); if data.is_empty() { // Handle empty offsets - return Self::new_empty(data_type); + return Self::new_empty(dtype); } let buffers = data.buffers(); @@ -34,7 +34,7 @@ impl Arrow2Arrow for BinaryArray { offsets.slice(data.offset(), data.len() + 1); Self { - data_type, + dtype, offsets, values: buffers[1].clone().into(), validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), diff --git a/crates/polars-arrow/src/array/binary/ffi.rs b/crates/polars-arrow/src/array/binary/ffi.rs index c135c8d3d8dd..107cf0fcb421 100644 --- a/crates/polars-arrow/src/array/binary/ffi.rs +++ b/crates/polars-arrow/src/array/binary/ffi.rs @@ -40,7 +40,7 @@ unsafe impl ToFfi for BinaryArray { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, offsets: self.offsets.clone(), values: self.values.clone(), @@ -50,7 +50,7 @@ unsafe impl ToFfi for BinaryArray { impl FromFfi for BinaryArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; @@ -59,6 +59,6 @@ impl FromFfi for BinaryArray { // assumption that data from FFI is well constructed let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) }; - Self::try_new(data_type, offsets, values, validity) + Self::try_new(dtype, offsets, values, validity) } } diff --git a/crates/polars-arrow/src/array/binary/mod.rs b/crates/polars-arrow/src/array/binary/mod.rs index 87ce30f1212a..b78826647be4 100644 --- a/crates/polars-arrow/src/array/binary/mod.rs +++ b/crates/polars-arrow/src/array/binary/mod.rs @@ -56,7 +56,7 @@ mod data; /// * `len` is equal to `validity.len()`, when defined. 
#[derive(Clone)] pub struct BinaryArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, @@ -69,11 +69,11 @@ impl BinaryArray { /// This function returns an error iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. /// # Implementation /// This function is `O(1)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, @@ -87,12 +87,12 @@ impl BinaryArray { polars_bail!(ComputeError: "validity mask length must match the number of values") } - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + if dtype.to_physical_type() != Self::default_dtype().to_physical_type() { polars_bail!(ComputeError: "BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary") } Ok(Self { - data_type, + dtype, offsets, values, validity, @@ -105,13 +105,13 @@ impl BinaryArray { /// /// The invariants must be valid (see try_new). pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, ) -> Self { Self { - data_type, + dtype, offsets, values, validity, @@ -188,8 +188,8 @@ impl BinaryArray { /// Returns the [`ArrowDataType`] of this array. #[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } /// Returns the values of this [`BinaryArray`]. @@ -246,12 +246,12 @@ impl BinaryArray { #[must_use] pub fn into_inner(self) -> (ArrowDataType, OffsetsBuffer, Buffer, Option) { let Self { - data_type, + dtype, offsets, values, validity, } = self; - (data_type, offsets, values, validity) + (dtype, offsets, values, validity) } /// Try to convert this `BinaryArray` to a `MutableBinaryArray` @@ -262,33 +262,33 @@ impl BinaryArray { match bitmap.into_mut() { // SAFETY: invariants are preserved Left(bitmap) => Left(BinaryArray::new( - self.data_type, + self.dtype, self.offsets, self.values, Some(bitmap), )), Right(mutable_bitmap) => match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => Left(BinaryArray::new( - self.data_type, + self.dtype, offsets, values, Some(mutable_bitmap.into()), )), (Left(values), Right(offsets)) => Left(BinaryArray::new( - self.data_type, + self.dtype, offsets.into(), values, Some(mutable_bitmap.into()), )), (Right(values), Left(offsets)) => Left(BinaryArray::new( - self.data_type, + self.dtype, offsets, values.into(), Some(mutable_bitmap.into()), )), (Right(values), Right(offsets)) => Right( MutableBinaryArray::try_new( - self.data_type, + self.dtype, offsets, values, Some(mutable_bitmap), @@ -300,38 +300,38 @@ impl BinaryArray { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(BinaryArray::new(self.data_type, offsets, values, None)) + Left(BinaryArray::new(self.dtype, offsets, values, None)) }, (Left(values), Right(offsets)) => Left(BinaryArray::new( - self.data_type, + self.dtype, offsets.into(), values, None, )), (Right(values), Left(offsets)) => Left(BinaryArray::new( - self.data_type, + self.dtype, offsets, values.into(), None, )), (Right(values), Right(offsets)) => Right( - 
MutableBinaryArray::try_new(self.data_type, offsets, values, None).unwrap(), + MutableBinaryArray::try_new(self.dtype, offsets, values, None).unwrap(), ), } } } /// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero. - pub fn new_empty(data_type: ArrowDataType) -> Self { - Self::new(data_type, OffsetsBuffer::new(), Buffer::new(), None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + Self::new(dtype, OffsetsBuffer::new(), Buffer::new(), None) } /// Creates an null [`BinaryArray`], i.e. whose `.null_count() == .len()`. #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { unsafe { Self::new_unchecked( - data_type, + dtype, Offsets::new_zeroed(length).into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), @@ -340,7 +340,7 @@ impl BinaryArray { } /// Returns the default [`ArrowDataType`], `DataType::Binary` or `DataType::LargeBinary` - pub fn default_data_type() -> ArrowDataType { + pub fn default_dtype() -> ArrowDataType { if O::IS_LARGE { ArrowDataType::LargeBinary } else { @@ -350,12 +350,12 @@ impl BinaryArray { /// Alias for unwrapping [`Self::try_new`] pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, ) -> Self { - Self::try_new(data_type, offsets, values, validity).unwrap() + Self::try_new(dtype, offsets, values, validity).unwrap() } /// Returns a [`BinaryArray`] from an iterator of trusted length. @@ -463,13 +463,13 @@ impl Splitable for BinaryArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: lhs_offsets, values: self.values.clone(), validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: rhs_offsets, values: self.values.clone(), validity: rhs_validity, diff --git a/crates/polars-arrow/src/array/binary/mutable.rs b/crates/polars-arrow/src/array/binary/mutable.rs index 65d1ca928b75..4a8dbaafe4bf 100644 --- a/crates/polars-arrow/src/array/binary/mutable.rs +++ b/crates/polars-arrow/src/array/binary/mutable.rs @@ -52,16 +52,16 @@ impl MutableBinaryArray { /// This function returns an error iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. /// # Implementation /// This function is `O(1)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, validity: Option, ) -> PolarsResult { - let values = MutableBinaryValuesArray::try_new(data_type, offsets, values)?; + let values = MutableBinaryValuesArray::try_new(dtype, offsets, values)?; if validity .as_ref() @@ -79,8 +79,8 @@ impl MutableBinaryArray { Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref())) } - fn default_data_type() -> ArrowDataType { - BinaryArray::::default_data_type() + fn default_dtype() -> ArrowDataType { + BinaryArray::::default_dtype() } /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots. 
@@ -201,8 +201,8 @@ impl MutableArray for MutableBinaryArray { array.arced() } - fn data_type(&self) -> &ArrowDataType { - self.values.data_type() + fn dtype(&self) -> &ArrowDataType { + self.values.dtype() } fn as_any(&self) -> &dyn std::any::Any { @@ -247,7 +247,7 @@ impl MutableBinaryArray { { let (validity, offsets, values) = trusted_len_unzip(iterator); - Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap() + Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap() } /// Creates a [`MutableBinaryArray`] from an iterator of trusted length. @@ -271,7 +271,7 @@ impl MutableBinaryArray { iterator: I, ) -> Self { let (offsets, values) = trusted_len_values_iter(iterator); - Self::try_new(Self::default_data_type(), offsets, values, None).unwrap() + Self::try_new(Self::default_dtype(), offsets, values, None).unwrap() } /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`. @@ -305,7 +305,7 @@ impl MutableBinaryArray { validity = None; } - Ok(Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap()) + Ok(Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap()) } /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length. @@ -403,7 +403,7 @@ impl MutableBinaryArray { /// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`. pub fn from_iter_values, I: Iterator>(iterator: I) -> Self { let (offsets, values) = values_iter(iterator); - Self::try_new(Self::default_data_type(), offsets, values, None).unwrap() + Self::try_new(Self::default_dtype(), offsets, values, None).unwrap() } /// Extend with a fallible iterator diff --git a/crates/polars-arrow/src/array/binary/mutable_values.rs b/crates/polars-arrow/src/array/binary/mutable_values.rs index 613cbb0aba9e..b02857694d4b 100644 --- a/crates/polars-arrow/src/array/binary/mutable_values.rs +++ b/crates/polars-arrow/src/array/binary/mutable_values.rs @@ -17,7 +17,7 @@ use crate::trusted_len::TrustedLen; /// from [`MutableBinaryArray`] in that it builds non-null [`BinaryArray`]. #[derive(Debug, Clone)] pub struct MutableBinaryValuesArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, } @@ -25,7 +25,7 @@ pub struct MutableBinaryValuesArray { impl From> for BinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { BinaryArray::::new( - other.data_type, + other.dtype, other.offsets.into(), other.values.into(), None, @@ -35,7 +35,7 @@ impl From> for BinaryArray { impl From> for MutableBinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - MutableBinaryArray::::try_new(other.data_type, other.offsets, other.values, None) + MutableBinaryArray::::try_new(other.dtype, other.offsets, other.values, None) .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray") } } @@ -50,7 +50,7 @@ impl MutableBinaryValuesArray { /// Returns an empty [`MutableBinaryValuesArray`]. pub fn new() -> Self { Self { - data_type: Self::default_data_type(), + dtype: Self::default_dtype(), offsets: Offsets::new(), values: Vec::::new(), } @@ -61,22 +61,22 @@ impl MutableBinaryValuesArray { /// # Errors /// This function returns an error iff: /// * The last offset is not equal to the values' length. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`. 
/// # Implementation /// This function is `O(1)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, ) -> PolarsResult { try_check_offsets_bounds(&offsets, values.len())?; - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + if dtype.to_physical_type() != Self::default_dtype().to_physical_type() { polars_bail!(ComputeError: "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",) } Ok(Self { - data_type, + dtype, offsets, values, }) @@ -84,8 +84,8 @@ impl MutableBinaryValuesArray { /// Returns the default [`ArrowDataType`] of this container: [`ArrowDataType::Utf8`] or [`ArrowDataType::LargeUtf8`] /// depending on the generic [`Offset`]. - pub fn default_data_type() -> ArrowDataType { - BinaryArray::::default_data_type() + pub fn default_dtype() -> ArrowDataType { + BinaryArray::::default_dtype() } /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items. @@ -96,7 +96,7 @@ impl MutableBinaryValuesArray { /// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items and values. pub fn with_capacities(capacity: usize, values: usize) -> Self { Self { - data_type: Self::default_data_type(), + dtype: Self::default_dtype(), offsets: Offsets::::with_capacity(capacity), values: Vec::::with_capacity(values), } @@ -187,7 +187,7 @@ impl MutableBinaryValuesArray { /// Extract the low-end APIs from the [`MutableBinaryValuesArray`]. pub fn into_inner(self) -> (ArrowDataType, Offsets, Vec) { - (self.data_type, self.offsets, self.values) + (self.dtype, self.offsets, self.values) } } @@ -201,17 +201,17 @@ impl MutableArray for MutableBinaryValuesArray { } fn as_box(&mut self) -> Box { - let (data_type, offsets, values) = std::mem::take(self).into_inner(); - BinaryArray::new(data_type, offsets.into(), values.into(), None).boxed() + let (dtype, offsets, values) = std::mem::take(self).into_inner(); + BinaryArray::new(dtype, offsets.into(), values.into(), None).boxed() } fn as_arc(&mut self) -> Arc { - let (data_type, offsets, values) = std::mem::take(self).into_inner(); - BinaryArray::new(data_type, offsets.into(), values.into(), None).arced() + let (dtype, offsets, values) = std::mem::take(self).into_inner(); + BinaryArray::new(dtype, offsets.into(), values.into(), None).arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { @@ -239,7 +239,7 @@ impl MutableArray for MutableBinaryValuesArray { impl> FromIterator

for MutableBinaryValuesArray { fn from_iter>(iter: I) -> Self { let (offsets, values) = values_iter(iter.into_iter()); - Self::try_new(Self::default_data_type(), offsets, values).unwrap() + Self::try_new(Self::default_dtype(), offsets, values).unwrap() } } @@ -301,7 +301,7 @@ impl MutableBinaryValuesArray { I: Iterator, { let (offsets, values) = trusted_len_values_iter(iterator); - Self::try_new(Self::default_data_type(), offsets, values).unwrap() + Self::try_new(Self::default_dtype(), offsets, values).unwrap() } /// Returns a new [`MutableBinaryValuesArray`] from an iterator. diff --git a/crates/polars-arrow/src/array/binview/ffi.rs b/crates/polars-arrow/src/array/binview/ffi.rs index 8ea36c9d1de7..3fc11278dcfb 100644 --- a/crates/polars-arrow/src/array/binview/ffi.rs +++ b/crates/polars-arrow/src/array/binview/ffi.rs @@ -43,7 +43,7 @@ unsafe impl ToFfi for BinaryViewArrayGeneric { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, views: self.views.clone(), buffers: self.buffers.clone(), @@ -56,7 +56,7 @@ unsafe impl ToFfi for BinaryViewArrayGeneric { impl FromFfi for BinaryViewArrayGeneric { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let views = unsafe { array.buffer::(1) }?; @@ -66,7 +66,7 @@ impl FromFfi for BinaryViewArray let mut remaining_buffers = n_buffers - 2; if remaining_buffers <= 1 { return Ok(Self::new_unchecked_unknown_md( - data_type, + dtype, views, Arc::from([]), validity, @@ -90,7 +90,7 @@ impl FromFfi for BinaryViewArray } Ok(Self::new_unchecked_unknown_md( - data_type, + dtype, views, Arc::from(variadic_buffers), validity, diff --git a/crates/polars-arrow/src/array/binview/mod.rs b/crates/polars-arrow/src/array/binview/mod.rs index 6afae8a3f4e9..dd46a05ba969 100644 --- a/crates/polars-arrow/src/array/binview/mod.rs +++ b/crates/polars-arrow/src/array/binview/mod.rs @@ -110,7 +110,7 @@ impl ViewType for [u8] { } pub struct BinaryViewArrayGeneric { - data_type: ArrowDataType, + dtype: ArrowDataType, views: Buffer, buffers: Arc<[Buffer]>, validity: Option, @@ -130,7 +130,7 @@ impl PartialEq for BinaryViewArrayGeneric { impl Clone for BinaryViewArrayGeneric { fn clone(&self) -> Self { Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), views: self.views.clone(), buffers: self.buffers.clone(), validity: self.validity.clone(), @@ -152,7 +152,7 @@ impl BinaryViewArrayGeneric { /// - the data is valid utf8 (if required) /// - The offsets match the buffers. 
pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, views: Buffer, buffers: Arc<[Buffer]>, validity: Option, @@ -188,7 +188,7 @@ impl BinaryViewArrayGeneric { } Self { - data_type, + dtype, views, buffers, validity, @@ -203,7 +203,7 @@ impl BinaryViewArrayGeneric { /// # Safety /// The caller must ensure the invariants pub unsafe fn new_unchecked_unknown_md( - data_type: ArrowDataType, + dtype: ArrowDataType, views: Buffer, buffers: Arc<[Buffer]>, validity: Option, @@ -213,7 +213,7 @@ impl BinaryViewArrayGeneric { let total_buffer_len = total_buffer_len.unwrap_or_else(|| buffers.iter().map(|b| b.len()).sum()); Self::new_unchecked( - data_type, + dtype, views, buffers, validity, @@ -274,7 +274,7 @@ impl BinaryViewArrayGeneric { *v = update_view(*v, str_slice); } Self::new_unchecked( - self.data_type.clone(), + self.dtype.clone(), views.into(), buffers, validity, @@ -284,7 +284,7 @@ impl BinaryViewArrayGeneric { } pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, views: Buffer, buffers: Arc<[Buffer]>, validity: Option, @@ -301,24 +301,24 @@ impl BinaryViewArrayGeneric { unsafe { Ok(Self::new_unchecked_unknown_md( - data_type, views, buffers, validity, None, + dtype, views, buffers, validity, None, )) } } /// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero. #[inline] - pub fn new_empty(data_type: ArrowDataType) -> Self { - unsafe { Self::new_unchecked(data_type, Buffer::new(), Arc::from([]), None, 0, 0) } + pub fn new_empty(dtype: ArrowDataType) -> Self { + unsafe { Self::new_unchecked(dtype, Buffer::new(), Arc::from([]), None, 0, 0) } } /// Returns a new null [`BinaryViewArrayGeneric`] of `length`. #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { let validity = Some(Bitmap::new_zeroed(length)); unsafe { Self::new_unchecked( - data_type, + dtype, Buffer::zeroed(length), Arc::from([]), validity, @@ -553,7 +553,7 @@ impl Array for BinaryViewArrayGeneric { BinaryViewArrayGeneric::len(self) } - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { T::dtype() } @@ -620,7 +620,7 @@ impl Splitable for BinaryViewArrayGeneric { unsafe { ( Self::new_unchecked( - self.data_type.clone(), + self.dtype.clone(), lhs_views, self.buffers.clone(), lhs_validity, @@ -628,7 +628,7 @@ impl Splitable for BinaryViewArrayGeneric { self.total_buffer_len(), ), Self::new_unchecked( - self.data_type.clone(), + self.dtype.clone(), rhs_views, self.buffers.clone(), rhs_validity, diff --git a/crates/polars-arrow/src/array/binview/mutable.rs b/crates/polars-arrow/src/array/binview/mutable.rs index 24ca207fbf77..0d7dcac94b5a 100644 --- a/crates/polars-arrow/src/array/binview/mutable.rs +++ b/crates/polars-arrow/src/array/binview/mutable.rs @@ -527,7 +527,7 @@ impl MutableBinaryViewArray { #[inline] pub fn freeze_with_dtype(self, dtype: ArrowDataType) -> BinaryViewArrayGeneric { let mut arr: BinaryViewArrayGeneric = self.into(); - arr.data_type = dtype; + arr.dtype = dtype; arr } @@ -752,7 +752,7 @@ impl> FromIterator> for MutableBinar } impl MutableArray for MutableBinaryViewArray { - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { T::dtype() } diff --git a/crates/polars-arrow/src/array/boolean/data.rs b/crates/polars-arrow/src/array/boolean/data.rs index f472348a0407..07ee3311d4cb 100644 --- a/crates/polars-arrow/src/array/boolean/data.rs +++ b/crates/polars-arrow/src/array/boolean/data.rs @@ -20,7 +20,7 @@ 
impl Arrow2Arrow for BooleanArray { } fn from_data(data: &ArrayData) -> Self { - assert_eq!(data.data_type(), &arrow_schema::DataType::Boolean); + assert_eq!(data.dtype(), &arrow_schema::DataType::Boolean); let buffers = data.buffers(); let buffer = BooleanBuffer::new(buffers[0].clone(), data.offset(), data.len()); @@ -28,7 +28,7 @@ impl Arrow2Arrow for BooleanArray { let values = Bitmap::from_null_buffer(NullBuffer::new(buffer)); Self { - data_type: ArrowDataType::Boolean, + dtype: ArrowDataType::Boolean, values, validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), } diff --git a/crates/polars-arrow/src/array/boolean/ffi.rs b/crates/polars-arrow/src/array/boolean/ffi.rs index bd8693f2dbb1..dfaf3ac90571 100644 --- a/crates/polars-arrow/src/array/boolean/ffi.rs +++ b/crates/polars-arrow/src/array/boolean/ffi.rs @@ -38,7 +38,7 @@ unsafe impl ToFfi for BooleanArray { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, values: self.values.clone(), } @@ -47,9 +47,9 @@ unsafe impl ToFfi for BooleanArray { impl FromFfi for BooleanArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.bitmap(1) }?; - Self::try_new(data_type, values, validity) + Self::try_new(dtype, values, validity) } } diff --git a/crates/polars-arrow/src/array/boolean/mod.rs b/crates/polars-arrow/src/array/boolean/mod.rs index 656a6db7e89a..5cd9870fdbf4 100644 --- a/crates/polars-arrow/src/array/boolean/mod.rs +++ b/crates/polars-arrow/src/array/boolean/mod.rs @@ -45,7 +45,7 @@ use polars_error::{polars_bail, PolarsResult}; /// ``` #[derive(Clone)] pub struct BooleanArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Bitmap, validity: Option, } @@ -55,9 +55,9 @@ impl BooleanArray { /// # Errors /// This function errors iff: /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. + /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Bitmap, validity: Option, ) -> PolarsResult { @@ -68,20 +68,20 @@ impl BooleanArray { polars_bail!(ComputeError: "validity mask length must match the number of values") } - if data_type.to_physical_type() != PhysicalType::Boolean { + if dtype.to_physical_type() != PhysicalType::Boolean { polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean") } Ok(Self { - data_type, + dtype, values, validity, }) } /// Alias to `Self::try_new().unwrap()` - pub fn new(data_type: ArrowDataType, values: Bitmap, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() + pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option) -> Self { + Self::try_new(dtype, values, validity).unwrap() } /// Returns an iterator over the optional values of this [`BooleanArray`]. @@ -123,8 +123,8 @@ impl BooleanArray { /// Returns the arrays' [`ArrowDataType`]. 
#[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } /// Returns the value at index `i` @@ -238,38 +238,38 @@ impl BooleanArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { - Left(bitmap) => Left(BooleanArray::new(self.data_type, self.values, Some(bitmap))), + Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))), Right(mutable_bitmap) => match self.values.into_mut() { Left(immutable) => Left(BooleanArray::new( - self.data_type, + self.dtype, immutable, Some(mutable_bitmap.into()), )), Right(mutable) => Right( - MutableBooleanArray::try_new(self.data_type, mutable, Some(mutable_bitmap)) + MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap)) .unwrap(), ), }, } } else { match self.values.into_mut() { - Left(immutable) => Left(BooleanArray::new(self.data_type, immutable, None)), + Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)), Right(mutable) => { - Right(MutableBooleanArray::try_new(self.data_type, mutable, None).unwrap()) + Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap()) }, } } } /// Returns a new empty [`BooleanArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - Self::new(data_type, Bitmap::new(), None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + Self::new(dtype, Bitmap::new(), None) } /// Returns a new [`BooleanArray`] whose all slots are null / `None`. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { let bitmap = Bitmap::new_zeroed(length); - Self::new(data_type, bitmap.clone(), Some(bitmap)) + Self::new(dtype, bitmap.clone(), Some(bitmap)) } /// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`. @@ -352,11 +352,11 @@ impl BooleanArray { #[must_use] pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option) { let Self { - data_type, + dtype, values, validity, } = self; - (data_type, values, validity) + (dtype, values, validity) } /// Creates a `[BooleanArray]` from its internal representation. @@ -365,12 +365,12 @@ impl BooleanArray { /// # Safety /// Callers must ensure all invariants of this struct are upheld. pub unsafe fn from_inner_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Bitmap, validity: Option, ) -> Self { Self { - data_type, + dtype, values, validity, } @@ -401,12 +401,12 @@ impl Splitable for BooleanArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: lhs_values, validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: rhs_values, validity: rhs_validity, }, @@ -417,7 +417,7 @@ impl Splitable for BooleanArray { impl From for BooleanArray { fn from(values: Bitmap) -> Self { Self { - data_type: ArrowDataType::Boolean, + dtype: ArrowDataType::Boolean, values, validity: None, } diff --git a/crates/polars-arrow/src/array/boolean/mutable.rs b/crates/polars-arrow/src/array/boolean/mutable.rs index 7f97f82762b0..f93707db4846 100644 --- a/crates/polars-arrow/src/array/boolean/mutable.rs +++ b/crates/polars-arrow/src/array/boolean/mutable.rs @@ -15,7 +15,7 @@ use crate::trusted_len::TrustedLen; /// This struct does not allocate a validity until one is required (i.e. push a null to it). 
#[derive(Debug, Clone)] pub struct MutableBooleanArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: MutableBitmap, validity: Option, } @@ -23,7 +23,7 @@ pub struct MutableBooleanArray { impl From for BooleanArray { fn from(other: MutableBooleanArray) -> Self { BooleanArray::new( - other.data_type, + other.dtype, other.values.into(), other.validity.map(|x| x.into()), ) @@ -53,9 +53,9 @@ impl MutableBooleanArray { /// # Errors /// This function errors iff: /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. + /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: MutableBitmap, validity: Option, ) -> PolarsResult { @@ -68,14 +68,14 @@ impl MutableBooleanArray { ) } - if data_type.to_physical_type() != PhysicalType::Boolean { + if dtype.to_physical_type() != PhysicalType::Boolean { polars_bail!(oos = "MutableBooleanArray can only be initialized with a DataType whose physical type is Boolean", ) } Ok(Self { - data_type, + dtype, values, validity, }) @@ -84,7 +84,7 @@ impl MutableBooleanArray { /// Creates an new [`MutableBooleanArray`] with a capacity of values. pub fn with_capacity(capacity: usize) -> Self { Self { - data_type: ArrowDataType::Boolean, + dtype: ArrowDataType::Boolean, values: MutableBitmap::with_capacity(capacity), validity: None, } @@ -533,8 +533,8 @@ impl MutableArray for MutableBooleanArray { array.arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/dictionary/data.rs b/crates/polars-arrow/src/array/dictionary/data.rs index e7159e4bfff2..6e8234da505d 100644 --- a/crates/polars-arrow/src/array/dictionary/data.rs +++ b/crates/polars-arrow/src/array/dictionary/data.rs @@ -10,7 +10,7 @@ impl Arrow2Arrow for DictionaryArray { let keys = self.keys.to_data(); let builder = keys .into_builder() - .data_type(self.data_type.clone().into()) + .dtype(self.dtype.clone().into()) .child_data(vec![to_data(self.values.as_ref())]); // SAFETY: Dictionary is valid @@ -18,14 +18,14 @@ impl Arrow2Arrow for DictionaryArray { } fn from_data(data: &ArrayData) -> Self { - let key = match data.data_type() { + let key = match data.dtype() { arrow_schema::DataType::Dictionary(k, _) => k.as_ref(), d => panic!("unsupported dictionary type {d}"), }; - let data_type = ArrowDataType::from(data.data_type().clone()); + let dtype = ArrowDataType::from(data.dtype().clone()); assert_eq!( - data_type.to_physical_type(), + dtype.to_physical_type(), PhysicalType::Dictionary(K::KEY_TYPE) ); @@ -41,7 +41,7 @@ impl Arrow2Arrow for DictionaryArray { let values = from_data(&data.child_data()[0]); Self { - data_type, + dtype, keys, values, } diff --git a/crates/polars-arrow/src/array/dictionary/ffi.rs b/crates/polars-arrow/src/array/dictionary/ffi.rs index b22c27eacead..025a4bbb9b69 100644 --- a/crates/polars-arrow/src/array/dictionary/ffi.rs +++ b/crates/polars-arrow/src/array/dictionary/ffi.rs @@ -15,7 +15,7 @@ unsafe impl ToFfi for DictionaryArray { fn to_ffi_aligned(&self) -> Self { Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), keys: self.keys.to_ffi_aligned(), values: self.values.clone(), } @@ -28,7 +28,7 @@ impl FromFfi for DictionaryArray let validity = unsafe { array.validity() }?; let values = unsafe 
{ array.buffer::(1) }?; - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let keys = PrimitiveArray::::try_new(K::PRIMITIVE.into(), values, validity)?; let values = array.dictionary()?.ok_or_else( @@ -37,6 +37,6 @@ impl FromFfi for DictionaryArray let values = ffi::try_from(values)?; // the assumption of this trait - DictionaryArray::::try_new_unchecked(data_type, keys, values) + DictionaryArray::::try_new_unchecked(dtype, keys, values) } } diff --git a/crates/polars-arrow/src/array/dictionary/mod.rs b/crates/polars-arrow/src/array/dictionary/mod.rs index 7420b02f7891..d53970dacd98 100644 --- a/crates/polars-arrow/src/array/dictionary/mod.rs +++ b/crates/polars-arrow/src/array/dictionary/mod.rs @@ -126,21 +126,21 @@ unsafe impl DictionaryKey for u64 { /// use `unchecked` calls to retrieve the values #[derive(Clone)] pub struct DictionaryArray { - data_type: ArrowDataType, + dtype: ArrowDataType, keys: PrimitiveArray, values: Box, } -fn check_data_type( +fn check_dtype( key_type: IntegerType, - data_type: &ArrowDataType, - values_data_type: &ArrowDataType, + dtype: &ArrowDataType, + values_dtype: &ArrowDataType, ) -> PolarsResult<()> { - if let ArrowDataType::Dictionary(key, value, _) = data_type.to_logical_type() { + if let ArrowDataType::Dictionary(key, value, _) = dtype.to_logical_type() { if *key != key_type { polars_bail!(ComputeError: "DictionaryArray must be initialized with a DataType::Dictionary whose integer is compatible to its keys") } - if value.as_ref().to_logical_type() != values_data_type.to_logical_type() { + if value.as_ref().to_logical_type() != values_dtype.to_logical_type() { polars_bail!(ComputeError: "DictionaryArray must be initialized with a DataType::Dictionary whose value is equal to its values") } } else { @@ -155,16 +155,16 @@ impl DictionaryArray { /// This function is `O(N)` where `N` is the length of keys /// # Errors /// This function errors iff - /// * the `data_type`'s logical type is not a `DictionaryArray` - /// * the `data_type`'s keys is not compatible with `keys` - /// * the `data_type`'s values's data_type is not equal with `values.data_type()` + /// * the `dtype`'s logical type is not a `DictionaryArray` + /// * the `dtype`'s keys is not compatible with `keys` + /// * the `dtype`'s values's dtype is not equal with `values.dtype()` /// * any of the keys's values is not represented in `usize` or is `>= values.len()` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, keys: PrimitiveArray, values: Box, ) -> PolarsResult { - check_data_type(K::KEY_TYPE, &data_type, values.data_type())?; + check_dtype(K::KEY_TYPE, &dtype, values.dtype())?; if keys.null_count() != keys.len() { if K::always_fits_usize() { @@ -177,7 +177,7 @@ impl DictionaryArray { } Ok(Self { - data_type, + dtype, keys, values, }) @@ -190,39 +190,39 @@ impl DictionaryArray { /// This function errors iff /// * any of the keys's values is not represented in `usize` or is `>= values.len()` pub fn try_from_keys(keys: PrimitiveArray, values: Box) -> PolarsResult { - let data_type = Self::default_data_type(values.data_type().clone()); - Self::try_new(data_type, keys, values) + let dtype = Self::default_dtype(values.dtype().clone()); + Self::try_new(dtype, keys, values) } /// Returns a new [`DictionaryArray`]. 
/// # Errors /// This function errors iff - /// * the `data_type`'s logical type is not a `DictionaryArray` - /// * the `data_type`'s keys is not compatible with `keys` - /// * the `data_type`'s values's data_type is not equal with `values.data_type()` + /// * the `dtype`'s logical type is not a `DictionaryArray` + /// * the `dtype`'s keys is not compatible with `keys` + /// * the `dtype`'s values's dtype is not equal with `values.dtype()` /// /// # Safety /// The caller must ensure that every keys's values is represented in `usize` and is `< values.len()` pub unsafe fn try_new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, keys: PrimitiveArray, values: Box, ) -> PolarsResult { - check_data_type(K::KEY_TYPE, &data_type, values.data_type())?; + check_dtype(K::KEY_TYPE, &dtype, values.dtype())?; Ok(Self { - data_type, + dtype, keys, values, }) } /// Returns a new empty [`DictionaryArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - let values = Self::try_get_child(&data_type).unwrap(); + pub fn new_empty(dtype: ArrowDataType) -> Self { + let values = Self::try_get_child(&dtype).unwrap(); let values = new_empty_array(values.clone()); Self::try_new( - data_type, + dtype, PrimitiveArray::::new_empty(K::PRIMITIVE.into()), values, ) @@ -231,11 +231,11 @@ impl DictionaryArray { /// Returns an [`DictionaryArray`] whose all elements are null #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - let values = Self::try_get_child(&data_type).unwrap(); + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + let values = Self::try_get_child(&dtype).unwrap(); let values = new_null_array(values.clone(), 1); Self::try_new( - data_type, + dtype, PrimitiveArray::::new_null(K::PRIMITIVE.into(), length), values, ) @@ -282,20 +282,20 @@ impl DictionaryArray { /// Returns the [`ArrowDataType`] of this [`DictionaryArray`] #[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } /// Returns whether the values of this [`DictionaryArray`] are ordered #[inline] pub fn is_ordered(&self) -> bool { - match self.data_type.to_logical_type() { + match self.dtype.to_logical_type() { ArrowDataType::Dictionary(_, _, is_ordered) => *is_ordered, _ => unreachable!(), } } - pub(crate) fn default_data_type(values_datatype: ArrowDataType) -> ArrowDataType { + pub(crate) fn default_dtype(values_datatype: ArrowDataType) -> ArrowDataType { ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(values_datatype), false) } @@ -395,8 +395,8 @@ impl DictionaryArray { new_scalar(self.values.as_ref(), index) } - pub(crate) fn try_get_child(data_type: &ArrowDataType) -> PolarsResult<&ArrowDataType> { - Ok(match data_type.to_logical_type() { + pub(crate) fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&ArrowDataType> { + Ok(match dtype.to_logical_type() { ArrowDataType::Dictionary(_, values, _) => values.as_ref(), _ => { polars_bail!(ComputeError: "Dictionaries must be initialized with DataType::Dictionary") @@ -428,12 +428,12 @@ impl Splitable for DictionaryArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), keys: lhs_keys, values: self.values.clone(), }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), keys: rhs_keys, values: self.values.clone(), }, diff --git a/crates/polars-arrow/src/array/dictionary/mutable.rs b/crates/polars-arrow/src/array/dictionary/mutable.rs index d55ba6484443..1d01b1e719f9 100644 --- 
a/crates/polars-arrow/src/array/dictionary/mutable.rs +++ b/crates/polars-arrow/src/array/dictionary/mutable.rs @@ -13,7 +13,7 @@ use crate::datatypes::ArrowDataType; #[derive(Debug)] pub struct MutableDictionaryArray { - data_type: ArrowDataType, + dtype: ArrowDataType, map: ValueMap, // invariant: `max(keys) < map.values().len()` keys: MutablePrimitiveArray, @@ -24,7 +24,7 @@ impl From> for D // SAFETY: the invariant of this struct ensures that this is up-held unsafe { DictionaryArray::::try_new_unchecked( - other.data_type, + other.dtype, other.keys.into(), other.map.into_values().as_box(), ) @@ -69,10 +69,10 @@ impl MutableDictionaryArray { fn from_value_map(value_map: ValueMap) -> Self { let keys = MutablePrimitiveArray::::new(); - let data_type = - ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.data_type().clone()), false); + let dtype = + ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false); Self { - data_type, + dtype, map: value_map, keys, } @@ -134,7 +134,7 @@ impl MutableDictionaryArray { fn take_into(&mut self) -> DictionaryArray { DictionaryArray::::try_new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.keys).into(), self.map.take_into(), ) @@ -159,8 +159,8 @@ impl MutableArray for MutableDictio Arc::new(self.take_into()) } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/dictionary/value_map.rs b/crates/polars-arrow/src/array/dictionary/value_map.rs index 64d7713ce679..d818b7e6b25c 100644 --- a/crates/polars-arrow/src/array/dictionary/value_map.rs +++ b/crates/polars-arrow/src/array/dictionary/value_map.rs @@ -106,8 +106,8 @@ impl ValueMap { }) } - pub fn data_type(&self) -> &ArrowDataType { - self.values.data_type() + pub fn dtype(&self) -> &ArrowDataType { + self.values.dtype() } pub fn into_values(self) -> M { diff --git a/crates/polars-arrow/src/array/equal/binary.rs b/crates/polars-arrow/src/array/equal/binary.rs index bed8588efb59..93145aa461e2 100644 --- a/crates/polars-arrow/src/array/equal/binary.rs +++ b/crates/polars-arrow/src/array/equal/binary.rs @@ -2,5 +2,5 @@ use crate::array::BinaryArray; use crate::offset::Offset; pub(super) fn equal(lhs: &BinaryArray, rhs: &BinaryArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/binary_view.rs b/crates/polars-arrow/src/array/equal/binary_view.rs index 546e3e2a1818..f413650dc9c3 100644 --- a/crates/polars-arrow/src/array/equal/binary_view.rs +++ b/crates/polars-arrow/src/array/equal/binary_view.rs @@ -5,5 +5,5 @@ pub(super) fn equal( lhs: &BinaryViewArrayGeneric, rhs: &BinaryViewArrayGeneric, ) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/dictionary.rs b/crates/polars-arrow/src/array/equal/dictionary.rs index d65634095fb3..88213cbc059a 100644 --- a/crates/polars-arrow/src/array/equal/dictionary.rs +++ b/crates/polars-arrow/src/array/equal/dictionary.rs @@ -1,7 +1,7 @@ use crate::array::{DictionaryArray, DictionaryKey}; pub(super) fn equal(lhs: &DictionaryArray, rhs: &DictionaryArray) -> bool { - if !(lhs.data_type() == 
rhs.data_type() && lhs.len() == rhs.len()) { + if !(lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len()) { return false; }; diff --git a/crates/polars-arrow/src/array/equal/fixed_size_binary.rs b/crates/polars-arrow/src/array/equal/fixed_size_binary.rs index 883d5739778b..0e956e872090 100644 --- a/crates/polars-arrow/src/array/equal/fixed_size_binary.rs +++ b/crates/polars-arrow/src/array/equal/fixed_size_binary.rs @@ -1,5 +1,5 @@ use crate::array::{Array, FixedSizeBinaryArray}; pub(super) fn equal(lhs: &FixedSizeBinaryArray, rhs: &FixedSizeBinaryArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/fixed_size_list.rs b/crates/polars-arrow/src/array/equal/fixed_size_list.rs index aaf77910013f..26582aa05379 100644 --- a/crates/polars-arrow/src/array/equal/fixed_size_list.rs +++ b/crates/polars-arrow/src/array/equal/fixed_size_list.rs @@ -1,5 +1,5 @@ use crate::array::{Array, FixedSizeListArray}; pub(super) fn equal(lhs: &FixedSizeListArray, rhs: &FixedSizeListArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/list.rs b/crates/polars-arrow/src/array/equal/list.rs index 26faa1598faf..5c08e2103dcb 100644 --- a/crates/polars-arrow/src/array/equal/list.rs +++ b/crates/polars-arrow/src/array/equal/list.rs @@ -2,5 +2,5 @@ use crate::array::{Array, ListArray}; use crate::offset::Offset; pub(super) fn equal(lhs: &ListArray, rhs: &ListArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/map.rs b/crates/polars-arrow/src/array/equal/map.rs index e150fb4a4b41..b98d65cea03a 100644 --- a/crates/polars-arrow/src/array/equal/map.rs +++ b/crates/polars-arrow/src/array/equal/map.rs @@ -1,5 +1,5 @@ use crate::array::{Array, MapArray}; pub(super) fn equal(lhs: &MapArray, rhs: &MapArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/mod.rs b/crates/polars-arrow/src/array/equal/mod.rs index 0a929c793e13..971e4cbca4e8 100644 --- a/crates/polars-arrow/src/array/equal/mod.rs +++ b/crates/polars-arrow/src/array/equal/mod.rs @@ -201,12 +201,12 @@ impl PartialEq<&dyn Array> for MapArray { /// * their data types are equal /// * each of their items are equal pub fn equal(lhs: &dyn Array, rhs: &dyn Array) -> bool { - if lhs.data_type() != rhs.data_type() { + if lhs.dtype() != rhs.dtype() { return false; } use crate::datatypes::PhysicalType::*; - match lhs.data_type().to_physical_type() { + match lhs.dtype().to_physical_type() { Null => { let lhs = lhs.as_any().downcast_ref().unwrap(); let rhs = rhs.as_any().downcast_ref().unwrap(); diff --git a/crates/polars-arrow/src/array/equal/primitive.rs b/crates/polars-arrow/src/array/equal/primitive.rs index dc90bb15da5e..375335155dc8 100644 --- a/crates/polars-arrow/src/array/equal/primitive.rs +++ b/crates/polars-arrow/src/array/equal/primitive.rs @@ -2,5 +2,5 @@ use crate::array::PrimitiveArray; use crate::types::NativeType; 
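Every one of these per-type kernels implements the same contract: equal dtype, equal length, and element-wise equal items. A standalone restatement of that contract (import paths and `from_vec` assumed):

use polars_arrow::array::{Array, PrimitiveArray};

fn equality_contract() {
    let a = PrimitiveArray::<i64>::from_vec(vec![1, 2, 3]);
    let b = PrimitiveArray::<i64>::from_vec(vec![1, 2, 3]);

    // Mirrors `lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len()
    //          && lhs.iter().eq(rhs.iter())` from the kernels above.
    assert!(a.dtype() == b.dtype());
    assert!(a.len() == b.len());
    assert!(a.iter().eq(b.iter()));
}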
pub(super) fn equal(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/struct_.rs b/crates/polars-arrow/src/array/equal/struct_.rs index a1741e36368c..3e50626fe7d1 100644 --- a/crates/polars-arrow/src/array/equal/struct_.rs +++ b/crates/polars-arrow/src/array/equal/struct_.rs @@ -1,7 +1,7 @@ use crate::array::{Array, StructArray}; pub(super) fn equal(lhs: &StructArray, rhs: &StructArray) -> bool { - lhs.data_type() == rhs.data_type() + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && match (lhs.validity(), rhs.validity()) { (None, None) => lhs.values().iter().eq(rhs.values().iter()), diff --git a/crates/polars-arrow/src/array/equal/union.rs b/crates/polars-arrow/src/array/equal/union.rs index 51b9d960feac..94881c187fe9 100644 --- a/crates/polars-arrow/src/array/equal/union.rs +++ b/crates/polars-arrow/src/array/equal/union.rs @@ -1,5 +1,5 @@ use crate::array::{Array, UnionArray}; pub(super) fn equal(lhs: &UnionArray, rhs: &UnionArray) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/equal/utf8.rs b/crates/polars-arrow/src/array/equal/utf8.rs index 1327221ca331..f76d30a87368 100644 --- a/crates/polars-arrow/src/array/equal/utf8.rs +++ b/crates/polars-arrow/src/array/equal/utf8.rs @@ -2,5 +2,5 @@ use crate::array::Utf8Array; use crate::offset::Offset; pub(super) fn equal(lhs: &Utf8Array, rhs: &Utf8Array) -> bool { - lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) + lhs.dtype() == rhs.dtype() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter()) } diff --git a/crates/polars-arrow/src/array/ffi.rs b/crates/polars-arrow/src/array/ffi.rs index 9806eac25e97..bf9844529b1f 100644 --- a/crates/polars-arrow/src/array/ffi.rs +++ b/crates/polars-arrow/src/array/ffi.rs @@ -54,7 +54,7 @@ type BuffersChildren = ( pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren { use PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => ffi_dyn!(array, NullArray), Boolean => ffi_dyn!(array, BooleanArray), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { diff --git a/crates/polars-arrow/src/array/fixed_size_binary/data.rs b/crates/polars-arrow/src/array/fixed_size_binary/data.rs index f99822eb0fbb..85c9fceacfa3 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/data.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/data.rs @@ -7,8 +7,8 @@ use crate::datatypes::ArrowDataType; impl Arrow2Arrow for FixedSizeBinaryArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let dtype = self.dtype.clone().into(); + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .buffers(vec![self.values.clone().into()]) .nulls(self.validity.as_ref().map(|b| b.clone().into())); @@ -18,8 +18,8 @@ impl Arrow2Arrow for FixedSizeBinaryArray { } fn from_data(data: &ArrayData) -> Self { - let data_type: ArrowDataType = data.data_type().clone().into(); - let size = match data_type { + let dtype: ArrowDataType = data.dtype().clone().into(); + let size = match dtype { 
ArrowDataType::FixedSizeBinary(size) => size, _ => unreachable!("must be FixedSizeBinary"), }; @@ -29,7 +29,7 @@ impl Arrow2Arrow for FixedSizeBinaryArray { Self { size, - data_type, + dtype, values, validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), } diff --git a/crates/polars-arrow/src/array/fixed_size_binary/ffi.rs b/crates/polars-arrow/src/array/fixed_size_binary/ffi.rs index 43af7fef58ad..d3d0c777dd66 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/ffi.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/ffi.rs @@ -39,7 +39,7 @@ unsafe impl ToFfi for FixedSizeBinaryArray { Self { size: self.size, - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, values: self.values.clone(), } @@ -48,10 +48,10 @@ unsafe impl ToFfi for FixedSizeBinaryArray { impl FromFfi for FixedSizeBinaryArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.buffer::(1) }?; - Self::try_new(data_type, values, validity) + Self::try_new(dtype, values, validity) } } diff --git a/crates/polars-arrow/src/array/fixed_size_binary/fmt.rs b/crates/polars-arrow/src/array/fixed_size_binary/fmt.rs index c5f9e2dd3293..6aa47acf3fd8 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/fmt.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/fmt.rs @@ -14,7 +14,7 @@ impl Debug for FixedSizeBinaryArray { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let writer = |f: &mut Formatter, index| write_value(self, index, f); - write!(f, "{:?}", self.data_type)?; + write!(f, "{:?}", self.dtype)?; write_vec(f, writer, self.validity(), self.len(), "None", false) } } diff --git a/crates/polars-arrow/src/array/fixed_size_binary/mod.rs b/crates/polars-arrow/src/array/fixed_size_binary/mod.rs index 1194b8f5044d..6dccc5acf778 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/mod.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/mod.rs @@ -16,8 +16,8 @@ use polars_error::{polars_bail, polars_ensure, PolarsResult}; /// Cloning and slicing this struct is `O(1)`. #[derive(Clone)] pub struct FixedSizeBinaryArray { - size: usize, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type. - data_type: ArrowDataType, + size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype. + dtype: ArrowDataType, values: Buffer, validity: Option, } @@ -27,15 +27,15 @@ impl FixedSizeBinaryArray { /// /// # Errors /// This function returns an error iff: - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] - /// * The length of `values` is not a multiple of `size` in `data_type` + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] + /// * The length of `values` is not a multiple of `size` in `dtype` /// * the validity's length is not equal to `values.len() / size`. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, ) -> PolarsResult { - let size = Self::maybe_get_size(&data_type)?; + let size = Self::maybe_get_size(&dtype)?; if values.len() % size != 0 { polars_bail!(ComputeError: @@ -55,7 +55,7 @@ impl FixedSizeBinaryArray { Ok(Self { size, - data_type, + dtype, values, validity, }) @@ -64,23 +64,23 @@ impl FixedSizeBinaryArray { /// Creates a new [`FixedSizeBinaryArray`]. 
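A hedged sketch of the constructor contract spelled out above (crate paths assumed): the size carried by the `dtype` must evenly divide the length of `values`, and the validity, when present, must have one bit per slot.

use polars_arrow::array::FixedSizeBinaryArray;
use polars_arrow::datatypes::ArrowDataType;
use polars_error::PolarsResult;

fn fixed_size_binary_sketch() -> PolarsResult<FixedSizeBinaryArray> {
    // Two slots of three bytes each: 6 bytes of values, no validity bitmap.
    let dtype = ArrowDataType::FixedSizeBinary(3);
    FixedSizeBinaryArray::try_new(dtype, vec![1u8, 2, 3, 4, 5, 6].into(), None)
}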
/// # Panics /// This function panics iff: - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] - /// * The length of `values` is not a multiple of `size` in `data_type` + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] + /// * The length of `values` is not a multiple of `size` in `dtype` /// * the validity's length is not equal to `values.len() / size`. - pub fn new(data_type: ArrowDataType, values: Buffer, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() + pub fn new(dtype: ArrowDataType, values: Buffer, validity: Option) -> Self { + Self::try_new(dtype, values, validity).unwrap() } /// Returns a new empty [`FixedSizeBinaryArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - Self::new(data_type, Buffer::new(), None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + Self::new(dtype, Buffer::new(), None) } /// Returns a new null [`FixedSizeBinaryArray`]. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - let size = Self::maybe_get_size(&data_type).unwrap(); + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + let size = Self::maybe_get_size(&dtype).unwrap(); Self::new( - data_type, + dtype, vec![0u8; length * size].into(), Some(Bitmap::new_zeroed(length)), ) @@ -178,12 +178,12 @@ impl FixedSizeBinaryArray { /// Returns a new [`FixedSizeBinaryArray`] with a different logical type. /// This is `O(1)`. /// # Panics - /// Panics iff the data_type is not supported for the physical type. + /// Panics iff the dtype is not supported for the physical type. #[inline] - pub fn to(self, data_type: ArrowDataType) -> Self { + pub fn to(self, dtype: ArrowDataType) -> Self { match ( - data_type.to_logical_type(), - self.data_type().to_logical_type(), + dtype.to_logical_type(), + self.dtype().to_logical_type(), ) { (ArrowDataType::FixedSizeBinary(size_a), ArrowDataType::FixedSizeBinary(size_b)) if size_a == size_b => {}, @@ -192,7 +192,7 @@ impl FixedSizeBinaryArray { Self { size: self.size, - data_type, + dtype, values: self.values, validity: self.validity, } @@ -205,8 +205,8 @@ impl FixedSizeBinaryArray { } impl FixedSizeBinaryArray { - pub(crate) fn maybe_get_size(data_type: &ArrowDataType) -> PolarsResult { - match data_type.to_logical_type() { + pub(crate) fn maybe_get_size(dtype: &ArrowDataType) -> PolarsResult { + match dtype.to_logical_type() { ArrowDataType::FixedSizeBinary(size) => { polars_ensure!(*size != 0, ComputeError: "FixedSizeBinaryArray expects a positive size"); Ok(*size) @@ -217,8 +217,8 @@ impl FixedSizeBinaryArray { } } - pub fn get_size(data_type: &ArrowDataType) -> usize { - Self::maybe_get_size(data_type).unwrap() + pub fn get_size(dtype: &ArrowDataType) -> usize { + Self::maybe_get_size(dtype).unwrap() } } @@ -248,13 +248,13 @@ impl Splitable for FixedSizeBinaryArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: lhs_values, validity: lhs_validity, size, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: rhs_values, validity: rhs_validity, size, diff --git a/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs b/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs index 1c744dbe88fd..903c33178640 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/mutable.rs @@ -14,7 +14,7 @@ use crate::datatypes::ArrowDataType; /// This struct does not allocate a 
validity until one is required (i.e. push a null to it). #[derive(Debug, Clone)] pub struct MutableFixedSizeBinaryArray { - data_type: ArrowDataType, + dtype: ArrowDataType, size: usize, values: Vec, validity: Option, @@ -23,7 +23,7 @@ pub struct MutableFixedSizeBinaryArray { impl From for FixedSizeBinaryArray { fn from(other: MutableFixedSizeBinaryArray) -> Self { FixedSizeBinaryArray::new( - other.data_type, + other.dtype, other.values.into(), other.validity.map(|x| x.into()), ) @@ -35,15 +35,15 @@ impl MutableFixedSizeBinaryArray { /// /// # Errors /// This function returns an error iff: - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] - /// * The length of `values` is not a multiple of `size` in `data_type` + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] + /// * The length of `values` is not a multiple of `size` in `dtype` /// * the validity's length is not equal to `values.len() / size`. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec, validity: Option, ) -> PolarsResult { - let size = FixedSizeBinaryArray::maybe_get_size(&data_type)?; + let size = FixedSizeBinaryArray::maybe_get_size(&dtype)?; if values.len() % size != 0 { polars_bail!(ComputeError: @@ -63,7 +63,7 @@ impl MutableFixedSizeBinaryArray { Ok(Self { size, - data_type, + dtype, values, validity, }) @@ -264,8 +264,8 @@ impl MutableArray for MutableFixedSizeBinaryArray { .arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/fixed_size_list/data.rs b/crates/polars-arrow/src/array/fixed_size_list/data.rs index f98fa452c6ea..c52a63d1dea8 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/data.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/data.rs @@ -6,8 +6,8 @@ use crate::datatypes::ArrowDataType; impl Arrow2Arrow for FixedSizeListArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let dtype = self.dtype.clone().into(); + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .nulls(self.validity.as_ref().map(|b| b.clone().into())) .child_data(vec![to_data(self.values.as_ref())]); @@ -17,8 +17,8 @@ impl Arrow2Arrow for FixedSizeListArray { } fn from_data(data: &ArrayData) -> Self { - let data_type: ArrowDataType = data.data_type().clone().into(); - let size = match data_type { + let dtype: ArrowDataType = data.dtype().clone().into(); + let size = match dtype { ArrowDataType::FixedSizeList(_, size) => size, _ => unreachable!("must be FixedSizeList type"), }; @@ -28,7 +28,7 @@ impl Arrow2Arrow for FixedSizeListArray { Self { size, - data_type, + dtype, values, validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), } diff --git a/crates/polars-arrow/src/array/fixed_size_list/ffi.rs b/crates/polars-arrow/src/array/fixed_size_list/ffi.rs index 7cb463974e29..29cf7957cf6c 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/ffi.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/ffi.rs @@ -30,12 +30,12 @@ unsafe impl ToFfi for FixedSizeListArray { impl FromFfi for FixedSizeListArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let child = unsafe { array.child(0)? 
}; let values = ffi::try_from(child)?; - let mut fsl = Self::try_new(data_type, values, validity)?; + let mut fsl = Self::try_new(dtype, values, validity)?; fsl.slice(array.offset(), array.length()); Ok(fsl) } diff --git a/crates/polars-arrow/src/array/fixed_size_list/mod.rs b/crates/polars-arrow/src/array/fixed_size_list/mod.rs index 4fd817b90264..3b8b9890d4f4 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/mod.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/mod.rs @@ -17,8 +17,8 @@ use polars_utils::pl_str::PlSmallStr; /// Cloning and slicing this struct is `O(1)`. #[derive(Clone)] pub struct FixedSizeListArray { - size: usize, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type. - data_type: ArrowDataType, + size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype. + dtype: ArrowDataType, values: Box, validity: Option, } @@ -28,21 +28,21 @@ impl FixedSizeListArray { /// /// # Errors /// This function returns an error iff: - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`] - /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. - /// * The length of `values` is not a multiple of `size` in `data_type` + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`] + /// * The `dtype`'s inner field's data type is not equal to `values.dtype`. + /// * The length of `values` is not a multiple of `size` in `dtype` /// * the validity's length is not equal to `values.len() / size`. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Box, validity: Option, ) -> PolarsResult { - let (child, size) = Self::try_child_and_size(&data_type)?; + let (child, size) = Self::try_child_and_size(&dtype)?; - let child_data_type = &child.data_type; - let values_data_type = values.data_type(); - if child_data_type != values_data_type { - polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}.") + let child_dtype = &child.dtype; + let values_dtype = values.dtype(); + if child_dtype != values_dtype { + polars_bail!(ComputeError: "FixedSizeListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.") } if values.len() % size != 0 { @@ -63,7 +63,7 @@ impl FixedSizeListArray { Ok(Self { size, - data_type, + dtype, values, validity, }) @@ -71,8 +71,8 @@ impl FixedSizeListArray { /// Alias to `Self::try_new(...).unwrap()` #[track_caller] - pub fn new(data_type: ArrowDataType, values: Box, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() + pub fn new(dtype: ArrowDataType, values: Box, validity: Option) -> Self { + Self::try_new(dtype, values, validity).unwrap() } /// Returns the size (number of elements per slot) of this [`FixedSizeListArray`]. @@ -81,17 +81,17 @@ impl FixedSizeListArray { } /// Returns a new empty [`FixedSizeListArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - let values = new_empty_array(Self::get_child_and_size(&data_type).0.data_type().clone()); - Self::new(data_type, values, None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + let values = new_empty_array(Self::get_child_and_size(&dtype).0.dtype().clone()); + Self::new(dtype, values, None) } /// Returns a new null [`FixedSizeListArray`]. 
- pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - let (field, size) = Self::get_child_and_size(&data_type); + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + let (field, size) = Self::get_child_and_size(&dtype); - let values = new_null_array(field.data_type().clone(), length * size); - Self::new(data_type, values, Some(Bitmap::new_zeroed(length))) + let values = new_null_array(field.dtype().clone(), length * size); + Self::new(dtype, values, Some(Bitmap::new_zeroed(length))) } } @@ -182,8 +182,8 @@ impl FixedSizeListArray { } impl FixedSizeListArray { - pub(crate) fn try_child_and_size(data_type: &ArrowDataType) -> PolarsResult<(&Field, usize)> { - match data_type.to_logical_type() { + pub(crate) fn try_child_and_size(dtype: &ArrowDataType) -> PolarsResult<(&Field, usize)> { + match dtype.to_logical_type() { ArrowDataType::FixedSizeList(child, size) => { if *size == 0 { polars_bail!(ComputeError: "FixedSizeBinaryArray expects a positive size") @@ -194,13 +194,13 @@ impl FixedSizeListArray { } } - pub(crate) fn get_child_and_size(data_type: &ArrowDataType) -> (&Field, usize) { - Self::try_child_and_size(data_type).unwrap() + pub(crate) fn get_child_and_size(dtype: &ArrowDataType) -> (&Field, usize) { + Self::try_child_and_size(dtype).unwrap() } /// Returns a [`ArrowDataType`] consistent with [`FixedSizeListArray`]. - pub fn default_datatype(data_type: ArrowDataType, size: usize) -> ArrowDataType { - let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true)); + pub fn default_datatype(dtype: ArrowDataType, size: usize) -> ArrowDataType { + let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true)); ArrowDataType::FixedSizeList(field, size) } } @@ -233,13 +233,13 @@ impl Splitable for FixedSizeListArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: lhs_values, validity: lhs_validity, size, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: rhs_values, validity: rhs_validity, size, diff --git a/crates/polars-arrow/src/array/fixed_size_list/mutable.rs b/crates/polars-arrow/src/array/fixed_size_list/mutable.rs index 9b05396565f7..04802e59bd67 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/mutable.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/mutable.rs @@ -12,7 +12,7 @@ use crate::datatypes::{ArrowDataType, Field}; /// The mutable version of [`FixedSizeListArray`]. #[derive(Debug, Clone)] pub struct MutableFixedSizeListArray { - data_type: ArrowDataType, + dtype: ArrowDataType, size: usize, values: M, validity: Option, @@ -21,7 +21,7 @@ pub struct MutableFixedSizeListArray { impl From> for FixedSizeListArray { fn from(mut other: MutableFixedSizeListArray) -> Self { FixedSizeListArray::new( - other.data_type, + other.dtype, other.values.as_box(), other.validity.map(|x| x.into()), ) @@ -31,29 +31,29 @@ impl From> for FixedSizeListArray impl MutableFixedSizeListArray { /// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size. pub fn new(values: M, size: usize) -> Self { - let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size); - Self::new_from(values, data_type, size) + let dtype = FixedSizeListArray::default_datatype(values.dtype().clone(), size); + Self::new_from(values, dtype, size) } /// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size. 
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool, size: usize) -> Self { - let data_type = ArrowDataType::FixedSizeList( - Box::new(Field::new(name, values.data_type().clone(), nullable)), + let dtype = ArrowDataType::FixedSizeList( + Box::new(Field::new(name, values.dtype().clone(), nullable)), size, ); - Self::new_from(values, data_type, size) + Self::new_from(values, dtype, size) } /// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`], [`ArrowDataType`] and size. - pub fn new_from(values: M, data_type: ArrowDataType, size: usize) -> Self { + pub fn new_from(values: M, dtype: ArrowDataType, size: usize) -> Self { assert_eq!(values.len(), 0); - match data_type { + match dtype { ArrowDataType::FixedSizeList(..) => (), - _ => panic!("data type must be FixedSizeList (got {data_type:?})"), + _ => panic!("data type must be FixedSizeList (got {dtype:?})"), }; Self { size, - data_type, + dtype, values, validity: None, } @@ -147,7 +147,7 @@ impl MutableArray for MutableFixedSizeListArray { fn as_box(&mut self) -> Box { FixedSizeListArray::new( - self.data_type.clone(), + self.dtype.clone(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), ) @@ -156,15 +156,15 @@ impl MutableArray for MutableFixedSizeListArray { fn as_arc(&mut self) -> Arc { FixedSizeListArray::new( - self.data_type.clone(), + self.dtype.clone(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), ) .arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/fmt.rs b/crates/polars-arrow/src/array/fmt.rs index 2def0374ab19..6b3fc21752b1 100644 --- a/crates/polars-arrow/src/array/fmt.rs +++ b/crates/polars-arrow/src/array/fmt.rs @@ -12,7 +12,7 @@ pub fn get_value_display<'a, F: Write + 'a>( null: &'static str, ) -> Box Result + 'a> { use crate::datatypes::PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => Box::new(move |f, _| write!(f, "{null}")), Boolean => Box::new(|f, index| { super::boolean::fmt::write_value(array.as_any().downcast_ref().unwrap(), index, f) diff --git a/crates/polars-arrow/src/array/growable/binary.rs b/crates/polars-arrow/src/array/growable/binary.rs index f0b746de2535..44b6ec4da147 100644 --- a/crates/polars-arrow/src/array/growable/binary.rs +++ b/crates/polars-arrow/src/array/growable/binary.rs @@ -13,7 +13,7 @@ use crate::offset::{Offset, Offsets}; /// Concrete [`Growable`] for the [`BinaryArray`]. pub struct GrowableBinary<'a, O: Offset> { arrays: Vec<&'a BinaryArray>, - data_type: ArrowDataType, + dtype: ArrowDataType, validity: Option, values: Vec, offsets: Offsets, @@ -24,7 +24,7 @@ impl<'a, O: Offset> GrowableBinary<'a, O> { /// # Panics /// If `arrays` is empty. pub fn new(arrays: Vec<&'a BinaryArray>, mut use_validity: bool, capacity: usize) -> Self { - let data_type = arrays[0].data_type().clone(); + let dtype = arrays[0].dtype().clone(); // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. 
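A sketch of the dtype/child relationship these constructors enforce: `default_datatype` wraps the child dtype in a nullable "item" field of the given size, and `try_new` then requires the child array's dtype to match it exactly and its length to be a multiple of the size (paths and `from_vec` assumed).

use polars_arrow::array::{FixedSizeListArray, PrimitiveArray};
use polars_arrow::datatypes::ArrowDataType;
use polars_error::PolarsResult;

fn fixed_size_list_sketch() -> PolarsResult<FixedSizeListArray> {
    // Four child values grouped into two lists of width 2.
    let child = PrimitiveArray::<i32>::from_vec(vec![1, 2, 3, 4]);
    let dtype = FixedSizeListArray::default_datatype(ArrowDataType::Int32, 2);
    FixedSizeListArray::try_new(dtype, Box::new(child), None)
}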
@@ -34,7 +34,7 @@ impl<'a, O: Offset> GrowableBinary<'a, O> { Self { arrays, - data_type, + dtype, values: Vec::with_capacity(0), offsets: Offsets::with_capacity(capacity), validity: prepare_validity(use_validity, capacity), @@ -42,13 +42,13 @@ impl<'a, O: Offset> GrowableBinary<'a, O> { } fn to(&mut self) -> BinaryArray { - let data_type = self.data_type.clone(); + let dtype = self.dtype.clone(); let validity = std::mem::take(&mut self.validity); let offsets = std::mem::take(&mut self.offsets); let values = std::mem::take(&mut self.values); BinaryArray::::new( - data_type, + dtype, offsets.into(), values.into(), validity.map(|v| v.into()), @@ -96,7 +96,7 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { impl<'a, O: Offset> From> for BinaryArray { fn from(val: GrowableBinary<'a, O>) -> Self { BinaryArray::::new( - val.data_type, + val.dtype, val.offsets.into(), val.values.into(), val.validity.map(|v| v.into()), diff --git a/crates/polars-arrow/src/array/growable/binview.rs b/crates/polars-arrow/src/array/growable/binview.rs index 2f785699168b..6c974510fc46 100644 --- a/crates/polars-arrow/src/array/growable/binview.rs +++ b/crates/polars-arrow/src/array/growable/binview.rs @@ -15,7 +15,7 @@ use crate::datatypes::ArrowDataType; /// Concrete [`Growable`] for the [`BinaryArray`]. pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> { arrays: Vec<&'a BinaryViewArrayGeneric>, - data_type: ArrowDataType, + dtype: ArrowDataType, validity: Option, inner: MutableBinaryViewArray, same_buffers: Option<&'a Arc<[Buffer]>>, @@ -32,7 +32,7 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { mut use_validity: bool, capacity: usize, ) -> Self { - let data_type = arrays[0].data_type().clone(); + let dtype = arrays[0].dtype().clone(); // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. @@ -64,7 +64,7 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { } Self { arrays, - data_type, + dtype, validity: prepare_validity(use_validity, capacity), inner: MutableBinaryViewArray::::with_capacity(capacity), same_buffers, @@ -78,7 +78,7 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { if let Some(buffers) = self.same_buffers { unsafe { BinaryViewArrayGeneric::::new_unchecked( - self.data_type.clone(), + self.dtype.clone(), arr.views.into(), buffers.clone(), self.validity.take().map(Bitmap::from), @@ -87,7 +87,7 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { ) } } else { - arr.freeze_with_dtype(self.data_type.clone()) + arr.freeze_with_dtype(self.dtype.clone()) .with_validity(self.validity.take().map(Bitmap::from)) } } diff --git a/crates/polars-arrow/src/array/growable/boolean.rs b/crates/polars-arrow/src/array/growable/boolean.rs index ea18791a804d..9bd1c6cc0df6 100644 --- a/crates/polars-arrow/src/array/growable/boolean.rs +++ b/crates/polars-arrow/src/array/growable/boolean.rs @@ -11,7 +11,7 @@ use crate::datatypes::ArrowDataType; /// Concrete [`Growable`] for the [`BooleanArray`]. pub struct GrowableBoolean<'a> { arrays: Vec<&'a BooleanArray>, - data_type: ArrowDataType, + dtype: ArrowDataType, validity: Option, values: MutableBitmap, } @@ -21,7 +21,7 @@ impl<'a> GrowableBoolean<'a> { /// # Panics /// If `arrays` is empty. 
pub fn new(arrays: Vec<&'a BooleanArray>, mut use_validity: bool, capacity: usize) -> Self { - let data_type = arrays[0].data_type().clone(); + let dtype = arrays[0].dtype().clone(); // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. @@ -31,7 +31,7 @@ impl<'a> GrowableBoolean<'a> { Self { arrays, - data_type, + dtype, values: MutableBitmap::with_capacity(capacity), validity: prepare_validity(use_validity, capacity), } @@ -42,7 +42,7 @@ impl<'a> GrowableBoolean<'a> { let values = std::mem::take(&mut self.values); BooleanArray::new( - self.data_type.clone(), + self.dtype.clone(), values.into(), validity.map(|v| v.into()), ) @@ -88,7 +88,7 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> { impl<'a> From> for BooleanArray { fn from(val: GrowableBoolean<'a>) -> Self { BooleanArray::new( - val.data_type, + val.dtype, val.values.into(), val.validity.map(|v| v.into()), ) diff --git a/crates/polars-arrow/src/array/growable/dictionary.rs b/crates/polars-arrow/src/array/growable/dictionary.rs index dd2dbc01fde4..cc36954e19ec 100644 --- a/crates/polars-arrow/src/array/growable/dictionary.rs +++ b/crates/polars-arrow/src/array/growable/dictionary.rs @@ -13,7 +13,7 @@ use crate::datatypes::ArrowDataType; /// This growable does not perform collision checks and instead concatenates /// the values of each [`DictionaryArray`] one after the other. pub struct GrowableDictionary<'a, K: DictionaryKey> { - data_type: ArrowDataType, + dtype: ArrowDataType, keys: Vec<&'a PrimitiveArray>, key_values: Vec, validity: Option, @@ -41,7 +41,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { /// # Panics /// If `arrays` is empty. pub fn new(arrays: &[&'a DictionaryArray], mut use_validity: bool, capacity: usize) -> Self { - let data_type = arrays[0].data_type().clone(); + let dtype = arrays[0].dtype().clone(); // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. 
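The growable above hands its accumulated bits back through the same `(dtype, values, validity)` constructor it started from; outside the crate that looks roughly like the following (paths and the `Bitmap: From<[bool; N]>` conversion are assumptions):

use polars_arrow::array::BooleanArray;
use polars_arrow::bitmap::Bitmap;
use polars_arrow::datatypes::ArrowDataType;

fn boolean_sketch() -> BooleanArray {
    // Three slots, no validity bitmap.
    let values = Bitmap::from([true, false, true]);
    BooleanArray::new(ArrowDataType::Boolean, values, None)
}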
@@ -58,7 +58,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { let (values, offsets) = concatenate_values(&arrays_keys, &arrays_values, capacity); Self { - data_type, + dtype, offsets, values, keys: arrays_keys, @@ -85,7 +85,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { // SAFETY: the invariant of this struct ensures that this is up-held unsafe { DictionaryArray::::try_new_unchecked( - self.data_type.clone(), + self.dtype.clone(), keys, self.values.clone(), ) diff --git a/crates/polars-arrow/src/array/growable/fixed_binary.rs b/crates/polars-arrow/src/array/growable/fixed_binary.rs index 0f52fcd51410..d3e0eae9562b 100644 --- a/crates/polars-arrow/src/array/growable/fixed_binary.rs +++ b/crates/polars-arrow/src/array/growable/fixed_binary.rs @@ -30,7 +30,7 @@ impl<'a> GrowableFixedSizeBinary<'a> { use_validity = true; }; - let size = FixedSizeBinaryArray::get_size(arrays[0].data_type()); + let size = FixedSizeBinaryArray::get_size(arrays[0].dtype()); Self { arrays, values: Vec::with_capacity(0), @@ -44,7 +44,7 @@ impl<'a> GrowableFixedSizeBinary<'a> { let values = std::mem::take(&mut self.values); FixedSizeBinaryArray::new( - self.arrays[0].data_type().clone(), + self.arrays[0].dtype().clone(), values.into(), validity.map(|v| v.into()), ) @@ -88,7 +88,7 @@ impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> { impl<'a> From> for FixedSizeBinaryArray { fn from(val: GrowableFixedSizeBinary<'a>) -> Self { FixedSizeBinaryArray::new( - val.arrays[0].data_type().clone(), + val.arrays[0].dtype().clone(), val.values.into(), val.validity.map(|v| v.into()), ) diff --git a/crates/polars-arrow/src/array/growable/fixed_size_list.rs b/crates/polars-arrow/src/array/growable/fixed_size_list.rs index 1841285f377d..3e9f26750ab9 100644 --- a/crates/polars-arrow/src/array/growable/fixed_size_list.rs +++ b/crates/polars-arrow/src/array/growable/fixed_size_list.rs @@ -34,7 +34,7 @@ impl<'a> GrowableFixedSizeList<'a> { }; let size = if let ArrowDataType::FixedSizeList(_, size) = - &arrays[0].data_type().to_logical_type() + &arrays[0].dtype().to_logical_type() { *size } else { @@ -60,7 +60,7 @@ impl<'a> GrowableFixedSizeList<'a> { let values = self.values.as_box(); FixedSizeListArray::new( - self.arrays[0].data_type().clone(), + self.arrays[0].dtype().clone(), values, validity.map(|v| v.into()), ) @@ -111,7 +111,7 @@ impl<'a> From> for FixedSizeListArray { let values = values.as_box(); Self::new( - val.arrays[0].data_type().clone(), + val.arrays[0].dtype().clone(), values, val.validity.map(|v| v.into()), ) diff --git a/crates/polars-arrow/src/array/growable/list.rs b/crates/polars-arrow/src/array/growable/list.rs index a97518a310e3..90e4f15020a6 100644 --- a/crates/polars-arrow/src/array/growable/list.rs +++ b/crates/polars-arrow/src/array/growable/list.rs @@ -70,7 +70,7 @@ impl<'a, O: Offset> GrowableList<'a, O> { let values = self.values.as_box(); ListArray::::new( - self.arrays[0].data_type().clone(), + self.arrays[0].dtype().clone(), offsets.into(), values, validity.map(|v| v.into()), diff --git a/crates/polars-arrow/src/array/growable/mod.rs b/crates/polars-arrow/src/array/growable/mod.rs index 0c037592c928..1238b29f59a3 100644 --- a/crates/polars-arrow/src/array/growable/mod.rs +++ b/crates/polars-arrow/src/array/growable/mod.rs @@ -92,11 +92,11 @@ pub fn make_growable<'a>( capacity: usize, ) -> Box + 'a> { assert!(!arrays.is_empty()); - let data_type = arrays[0].data_type(); + let dtype = arrays[0].dtype(); use PhysicalType::*; - match data_type.to_physical_type() { - Null => 
Box::new(null::GrowableNull::new(data_type.clone())), + match dtype.to_physical_type() { + Null => Box::new(null::GrowableNull::new(dtype.clone())), Boolean => dyn_growable!(boolean::GrowableBoolean, arrays, use_validity, capacity), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { dyn_growable!(primitive::GrowablePrimitive::<$T>, arrays, use_validity, capacity) diff --git a/crates/polars-arrow/src/array/growable/null.rs b/crates/polars-arrow/src/array/growable/null.rs index 155f90d190aa..ef7d6e4e5eac 100644 --- a/crates/polars-arrow/src/array/growable/null.rs +++ b/crates/polars-arrow/src/array/growable/null.rs @@ -6,7 +6,7 @@ use crate::datatypes::ArrowDataType; /// Concrete [`Growable`] for the [`NullArray`]. pub struct GrowableNull { - data_type: ArrowDataType, + dtype: ArrowDataType, length: usize, } @@ -18,9 +18,9 @@ impl Default for GrowableNull { impl GrowableNull { /// Creates a new [`GrowableNull`]. - pub fn new(data_type: ArrowDataType) -> Self { + pub fn new(dtype: ArrowDataType) -> Self { Self { - data_type, + dtype, length: 0, } } @@ -41,16 +41,16 @@ impl<'a> Growable<'a> for GrowableNull { } fn as_arc(&mut self) -> Arc { - Arc::new(NullArray::new(self.data_type.clone(), self.length)) + Arc::new(NullArray::new(self.dtype.clone(), self.length)) } fn as_box(&mut self) -> Box { - Box::new(NullArray::new(self.data_type.clone(), self.length)) + Box::new(NullArray::new(self.dtype.clone(), self.length)) } } impl From for NullArray { fn from(val: GrowableNull) -> Self { - NullArray::new(val.data_type, val.length) + NullArray::new(val.dtype, val.length) } } diff --git a/crates/polars-arrow/src/array/growable/primitive.rs b/crates/polars-arrow/src/array/growable/primitive.rs index 936905ab05fa..b050fb206300 100644 --- a/crates/polars-arrow/src/array/growable/primitive.rs +++ b/crates/polars-arrow/src/array/growable/primitive.rs @@ -11,7 +11,7 @@ use crate::types::NativeType; /// Concrete [`Growable`] for the [`PrimitiveArray`]. 
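`GrowableNull` only needs a dtype and a running length because that is all a `NullArray` stores; a minimal sketch (paths assumed):

use polars_arrow::array::{Array, NullArray};
use polars_arrow::datatypes::ArrowDataType;

fn null_sketch() {
    let nulls = NullArray::new(ArrowDataType::Null, 4);
    assert_eq!(nulls.len(), 4);
    assert!(nulls.validity().is_none());
}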
pub struct GrowablePrimitive<'a, T: NativeType> { - data_type: ArrowDataType, + dtype: ArrowDataType, arrays: Vec<&'a PrimitiveArray>, validity: Option, values: Vec, @@ -32,10 +32,10 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> { use_validity = true; }; - let data_type = arrays[0].data_type().clone(); + let dtype = arrays[0].dtype().clone(); Self { - data_type, + dtype, arrays, values: Vec::with_capacity(capacity), validity: prepare_validity(use_validity, capacity), @@ -48,7 +48,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> { let values = std::mem::take(&mut self.values); PrimitiveArray::::new( - self.data_type.clone(), + self.dtype.clone(), values.into(), validity.map(|v| v.into()), ) @@ -108,7 +108,7 @@ impl<'a, T: NativeType> From> for PrimitiveArray { #[inline] fn from(val: GrowablePrimitive<'a, T>) -> Self { PrimitiveArray::::new( - val.data_type, + val.dtype, val.values.into(), val.validity.map(|v| v.into()), ) diff --git a/crates/polars-arrow/src/array/growable/structure.rs b/crates/polars-arrow/src/array/growable/structure.rs index a27a9cfe6bee..5f3d0c107c62 100644 --- a/crates/polars-arrow/src/array/growable/structure.rs +++ b/crates/polars-arrow/src/array/growable/structure.rs @@ -59,7 +59,7 @@ impl<'a> GrowableStruct<'a> { let values = values.into_iter().map(|mut x| x.as_box()).collect(); StructArray::new( - self.arrays[0].data_type().clone(), + self.arrays[0].dtype().clone(), values, validity.map(|v| v.into()), ) @@ -122,7 +122,7 @@ impl<'a> From> for StructArray { let values = val.values.into_iter().map(|mut x| x.as_box()).collect(); StructArray::new( - val.arrays[0].data_type().clone(), + val.arrays[0].dtype().clone(), values, val.validity.map(|v| v.into()), ) diff --git a/crates/polars-arrow/src/array/growable/utf8.rs b/crates/polars-arrow/src/array/growable/utf8.rs index f4e4e762fc67..4fc1c415d74e 100644 --- a/crates/polars-arrow/src/array/growable/utf8.rs +++ b/crates/polars-arrow/src/array/growable/utf8.rs @@ -48,7 +48,7 @@ impl<'a, O: Offset> GrowableUtf8<'a, O> { unsafe { Utf8Array::::new_unchecked( - self.arrays[0].data_type().clone(), + self.arrays[0].dtype().clone(), offsets.into(), values.into(), validity.map(|v| v.into()), diff --git a/crates/polars-arrow/src/array/list/data.rs b/crates/polars-arrow/src/array/list/data.rs index 212778a05abb..6ee11328663e 100644 --- a/crates/polars-arrow/src/array/list/data.rs +++ b/crates/polars-arrow/src/array/list/data.rs @@ -6,9 +6,9 @@ use crate::offset::{Offset, OffsetsBuffer}; impl Arrow2Arrow for ListArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); + let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .buffers(vec![self.offsets.clone().into_inner().into()]) .nulls(self.validity.as_ref().map(|b| b.clone().into())) @@ -19,17 +19,17 @@ impl Arrow2Arrow for ListArray { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); if data.is_empty() { // Handle empty offsets - return Self::new_empty(data_type); + return Self::new_empty(dtype); } let mut offsets = unsafe { OffsetsBuffer::new_unchecked(data.buffers()[0].clone().into()) }; offsets.slice(data.offset(), data.len() + 1); Self { - data_type, + dtype, offsets, values: from_data(&data.child_data()[0]), validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), diff --git a/crates/polars-arrow/src/array/list/ffi.rs 
b/crates/polars-arrow/src/array/list/ffi.rs index e536a713cbc2..2ac23e45635b 100644 --- a/crates/polars-arrow/src/array/list/ffi.rs +++ b/crates/polars-arrow/src/array/list/ffi.rs @@ -45,7 +45,7 @@ unsafe impl ToFfi for ListArray { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, offsets: self.offsets.clone(), values: self.values.clone(), @@ -55,7 +55,7 @@ unsafe impl ToFfi for ListArray { impl FromFfi for ListArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let child = unsafe { array.child(0)? }; @@ -64,6 +64,6 @@ impl FromFfi for ListArray { // assumption that data from FFI is well constructed let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) }; - Self::try_new(data_type, offsets, values, validity) + Self::try_new(dtype, offsets, values, validity) } } diff --git a/crates/polars-arrow/src/array/list/mod.rs b/crates/polars-arrow/src/array/list/mod.rs index 17e6aff369a1..3c2bb6b41f98 100644 --- a/crates/polars-arrow/src/array/list/mod.rs +++ b/crates/polars-arrow/src/array/list/mod.rs @@ -18,7 +18,7 @@ use polars_utils::pl_str::PlSmallStr; /// An [`Array`] semantically equivalent to `Vec>>>` with Arrow's in-memory. #[derive(Clone)] pub struct ListArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Box, validity: Option, @@ -31,12 +31,12 @@ impl ListArray { /// This function returns an error iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`]. - /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`]. + /// * The `dtype`'s inner field's data type is not equal to `values.dtype`. /// # Implementation /// This function is `O(1)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Box, validity: Option, @@ -50,14 +50,14 @@ impl ListArray { polars_bail!(ComputeError: "validity mask length must match the number of values") } - let child_data_type = Self::try_get_child(&data_type)?.data_type(); - let values_data_type = values.data_type(); - if child_data_type != values_data_type { - polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}."); + let child_dtype = Self::try_get_child(&dtype)?.dtype(); + let values_dtype = values.dtype(); + if child_dtype != values_dtype { + polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}."); } Ok(Self { - data_type, + dtype, offsets, values, validity, @@ -70,31 +70,31 @@ impl ListArray { /// This function panics iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`]. 
- /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`]. + /// * The `dtype`'s inner field's data type is not equal to `values.dtype`. /// # Implementation /// This function is `O(1)` pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Box, validity: Option, ) -> Self { - Self::try_new(data_type, offsets, values, validity).unwrap() + Self::try_new(dtype, offsets, values, validity).unwrap() } /// Returns a new empty [`ListArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - let values = new_empty_array(Self::get_child_type(&data_type).clone()); - Self::new(data_type, OffsetsBuffer::default(), values, None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + let values = new_empty_array(Self::get_child_type(&dtype).clone()); + Self::new(dtype, OffsetsBuffer::default(), values, None) } /// Returns a new null [`ListArray`]. #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - let child = Self::get_child_type(&data_type).clone(); + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + let child = Self::get_child_type(&dtype).clone(); Self::new( - data_type, + dtype, Offsets::new_zeroed(length).into(), new_empty_array(child), Some(Bitmap::new_zeroed(length)), @@ -185,8 +185,8 @@ impl ListArray { impl ListArray { /// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable - pub fn default_datatype(data_type: ArrowDataType) -> ArrowDataType { - let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true)); + pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType { + let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true)); if O::IS_LARGE { ArrowDataType::LargeList(field) } else { @@ -197,21 +197,21 @@ impl ListArray { /// Returns a the inner [`Field`] /// # Panics /// Panics iff the logical type is not consistent with this struct. - pub fn get_child_field(data_type: &ArrowDataType) -> &Field { - Self::try_get_child(data_type).unwrap() + pub fn get_child_field(dtype: &ArrowDataType) -> &Field { + Self::try_get_child(dtype).unwrap() } /// Returns a the inner [`Field`] /// # Errors /// Panics iff the logical type is not consistent with this struct. - pub fn try_get_child(data_type: &ArrowDataType) -> PolarsResult<&Field> { + pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> { if O::IS_LARGE { - match data_type.to_logical_type() { + match dtype.to_logical_type() { ArrowDataType::LargeList(child) => Ok(child.as_ref()), _ => polars_bail!(ComputeError: "ListArray expects DataType::LargeList"), } } else { - match data_type.to_logical_type() { + match dtype.to_logical_type() { ArrowDataType::List(child) => Ok(child.as_ref()), _ => polars_bail!(ComputeError: "ListArray expects DataType::List"), } @@ -221,8 +221,8 @@ impl ListArray { /// Returns a the inner [`ArrowDataType`] /// # Panics /// Panics iff the logical type is not consistent with this struct. 
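Pulling the pieces together as a sketch: `default_datatype` derives the (Large)List dtype from the child dtype, the last offset must equal the child length, and `try_new` checks both. Import paths, `from_vec`, and the `Vec -> OffsetsBuffer` conversion via `try_into` are assumptions.

use polars_arrow::array::{ListArray, PrimitiveArray};
use polars_arrow::datatypes::ArrowDataType;
use polars_arrow::offset::OffsetsBuffer;
use polars_error::PolarsResult;

fn list_sketch() -> PolarsResult<ListArray<i64>> {
    // Child values [10, 20, 30] split into the lists [10, 20] and [30].
    let child = PrimitiveArray::<i32>::from_vec(vec![10, 20, 30]);
    let dtype = ListArray::<i64>::default_datatype(ArrowDataType::Int32);
    let offsets: OffsetsBuffer<i64> = vec![0i64, 2, 3]
        .try_into()
        .expect("offsets are non-decreasing");
    ListArray::try_new(dtype, offsets, Box::new(child), None)
}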
- pub fn get_child_type(data_type: &ArrowDataType) -> &ArrowDataType { - Self::get_child_field(data_type).data_type() + pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType { + Self::get_child_field(dtype).dtype() } } @@ -250,13 +250,13 @@ impl Splitable for ListArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: lhs_offsets, validity: lhs_validity, values: self.values.clone(), }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: rhs_offsets, validity: rhs_validity, values: self.values.clone(), diff --git a/crates/polars-arrow/src/array/list/mutable.rs b/crates/polars-arrow/src/array/list/mutable.rs index 7a1601e4ceaa..a52e095d72bb 100644 --- a/crates/polars-arrow/src/array/list/mutable.rs +++ b/crates/polars-arrow/src/array/list/mutable.rs @@ -14,7 +14,7 @@ use crate::trusted_len::TrustedLen; /// The mutable version of [`ListArray`]. #[derive(Debug, Clone)] pub struct MutableListArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: M, validity: Option, @@ -24,18 +24,18 @@ impl MutableListArray { /// Creates a new empty [`MutableListArray`]. pub fn new() -> Self { let values = M::default(); - let data_type = ListArray::::default_datatype(values.data_type().clone()); - Self::new_from(values, data_type, 0) + let dtype = ListArray::::default_datatype(values.dtype().clone()); + Self::new_from(values, dtype, 0) } /// Creates a new [`MutableListArray`] with a capacity. pub fn with_capacity(capacity: usize) -> Self { let values = M::default(); - let data_type = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); let offsets = Offsets::::with_capacity(capacity); Self { - data_type, + dtype, offsets, values, validity: None, @@ -52,7 +52,7 @@ impl Default for MutableListArray { impl From> for ListArray { fn from(mut other: MutableListArray) -> Self { ListArray::new( - other.data_type, + other.dtype, other.offsets.into(), other.values.as_box(), other.validity.map(|x| x.into()), @@ -110,12 +110,12 @@ where impl MutableListArray { /// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity. - pub fn new_from(values: M, data_type: ArrowDataType, capacity: usize) -> Self { + pub fn new_from(values: M, dtype: ArrowDataType, capacity: usize) -> Self { let offsets = Offsets::::with_capacity(capacity); assert_eq!(values.len(), 0); - ListArray::::get_child_field(&data_type); + ListArray::::get_child_field(&dtype); Self { - data_type, + dtype, offsets, values, validity: None, @@ -124,19 +124,19 @@ impl MutableListArray { /// Creates a new [`MutableListArray`] from a [`MutableArray`]. pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool) -> Self { - let field = Box::new(Field::new(name, values.data_type().clone(), nullable)); - let data_type = if O::IS_LARGE { + let field = Box::new(Field::new(name, values.dtype().clone(), nullable)); + let dtype = if O::IS_LARGE { ArrowDataType::LargeList(field) } else { ArrowDataType::List(field) }; - Self::new_from(values, data_type, 0) + Self::new_from(values, dtype, 0) } /// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity. 
pub fn new_with_capacity(values: M, capacity: usize) -> Self { - let data_type = ListArray::::default_datatype(values.data_type().clone()); - Self::new_from(values, data_type, capacity) + let dtype = ListArray::::default_datatype(values.dtype().clone()); + Self::new_from(values, dtype, capacity) } /// Creates a new [`MutableListArray`] from a [`MutableArray`], [`Offsets`] and @@ -147,9 +147,9 @@ impl MutableListArray { validity: Option, ) -> Self { assert_eq!(values.len(), offsets.last().to_usize()); - let data_type = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); Self { - data_type, + dtype, offsets, values, validity, @@ -274,7 +274,7 @@ impl MutableArray for MutableListArray Box { ListArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.offsets).into(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), @@ -284,7 +284,7 @@ impl MutableArray for MutableListArray Arc { ListArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.offsets).into(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), @@ -292,8 +292,8 @@ impl MutableArray for MutableListArray &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/map/data.rs b/crates/polars-arrow/src/array/map/data.rs index 8eb586e05f4c..81f3ec968724 100644 --- a/crates/polars-arrow/src/array/map/data.rs +++ b/crates/polars-arrow/src/array/map/data.rs @@ -6,9 +6,9 @@ use crate::offset::OffsetsBuffer; impl Arrow2Arrow for MapArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); + let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .buffers(vec![self.offsets.clone().into_inner().into()]) .nulls(self.validity.as_ref().map(|b| b.clone().into())) @@ -19,17 +19,17 @@ impl Arrow2Arrow for MapArray { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); if data.is_empty() { // Handle empty offsets - return Self::new_empty(data_type); + return Self::new_empty(dtype); } let mut offsets = unsafe { OffsetsBuffer::new_unchecked(data.buffers()[0].clone().into()) }; offsets.slice(data.offset(), data.len() + 1); Self { - data_type: data.data_type().clone().into(), + dtype: data.dtype().clone().into(), offsets, field: from_data(&data.child_data()[0]), validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), diff --git a/crates/polars-arrow/src/array/map/ffi.rs b/crates/polars-arrow/src/array/map/ffi.rs index fad531671703..2233b371f7eb 100644 --- a/crates/polars-arrow/src/array/map/ffi.rs +++ b/crates/polars-arrow/src/array/map/ffi.rs @@ -45,7 +45,7 @@ unsafe impl ToFfi for MapArray { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, offsets: self.offsets.clone(), field: self.field.clone(), @@ -55,7 +55,7 @@ unsafe impl ToFfi for MapArray { impl FromFfi for MapArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let child = array.child(0)?; @@ -64,6 +64,6 @@ impl FromFfi for MapArray { // assumption that data from FFI is 
well constructed let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) }; - Self::try_new(data_type, offsets, values, validity) + Self::try_new(dtype, offsets, values, validity) } } diff --git a/crates/polars-arrow/src/array/map/mod.rs b/crates/polars-arrow/src/array/map/mod.rs index 219d703329e3..5497c1d7342b 100644 --- a/crates/polars-arrow/src/array/map/mod.rs +++ b/crates/polars-arrow/src/array/map/mod.rs @@ -15,7 +15,7 @@ use polars_error::{polars_bail, PolarsResult}; /// An array representing a (key, value), both of arbitrary logical types. #[derive(Clone)] pub struct MapArray { - data_type: ArrowDataType, + dtype: ArrowDataType, // invariant: field.len() == offsets.len() offsets: OffsetsBuffer, field: Box, @@ -28,27 +28,27 @@ impl MapArray { /// # Errors /// This function errors iff: /// * The last offset is not equal to the field' length - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Map`] - /// * The fields' `data_type` is not equal to the inner field of `data_type` + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`] + /// * The fields' `dtype` is not equal to the inner field of `dtype` /// * The validity is not `None` and its length is different from `offsets.len() - 1`. pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, field: Box, validity: Option, ) -> PolarsResult { try_check_offsets_bounds(&offsets, field.len())?; - let inner_field = Self::try_get_field(&data_type)?; - if let ArrowDataType::Struct(inner) = inner_field.data_type() { + let inner_field = Self::try_get_field(&dtype)?; + if let ArrowDataType::Struct(inner) = inner_field.dtype() { if inner.len() != 2 { polars_bail!(ComputeError: "MapArray's inner `Struct` must have 2 fields (keys and maps)") } } else { polars_bail!(ComputeError: "MapArray expects `DataType::Struct` as its inner logical type") } - if field.data_type() != inner_field.data_type() { - polars_bail!(ComputeError: "MapArray expects `field.data_type` to match its inner DataType") + if field.dtype() != inner_field.dtype() { + polars_bail!(ComputeError: "MapArray expects `field.dtype` to match its inner DataType") } if validity @@ -59,7 +59,7 @@ impl MapArray { } Ok(Self { - data_type, + dtype, field, offsets, validity, @@ -69,22 +69,22 @@ impl MapArray { /// Creates a new [`MapArray`]. /// # Panics /// * The last offset is not equal to the field' length. - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Map`], + /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`], /// * The validity is not `None` and its length is different from `offsets.len() - 1`. pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, field: Box, validity: Option, ) -> Self { - Self::try_new(data_type, offsets, field, validity).unwrap() + Self::try_new(dtype, offsets, field, validity).unwrap() } /// Returns a new null [`MapArray`] of `length`. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - let field = new_empty_array(Self::get_field(&data_type).data_type().clone()); + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + let field = new_empty_array(Self::get_field(&dtype).dtype().clone()); Self::new( - data_type, + dtype, vec![0i32; 1 + length].try_into().unwrap(), field, Some(Bitmap::new_zeroed(length)), @@ -92,9 +92,9 @@ impl MapArray { } /// Returns a new empty [`MapArray`]. 
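// --- Editorial example (not part of this patch): a hedged sketch of `MapArray::new_null` with
// --- the renamed `dtype` argument. The re-export paths and the `"key".into()` conversions for
// --- `PlSmallStr` are assumptions; the Map/Struct dtype layout follows the checks in this patch.
use polars_arrow::array::{Array, MapArray};
use polars_arrow::datatypes::{ArrowDataType, Field};

fn map_dtype_sketch() {
    // A Map dtype wraps a single Struct field holding the key and value columns.
    let entries = Field::new(
        "entries".into(),
        ArrowDataType::Struct(vec![
            Field::new("key".into(), ArrowDataType::Utf8, false),
            Field::new("value".into(), ArrowDataType::Int32, true),
        ]),
        false,
    );
    let dtype = ArrowDataType::Map(Box::new(entries), false);
    let arr = MapArray::new_null(dtype, 2);
    assert_eq!(arr.null_count(), 2);
}
// --- End editorial example.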
- pub fn new_empty(data_type: ArrowDataType) -> Self { - let field = new_empty_array(Self::get_field(&data_type).data_type().clone()); - Self::new(data_type, OffsetsBuffer::default(), field, None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + let field = new_empty_array(Self::get_field(&dtype).dtype().clone()); + Self::new(dtype, OffsetsBuffer::default(), field, None) } } @@ -128,16 +128,16 @@ impl MapArray { impl_mut_validity!(); impl_into_array!(); - pub(crate) fn try_get_field(data_type: &ArrowDataType) -> PolarsResult<&Field> { - if let ArrowDataType::Map(field, _) = data_type.to_logical_type() { + pub(crate) fn try_get_field(dtype: &ArrowDataType) -> PolarsResult<&Field> { + if let ArrowDataType::Map(field, _) = dtype.to_logical_type() { Ok(field.as_ref()) } else { - polars_bail!(ComputeError: "The data_type's logical type must be DataType::Map") + polars_bail!(ComputeError: "The dtype's logical type must be DataType::Map") } } - pub(crate) fn get_field(data_type: &ArrowDataType) -> &Field { - Self::try_get_field(data_type).unwrap() + pub(crate) fn get_field(dtype: &ArrowDataType) -> &Field { + Self::try_get_field(dtype).unwrap() } } @@ -207,13 +207,13 @@ impl Splitable for MapArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: lhs_offsets, field: self.field.clone(), validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: rhs_offsets, field: self.field.clone(), validity: rhs_validity, diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs index bda8d9da169d..bc2363c3ba78 100644 --- a/crates/polars-arrow/src/array/mod.rs +++ b/crates/polars-arrow/src/array/mod.rs @@ -12,7 +12,7 @@ //! * [`StructArray`] and [`MutableStructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`) //! //! All immutable arrays implement the trait object [`Array`] and that can be downcasted -//! to a concrete struct based on [`PhysicalType`](crate::datatypes::PhysicalType) available from [`Array::data_type`]. +//! to a concrete struct based on [`PhysicalType`](crate::datatypes::PhysicalType) available from [`Array::dtype`]. //! All immutable arrays are backed by [`Buffer`](crate::buffer::Buffer) and thus cloning and slicing them is `O(1)`. //! //! Most arrays contain a [`MutableArray`] counterpart that is neither clonable nor sliceable, but @@ -58,7 +58,7 @@ pub trait Splitable: Sized { } /// A trait representing an immutable Arrow array. Arrow arrays are trait objects -/// that are infallibly downcasted to concrete types according to the [`Array::data_type`]. +/// that are infallibly downcasted to concrete types according to the [`Array::dtype`]. pub trait Array: Send + Sync + dyn_clone::DynClone + 'static { /// Converts itself to a reference of [`Any`], which enables downcasting to concrete types. fn as_any(&self) -> &dyn Any; @@ -77,7 +77,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static { /// The [`ArrowDataType`] of the [`Array`]. In combination with [`Array::as_any`], this can be /// used to downcast trait objects (`dyn Array`) to concrete arrays. - fn data_type(&self) -> &ArrowDataType; + fn dtype(&self) -> &ArrowDataType; /// The validity of the [`Array`]: every array has an optional [`Bitmap`] that, when available /// specifies whether the array slot is valid or not (null). 
@@ -89,7 +89,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static { /// This is `O(1)` since the number of null elements is pre-computed. #[inline] fn null_count(&self) -> usize { - if self.data_type() == &ArrowDataType::Null { + if self.dtype() == &ArrowDataType::Null { return self.len(); }; self.validity() @@ -162,7 +162,7 @@ pub trait Array: Send + Sync + dyn_clone::DynClone + 'static { #[must_use] fn sliced(&self, offset: usize, length: usize) -> Box { if length == 0 { - return new_empty_array(self.data_type().clone()); + return new_empty_array(self.dtype().clone()); } let mut new = self.to_boxed(); new.slice(offset, length); @@ -201,7 +201,7 @@ dyn_clone::clone_trait_object!(Array); /// As in [`Array`], concrete arrays (such as [`MutablePrimitiveArray`]) implement how they are mutated. pub trait MutableArray: std::fmt::Debug + Send + Sync { /// The [`ArrowDataType`] of the array. - fn data_type(&self) -> &ArrowDataType; + fn dtype(&self) -> &ArrowDataType; /// The length of the array. fn len(&self) -> usize; @@ -269,8 +269,8 @@ impl MutableArray for Box { self.as_mut().as_arc() } - fn data_type(&self) -> &ArrowDataType { - self.as_ref().data_type() + fn dtype(&self) -> &ArrowDataType { + self.as_ref().dtype() } fn as_any(&self) -> &dyn std::any::Any { @@ -312,7 +312,7 @@ macro_rules! fmt_dyn { impl std::fmt::Debug for dyn Array + '_ { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use crate::datatypes::PhysicalType::*; - match self.data_type().to_physical_type() { + match self.dtype().to_physical_type() { Null => fmt_dyn!(self, NullArray, f), Boolean => fmt_dyn!(self, BooleanArray, f), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { @@ -341,63 +341,63 @@ impl std::fmt::Debug for dyn Array + '_ { } /// Creates a new [`Array`] with a [`Array::len`] of 0. 
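// --- Editorial example (not part of this patch): how downstream code downcasts a `dyn Array`
// --- after the rename, matching on `dtype()` (formerly `data_type()`) and then going through
// --- `as_any()`. The `polars_arrow` re-export paths are assumptions.
use polars_arrow::array::{Array, PrimitiveArray};
use polars_arrow::datatypes::ArrowDataType;

fn describe(array: &dyn Array) {
    match array.dtype() {
        ArrowDataType::Int32 => {
            let arr = array
                .as_any()
                .downcast_ref::<PrimitiveArray<i32>>()
                .expect("dtype and physical type agree");
            println!("{} int32 values, {} nulls", arr.len(), arr.null_count());
        },
        other => println!("unhandled dtype: {other:?}"),
    }
}
// --- End editorial example.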
-pub fn new_empty_array(data_type: ArrowDataType) -> Box { +pub fn new_empty_array(dtype: ArrowDataType) -> Box { use crate::datatypes::PhysicalType::*; - match data_type.to_physical_type() { - Null => Box::new(NullArray::new_empty(data_type)), - Boolean => Box::new(BooleanArray::new_empty(data_type)), + match dtype.to_physical_type() { + Null => Box::new(NullArray::new_empty(dtype)), + Boolean => Box::new(BooleanArray::new_empty(dtype)), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - Box::new(PrimitiveArray::<$T>::new_empty(data_type)) + Box::new(PrimitiveArray::<$T>::new_empty(dtype)) }), - Binary => Box::new(BinaryArray::::new_empty(data_type)), - LargeBinary => Box::new(BinaryArray::::new_empty(data_type)), - FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(data_type)), - Utf8 => Box::new(Utf8Array::::new_empty(data_type)), - LargeUtf8 => Box::new(Utf8Array::::new_empty(data_type)), - List => Box::new(ListArray::::new_empty(data_type)), - LargeList => Box::new(ListArray::::new_empty(data_type)), - FixedSizeList => Box::new(FixedSizeListArray::new_empty(data_type)), - Struct => Box::new(StructArray::new_empty(data_type)), - Union => Box::new(UnionArray::new_empty(data_type)), - Map => Box::new(MapArray::new_empty(data_type)), - Utf8View => Box::new(Utf8ViewArray::new_empty(data_type)), - BinaryView => Box::new(BinaryViewArray::new_empty(data_type)), + Binary => Box::new(BinaryArray::::new_empty(dtype)), + LargeBinary => Box::new(BinaryArray::::new_empty(dtype)), + FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)), + Utf8 => Box::new(Utf8Array::::new_empty(dtype)), + LargeUtf8 => Box::new(Utf8Array::::new_empty(dtype)), + List => Box::new(ListArray::::new_empty(dtype)), + LargeList => Box::new(ListArray::::new_empty(dtype)), + FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)), + Struct => Box::new(StructArray::new_empty(dtype)), + Union => Box::new(UnionArray::new_empty(dtype)), + Map => Box::new(MapArray::new_empty(dtype)), + Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)), + BinaryView => Box::new(BinaryViewArray::new_empty(dtype)), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { - Box::new(DictionaryArray::<$T>::new_empty(data_type)) + Box::new(DictionaryArray::<$T>::new_empty(dtype)) }) }, } } -/// Creates a new [`Array`] of [`ArrowDataType`] `data_type` and `length`. +/// Creates a new [`Array`] of [`ArrowDataType`] `dtype` and `length`. /// /// The array is guaranteed to have [`Array::null_count`] equal to [`Array::len`] /// for all types except Union, which does not have a validity. 
-pub fn new_null_array(data_type: ArrowDataType, length: usize) -> Box { +pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box { use crate::datatypes::PhysicalType::*; - match data_type.to_physical_type() { - Null => Box::new(NullArray::new_null(data_type, length)), - Boolean => Box::new(BooleanArray::new_null(data_type, length)), + match dtype.to_physical_type() { + Null => Box::new(NullArray::new_null(dtype, length)), + Boolean => Box::new(BooleanArray::new_null(dtype, length)), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - Box::new(PrimitiveArray::<$T>::new_null(data_type, length)) + Box::new(PrimitiveArray::<$T>::new_null(dtype, length)) }), - Binary => Box::new(BinaryArray::::new_null(data_type, length)), - LargeBinary => Box::new(BinaryArray::::new_null(data_type, length)), - FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(data_type, length)), - Utf8 => Box::new(Utf8Array::::new_null(data_type, length)), - LargeUtf8 => Box::new(Utf8Array::::new_null(data_type, length)), - List => Box::new(ListArray::::new_null(data_type, length)), - LargeList => Box::new(ListArray::::new_null(data_type, length)), - FixedSizeList => Box::new(FixedSizeListArray::new_null(data_type, length)), - Struct => Box::new(StructArray::new_null(data_type, length)), - Union => Box::new(UnionArray::new_null(data_type, length)), - Map => Box::new(MapArray::new_null(data_type, length)), - BinaryView => Box::new(BinaryViewArray::new_null(data_type, length)), - Utf8View => Box::new(Utf8ViewArray::new_null(data_type, length)), + Binary => Box::new(BinaryArray::::new_null(dtype, length)), + LargeBinary => Box::new(BinaryArray::::new_null(dtype, length)), + FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)), + Utf8 => Box::new(Utf8Array::::new_null(dtype, length)), + LargeUtf8 => Box::new(Utf8Array::::new_null(dtype, length)), + List => Box::new(ListArray::::new_null(dtype, length)), + LargeList => Box::new(ListArray::::new_null(dtype, length)), + FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)), + Struct => Box::new(StructArray::new_null(dtype, length)), + Union => Box::new(UnionArray::new_null(dtype, length)), + Map => Box::new(MapArray::new_null(dtype, length)), + BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)), + Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)), Dictionary(key_type) => { match_integer_type!(key_type, |$T| { - Box::new(DictionaryArray::<$T>::new_null(data_type, length)) + Box::new(DictionaryArray::<$T>::new_null(dtype, length)) }) }, } @@ -455,7 +455,7 @@ impl From<&dyn arrow_array::Array> for Box { #[cfg(feature = "arrow_rs")] pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { use crate::datatypes::PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => to_data_dyn!(array, NullArray), Boolean => to_data_dyn!(array, BooleanArray), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { @@ -485,8 +485,8 @@ pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { #[cfg(feature = "arrow_rs")] pub fn from_data(data: &arrow_data::ArrayData) -> Box { use crate::datatypes::PhysicalType::*; - let data_type: ArrowDataType = data.data_type().clone().into(); - match data_type.to_physical_type() { + let dtype: ArrowDataType = data.dtype().clone().into(); + match dtype.to_physical_type() { Null => Box::new(NullArray::from_data(data)), Boolean => Box::new(BooleanArray::from_data(data)), 
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { @@ -657,8 +657,8 @@ macro_rules! impl_common_array { } #[inline] - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } #[inline] @@ -699,7 +699,7 @@ macro_rules! impl_common_array { /// and moving the concrete struct under a `Box`. pub fn clone(array: &dyn Array) -> Box { use crate::datatypes::PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => clone_dyn!(array, NullArray), Boolean => clone_dyn!(array, BooleanArray), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { diff --git a/crates/polars-arrow/src/array/null.rs b/crates/polars-arrow/src/array/null.rs index 22c3d6077686..4960b263667c 100644 --- a/crates/polars-arrow/src/array/null.rs +++ b/crates/polars-arrow/src/array/null.rs @@ -11,7 +11,7 @@ use crate::ffi; /// The concrete [`Array`] of [`ArrowDataType::Null`]. #[derive(Clone)] pub struct NullArray { - data_type: ArrowDataType, + dtype: ArrowDataType, /// Validity mask. This is always all-zeroes. validity: Bitmap, @@ -23,16 +23,16 @@ impl NullArray { /// Returns a new [`NullArray`]. /// # Errors /// This function errors iff: - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. - pub fn try_new(data_type: ArrowDataType, length: usize) -> PolarsResult { - if data_type.to_physical_type() != PhysicalType::Null { + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. + pub fn try_new(dtype: ArrowDataType, length: usize) -> PolarsResult { + if dtype.to_physical_type() != PhysicalType::Null { polars_bail!(ComputeError: "NullArray can only be initialized with a DataType whose physical type is Null"); } let validity = Bitmap::new_zeroed(length); Ok(Self { - data_type, + dtype, validity, length, }) @@ -41,19 +41,19 @@ impl NullArray { /// Returns a new [`NullArray`]. /// # Panics /// This function errors iff: - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. - pub fn new(data_type: ArrowDataType, length: usize) -> Self { - Self::try_new(data_type, length).unwrap() + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. + pub fn new(dtype: ArrowDataType, length: usize) -> Self { + Self::try_new(dtype, length).unwrap() } /// Returns a new empty [`NullArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - Self::new(data_type, 0) + pub fn new_empty(dtype: ArrowDataType) -> Self { + Self::new(dtype, 0) } /// Returns a new [`NullArray`]. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - Self::new(data_type, length) + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + Self::new(dtype, length) } impl_sliced!(); @@ -111,9 +111,9 @@ impl MutableNullArray { /// Returns a new [`MutableNullArray`]. /// # Panics /// This function errors iff: - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. - pub fn new(data_type: ArrowDataType, length: usize) -> Self { - let inner = NullArray::try_new(data_type, length).unwrap(); + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. 
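// --- Editorial example (not part of this patch): the free constructors now take a `dtype`
// --- argument; a null array of any dtype reports `null_count() == len()`. The
// --- `polars_arrow::array::new_null_array` path is an assumption about the re-exports.
use polars_arrow::array::{new_null_array, Array};
use polars_arrow::datatypes::ArrowDataType;

fn null_array_sketch() {
    let arr = new_null_array(ArrowDataType::Utf8, 4);
    assert_eq!(arr.len(), 4);
    assert_eq!(arr.null_count(), 4);
    assert_eq!(arr.dtype(), &ArrowDataType::Utf8);
}
// --- End editorial example.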
+ pub fn new(dtype: ArrowDataType, length: usize) -> Self { + let inner = NullArray::try_new(dtype, length).unwrap(); Self { inner } } } @@ -125,7 +125,7 @@ impl From for NullArray { } impl MutableArray for MutableNullArray { - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { &ArrowDataType::Null } @@ -194,12 +194,12 @@ impl Splitable for NullArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity: lhs, length: offset, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity: rhs, length: self.len() - offset, }, @@ -209,8 +209,8 @@ impl Splitable for NullArray { impl FromFfi for NullArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); - Self::try_new(data_type, array.array().len()) + let dtype = array.dtype().clone(); + Self::try_new(dtype, array.array().len()) } } diff --git a/crates/polars-arrow/src/array/primitive/data.rs b/crates/polars-arrow/src/array/primitive/data.rs index 1a32b230f54f..17b998f33301 100644 --- a/crates/polars-arrow/src/array/primitive/data.rs +++ b/crates/polars-arrow/src/array/primitive/data.rs @@ -7,9 +7,9 @@ use crate::types::NativeType; impl Arrow2Arrow for PrimitiveArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); + let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .buffers(vec![self.values.clone().into()]) .nulls(self.validity.as_ref().map(|b| b.clone().into())); @@ -19,13 +19,13 @@ impl Arrow2Arrow for PrimitiveArray { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); let mut values: Buffer = data.buffers()[0].clone().into(); values.slice(data.offset(), data.len()); Self { - data_type, + dtype, values, validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), } diff --git a/crates/polars-arrow/src/array/primitive/ffi.rs b/crates/polars-arrow/src/array/primitive/ffi.rs index ae22cf2e9a9c..6dae1963dd74 100644 --- a/crates/polars-arrow/src/array/primitive/ffi.rs +++ b/crates/polars-arrow/src/array/primitive/ffi.rs @@ -39,7 +39,7 @@ unsafe impl ToFfi for PrimitiveArray { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, values: self.values.clone(), } @@ -48,10 +48,10 @@ unsafe impl ToFfi for PrimitiveArray { impl FromFfi for PrimitiveArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.buffer::(1) }?; - Self::try_new(data_type, values, validity) + Self::try_new(dtype, values, validity) } } diff --git a/crates/polars-arrow/src/array/primitive/fmt.rs b/crates/polars-arrow/src/array/primitive/fmt.rs index 35f342aec3fa..b349e01843d8 100644 --- a/crates/polars-arrow/src/array/primitive/fmt.rs +++ b/crates/polars-arrow/src/array/primitive/fmt.rs @@ -22,7 +22,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>( array: &'a PrimitiveArray, ) -> Box Result + 'a> { use crate::datatypes::ArrowDataType::*; - match array.data_type().to_logical_type() { + match array.dtype().to_logical_type() { Int8 => Box::new(|f, index| write!(f, "{}", array.value(index))), Int16 => Box::new(|f, index| write!(f, "{}", array.value(index))), Int32 => Box::new(|f, index| write!(f, "{}", array.value(index))), @@ -143,7 
+143,7 @@ impl Debug for PrimitiveArray { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let writer = get_write_value(self); - write!(f, "{:?}", self.data_type())?; + write!(f, "{:?}", self.dtype())?; write_vec(f, &*writer, self.validity(), self.len(), "None", false) } } diff --git a/crates/polars-arrow/src/array/primitive/mod.rs b/crates/polars-arrow/src/array/primitive/mod.rs index 5ab10deaf47c..3185152fe228 100644 --- a/crates/polars-arrow/src/array/primitive/mod.rs +++ b/crates/polars-arrow/src/array/primitive/mod.rs @@ -51,13 +51,13 @@ use polars_utils::slice::{GetSaferUnchecked, SliceAble}; /// ``` #[derive(Clone)] pub struct PrimitiveArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, } pub(super) fn check( - data_type: &ArrowDataType, + dtype: &ArrowDataType, values: &[T], validity_len: Option, ) -> PolarsResult<()> { @@ -65,7 +65,7 @@ pub(super) fn check( polars_bail!(ComputeError: "validity mask length must match the number of values") } - if data_type.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) { + if dtype.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) { polars_bail!(ComputeError: "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive") } Ok(()) @@ -79,15 +79,15 @@ impl PrimitiveArray { /// # Errors /// This function errors iff: /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`] + /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`] pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, ) -> PolarsResult { - check(&data_type, &values, validity.as_ref().map(|v| v.len()))?; + check(&dtype, &values, validity.as_ref().map(|v| v.len()))?; Ok(Self { - data_type, + dtype, values, validity, }) @@ -96,12 +96,12 @@ impl PrimitiveArray { /// # Safety /// Doesn't check invariants pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, ) -> Self { Self { - data_type, + dtype, values, validity, } @@ -123,18 +123,18 @@ impl PrimitiveArray { /// ); /// ``` /// # Panics - /// Panics iff the `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`] + /// Panics iff the `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`] #[inline] #[must_use] - pub fn to(self, data_type: ArrowDataType) -> Self { + pub fn to(self, dtype: ArrowDataType) -> Self { check( - &data_type, + &dtype, &self.values, self.validity.as_ref().map(|v| v.len()), ) .unwrap(); Self { - data_type, + dtype, values: self.values, validity: self.validity, } @@ -192,8 +192,8 @@ impl PrimitiveArray { /// Returns the arrays' [`ArrowDataType`]. #[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } /// Returns the value at slot `i`. @@ -302,22 +302,22 @@ impl PrimitiveArray { #[must_use] pub fn into_inner(self) -> (ArrowDataType, Buffer, Option) { let Self { - data_type, + dtype, values, validity, } = self; - (data_type, values, validity) + (dtype, values, validity) } /// Creates a `[PrimitiveArray]` from its internal representation. 
/// This is the inverted from `[PrimitiveArray::into_inner]` pub fn from_inner( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, ) -> PolarsResult { - check(&data_type, &values, validity.as_ref().map(|v| v.len()))?; - Ok(unsafe { Self::from_inner_unchecked(data_type, values, validity) }) + check(&dtype, &values, validity.as_ref().map(|v| v.len()))?; + Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) }) } /// Creates a `[PrimitiveArray]` from its internal representation. @@ -326,12 +326,12 @@ impl PrimitiveArray { /// # Safety /// Callers must ensure all invariants of this struct are upheld. pub unsafe fn from_inner_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Buffer, validity: Option, ) -> Self { Self { - data_type, + dtype, values, validity, } @@ -351,21 +351,21 @@ impl PrimitiveArray { if let Some(bitmap) = self.validity { match bitmap.into_mut() { Left(bitmap) => Left(PrimitiveArray::new( - self.data_type, + self.dtype, self.values, Some(bitmap), )), Right(mutable_bitmap) => match self.values.into_mut() { Right(values) => Right( MutablePrimitiveArray::try_new( - self.data_type, + self.dtype, values, Some(mutable_bitmap), ) .unwrap(), ), Left(values) => Left(PrimitiveArray::new( - self.data_type, + self.dtype, values, Some(mutable_bitmap.into()), )), @@ -374,23 +374,23 @@ impl PrimitiveArray { } else { match self.values.into_mut() { Right(values) => { - Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap()) + Right(MutablePrimitiveArray::try_new(self.dtype, values, None).unwrap()) }, - Left(values) => Left(PrimitiveArray::new(self.data_type, values, None)), + Left(values) => Left(PrimitiveArray::new(self.dtype, values, None)), } } } /// Returns a new empty (zero-length) [`PrimitiveArray`]. - pub fn new_empty(data_type: ArrowDataType) -> Self { - Self::new(data_type, Buffer::new(), None) + pub fn new_empty(dtype: ArrowDataType) -> Self { + Self::new(dtype, Buffer::new(), None) } /// Returns a new [`PrimitiveArray`] where all slots are null / `None`. #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { Self::new( - data_type, + dtype, vec![T::default(); length].into(), Some(Bitmap::new_zeroed(length)), ) @@ -448,9 +448,9 @@ impl PrimitiveArray { /// # Panics /// This function errors iff: /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`]. - pub fn new(data_type: ArrowDataType, values: Buffer, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() + /// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`]. + pub fn new(dtype: ArrowDataType, values: Buffer, validity: Option) -> Self { + Self::try_new(dtype, values, validity).unwrap() } /// Transmute this PrimitiveArray into another PrimitiveArray. 
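// --- Editorial example (not part of this patch): constructing a PrimitiveArray with the renamed
// --- `dtype` parameter and re-tagging its logical type with `to`, which keeps the same i64
// --- buffer. The re-export paths are assumptions.
use polars_arrow::array::PrimitiveArray;
use polars_arrow::datatypes::{ArrowDataType, TimeUnit};

fn primitive_dtype_sketch() {
    let ints = PrimitiveArray::<i64>::new(ArrowDataType::Int64, vec![1, 2, 3].into(), None);
    assert_eq!(ints.dtype(), &ArrowDataType::Int64);
    // `to` only re-checks that the new dtype has the same physical type (an i64 primitive here).
    let ts = ints.to(ArrowDataType::Timestamp(TimeUnit::Millisecond, None));
    assert_eq!(ts.dtype(), &ArrowDataType::Timestamp(TimeUnit::Millisecond, None));
}
// --- End editorial example.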
@@ -510,12 +510,12 @@ impl Splitable for PrimitiveArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: lhs_values, validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: rhs_values, validity: rhs_validity, }, diff --git a/crates/polars-arrow/src/array/primitive/mutable.rs b/crates/polars-arrow/src/array/primitive/mutable.rs index 53565dda831a..ab6bfd8c7511 100644 --- a/crates/polars-arrow/src/array/primitive/mutable.rs +++ b/crates/polars-arrow/src/array/primitive/mutable.rs @@ -14,7 +14,7 @@ use crate::types::NativeType; /// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`. #[derive(Debug, Clone)] pub struct MutablePrimitiveArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec, validity: Option, } @@ -30,7 +30,7 @@ impl From> for PrimitiveArray { } }); - PrimitiveArray::::new(other.data_type, other.values.into(), validity) + PrimitiveArray::::new(other.dtype, other.values.into(), validity) } } @@ -58,15 +58,15 @@ impl MutablePrimitiveArray { /// # Errors /// This function errors iff: /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`] + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`] pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec, validity: Option, ) -> PolarsResult { - check(&data_type, &values, validity.as_ref().map(|x| x.len()))?; + check(&dtype, &values, validity.as_ref().map(|x| x.len()))?; Ok(Self { - data_type, + dtype, values, validity, }) @@ -74,7 +74,7 @@ impl MutablePrimitiveArray { /// Extract the low-end APIs from the [`MutablePrimitiveArray`]. pub fn into_inner(self) -> (ArrowDataType, Vec, Option) { - (self.data_type, self.values, self.validity) + (self.dtype, self.values, self.validity) } /// Applies a function `f` to the values of this array, cloning the values @@ -98,10 +98,10 @@ impl Default for MutablePrimitiveArray { } impl From for MutablePrimitiveArray { - fn from(data_type: ArrowDataType) -> Self { - assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE)); + fn from(dtype: ArrowDataType) -> Self { + assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE)); Self { - data_type, + dtype, values: Vec::::new(), validity: None, } @@ -110,10 +110,10 @@ impl From for MutablePrimitiveArray { impl MutablePrimitiveArray { /// Creates a new [`MutablePrimitiveArray`] from a capacity and [`ArrowDataType`]. - pub fn with_capacity_from(capacity: usize, data_type: ArrowDataType) -> Self { - assert!(data_type.to_physical_type().eq_primitive(T::PRIMITIVE)); + pub fn with_capacity_from(capacity: usize, dtype: ArrowDataType) -> Self { + assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE)); Self { - data_type, + dtype, values: Vec::::with_capacity(capacity), validity: None, } @@ -264,8 +264,8 @@ impl MutablePrimitiveArray { /// # Implementation /// This operation is `O(1)`. #[inline] - pub fn to(self, data_type: ArrowDataType) -> Self { - Self::try_new(data_type, self.values, self.validity).unwrap() + pub fn to(self, dtype: ArrowDataType) -> Self { + Self::try_new(dtype, self.values, self.validity).unwrap() } /// Converts itself into an [`Array`]. 
@@ -413,7 +413,7 @@ impl MutableArray for MutablePrimitiveArray { fn as_box(&mut self) -> Box { PrimitiveArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), ) @@ -422,15 +422,15 @@ impl MutableArray for MutablePrimitiveArray { fn as_arc(&mut self) -> Arc { PrimitiveArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), ) .arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { @@ -474,7 +474,7 @@ impl MutablePrimitiveArray { let (validity, values) = trusted_len_unzip(iterator); Self { - data_type: T::PRIMITIVE.into(), + dtype: T::PRIMITIVE.into(), values, validity, } @@ -508,7 +508,7 @@ impl MutablePrimitiveArray { let (validity, values) = try_trusted_len_unzip(iterator)?; Ok(Self { - data_type: T::PRIMITIVE.into(), + dtype: T::PRIMITIVE.into(), values, validity, }) @@ -527,7 +527,7 @@ impl MutablePrimitiveArray { /// Creates a new [`MutablePrimitiveArray`] out an iterator over values pub fn from_trusted_len_values_iter>(iter: I) -> Self { Self { - data_type: T::PRIMITIVE.into(), + dtype: T::PRIMITIVE.into(), values: iter.collect(), validity: None, } @@ -546,7 +546,7 @@ impl MutablePrimitiveArray { /// I.e. that `size_hint().1` correctly reports its length. pub unsafe fn from_trusted_len_values_iter_unchecked>(iter: I) -> Self { Self { - data_type: T::PRIMITIVE.into(), + dtype: T::PRIMITIVE.into(), values: iter.collect(), validity: None, } @@ -577,7 +577,7 @@ impl>> FromIterator let validity = Some(validity); Self { - data_type: T::PRIMITIVE.into(), + dtype: T::PRIMITIVE.into(), values, validity, } diff --git a/crates/polars-arrow/src/array/struct_/data.rs b/crates/polars-arrow/src/array/struct_/data.rs index 4dfcb0010a73..a07424e7d2c1 100644 --- a/crates/polars-arrow/src/array/struct_/data.rs +++ b/crates/polars-arrow/src/array/struct_/data.rs @@ -5,9 +5,9 @@ use crate::bitmap::Bitmap; impl Arrow2Arrow for StructArray { fn to_data(&self) -> ArrayData { - let data_type = self.data_type.clone().into(); + let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let builder = ArrayDataBuilder::new(dtype) .len(self.len()) .nulls(self.validity.as_ref().map(|b| b.clone().into())) .child_data(self.values.iter().map(|x| to_data(x.as_ref())).collect()); @@ -17,10 +17,10 @@ impl Arrow2Arrow for StructArray { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); Self { - data_type, + dtype, values: data.child_data().iter().map(from_data).collect(), validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), } diff --git a/crates/polars-arrow/src/array/struct_/ffi.rs b/crates/polars-arrow/src/array/struct_/ffi.rs index 76522b8efd7c..3bfb9a1a7d7f 100644 --- a/crates/polars-arrow/src/array/struct_/ffi.rs +++ b/crates/polars-arrow/src/array/struct_/ffi.rs @@ -30,8 +30,8 @@ unsafe impl ToFfi for StructArray { impl FromFfi for StructArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); - let fields = Self::get_fields(&data_type); + let dtype = array.dtype().clone(); + let fields = Self::get_fields(&dtype); let arrow_array = array.array(); let validity = unsafe { array.validity() }?; @@ -68,6 +68,6 @@ impl FromFfi for StructArray 
{ }) .collect::>>>()?; - Self::try_new(data_type, values, validity) + Self::try_new(dtype, values, validity) } } diff --git a/crates/polars-arrow/src/array/struct_/mod.rs b/crates/polars-arrow/src/array/struct_/mod.rs index 6a31fe801209..efac13a481ea 100644 --- a/crates/polars-arrow/src/array/struct_/mod.rs +++ b/crates/polars-arrow/src/array/struct_/mod.rs @@ -31,7 +31,7 @@ use crate::compute::utils::combine_validities_and; /// ``` #[derive(Clone)] pub struct StructArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, } @@ -40,23 +40,23 @@ impl StructArray { /// Returns a new [`StructArray`]. /// # Errors /// This function errors iff: - /// * `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Struct`]. - /// * the children of `data_type` are empty + /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`]. + /// * the children of `dtype` are empty /// * the values's len is different from children's length /// * any of the values's data type is different from its corresponding children' data type /// * any element of values has a different length than the first element /// * the validity's length is not equal to the length of the first element pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, ) -> PolarsResult { - let fields = Self::try_get_fields(&data_type)?; + let fields = Self::try_get_fields(&dtype)?; if fields.is_empty() { assert!(values.is_empty(), "invalid struct"); assert_eq!(validity.map(|v| v.len()).unwrap_or(0), 0, "invalid struct"); return Ok(Self { - data_type, + dtype, values, validity: None, }); @@ -66,14 +66,14 @@ impl StructArray { } fields - .iter().map(|a| &a.data_type) - .zip(values.iter().map(|a| a.data_type())) + .iter().map(|a| &a.dtype) + .zip(values.iter().map(|a| a.dtype())) .enumerate() - .try_for_each(|(index, (data_type, child))| { - if data_type != child { + .try_for_each(|(index, (dtype, child))| { + if dtype != child { polars_bail!(ComputeError: "The children DataTypes of a StructArray must equal the children data types. - However, the field {index} has data type {data_type:?} but the value has data type {child:?}" + However, the field {index} has data type {dtype:?} but the value has data type {child:?}" ) } else { Ok(()) @@ -102,7 +102,7 @@ impl StructArray { } Ok(Self { - data_type, + dtype, values, validity, }) @@ -111,41 +111,41 @@ impl StructArray { /// Returns a new [`StructArray`] /// # Panics /// This function panics iff: - /// * `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Struct`]. - /// * the children of `data_type` are empty + /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`]. + /// * the children of `dtype` are empty /// * the values's len is different from children's length /// * any of the values's data type is different from its corresponding children' data type /// * any element of values has a different length than the first element /// * the validity's length is not equal to the length of the first element pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, ) -> Self { - Self::try_new(data_type, values, validity).unwrap() + Self::try_new(dtype, values, validity).unwrap() } /// Creates an empty [`StructArray`]. 
- pub fn new_empty(data_type: ArrowDataType) -> Self { - if let ArrowDataType::Struct(fields) = &data_type.to_logical_type() { + pub fn new_empty(dtype: ArrowDataType) -> Self { + if let ArrowDataType::Struct(fields) = &dtype.to_logical_type() { let values = fields .iter() - .map(|field| new_empty_array(field.data_type().clone())) + .map(|field| new_empty_array(field.dtype().clone())) .collect(); - Self::new(data_type, values, None) + Self::new(dtype, values, None) } else { panic!("StructArray must be initialized with DataType::Struct"); } } /// Creates a null [`StructArray`] of length `length`. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - if let ArrowDataType::Struct(fields) = &data_type { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + if let ArrowDataType::Struct(fields) = &dtype { let values = fields .iter() - .map(|field| new_null_array(field.data_type().clone(), length)) + .map(|field| new_null_array(field.dtype().clone(), length)) .collect(); - Self::new(data_type, values, Some(Bitmap::new_zeroed(length))) + Self::new(dtype, values, Some(Bitmap::new_zeroed(length))) } else { panic!("StructArray must be initialized with DataType::Struct"); } @@ -158,11 +158,11 @@ impl StructArray { #[must_use] pub fn into_data(self) -> (Vec, Vec>, Option) { let Self { - data_type, + dtype, values, validity, } = self; - let fields = if let ArrowDataType::Struct(fields) = data_type { + let fields = if let ArrowDataType::Struct(fields) = dtype { fields } else { unreachable!() @@ -242,14 +242,14 @@ impl StructArray { /// Returns the fields of this [`StructArray`]. pub fn fields(&self) -> &[Field] { - Self::get_fields(&self.data_type) + Self::get_fields(&self.dtype) } } impl StructArray { /// Returns the fields the `DataType::Struct`. - pub(crate) fn try_get_fields(data_type: &ArrowDataType) -> PolarsResult<&[Field]> { - match data_type.to_logical_type() { + pub(crate) fn try_get_fields(dtype: &ArrowDataType) -> PolarsResult<&[Field]> { + match dtype.to_logical_type() { ArrowDataType::Struct(fields) => Ok(fields), _ => { polars_bail!(ComputeError: "Struct array must be created with a DataType whose physical type is Struct") @@ -258,8 +258,8 @@ impl StructArray { } /// Returns the fields the `DataType::Struct`. - pub fn get_fields(data_type: &ArrowDataType) -> &[Field] { - Self::try_get_fields(data_type).unwrap() + pub fn get_fields(dtype: &ArrowDataType) -> &[Field] { + Self::try_get_fields(dtype).unwrap() } } @@ -295,12 +295,12 @@ impl Splitable for StructArray { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: lhs_values, validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), values: rhs_values, validity: rhs_validity, }, diff --git a/crates/polars-arrow/src/array/struct_/mutable.rs b/crates/polars-arrow/src/array/struct_/mutable.rs index d748f7743b32..286db07e2f97 100644 --- a/crates/polars-arrow/src/array/struct_/mutable.rs +++ b/crates/polars-arrow/src/array/struct_/mutable.rs @@ -10,17 +10,17 @@ use crate::datatypes::ArrowDataType; /// Converting a [`MutableStructArray`] into a [`StructArray`] is `O(1)`. 
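// --- Editorial example (not part of this patch): building a StructArray where each child's
// --- `dtype()` (formerly `data_type()`) must equal the dtype recorded in the matching `Field`,
// --- or `try_new` errors. Paths and the `"a".into()` conversions for `PlSmallStr` are assumptions.
use polars_arrow::array::{Array, PrimitiveArray, StructArray};
use polars_arrow::datatypes::{ArrowDataType, Field};

fn struct_dtype_sketch() {
    let ints = PrimitiveArray::<i32>::new(ArrowDataType::Int32, vec![1, 2].into(), None);
    let floats = PrimitiveArray::<f64>::new(ArrowDataType::Float64, vec![0.1, 0.2].into(), None);
    let fields = vec![
        Field::new("a".into(), ints.dtype().clone(), true),
        Field::new("b".into(), floats.dtype().clone(), true),
    ];
    let children: Vec<Box<dyn Array>> = vec![Box::new(ints), Box::new(floats)];
    let s = StructArray::new(ArrowDataType::Struct(fields), children, None);
    assert_eq!(s.len(), 2);
    assert_eq!(s.fields().len(), 2);
}
// --- End editorial example.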
#[derive(Debug)] pub struct MutableStructArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, } fn check( - data_type: &ArrowDataType, + dtype: &ArrowDataType, values: &[Box], validity: Option, ) -> PolarsResult<()> { - let fields = StructArray::try_get_fields(data_type)?; + let fields = StructArray::try_get_fields(dtype)?; if fields.is_empty() { polars_bail!(ComputeError: "a StructArray must contain at least one field") } @@ -29,14 +29,14 @@ fn check( } fields - .iter().map(|a| &a.data_type) - .zip(values.iter().map(|a| a.data_type())) + .iter().map(|a| &a.dtype) + .zip(values.iter().map(|a| a.dtype())) .enumerate() - .try_for_each(|(index, (data_type, child))| { - if data_type != child { + .try_for_each(|(index, (dtype, child))| { + if dtype != child { polars_bail!(ComputeError: "The children DataTypes of a StructArray must equal the children data types. - However, the field {index} has data type {data_type:?} but the value has data type {child:?}" + However, the field {index} has data type {dtype:?} but the value has data type {child:?}" ) } else { Ok(()) @@ -76,7 +76,7 @@ impl From for StructArray { }; StructArray::new( - other.data_type, + other.dtype, other.values.into_iter().map(|mut v| v.as_box()).collect(), validity, ) @@ -85,24 +85,24 @@ impl From for StructArray { impl MutableStructArray { /// Creates a new [`MutableStructArray`]. - pub fn new(data_type: ArrowDataType, values: Vec>) -> Self { - Self::try_new(data_type, values, None).unwrap() + pub fn new(dtype: ArrowDataType, values: Vec>) -> Self { + Self::try_new(dtype, values, None).unwrap() } /// Create a [`MutableStructArray`] out of low-end APIs. /// # Errors /// This function errors iff: - /// * `data_type` is not [`ArrowDataType::Struct`] - /// * The inner types of `data_type` are not equal to those of `values` + /// * `dtype` is not [`ArrowDataType::Struct`] + /// * The inner types of `dtype` are not equal to those of `values` /// * `validity` is not `None` and its length is different from the `values`'s length pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, ) -> PolarsResult { - check(&data_type, &values, validity.as_ref().map(|x| x.len()))?; + check(&dtype, &values, validity.as_ref().map(|x| x.len()))?; Ok(Self { - data_type, + dtype, values, validity, }) @@ -116,7 +116,7 @@ impl MutableStructArray { Vec>, Option, ) { - (self.data_type, self.values, self.validity) + (self.dtype, self.values, self.validity) } /// The mutable values @@ -202,7 +202,7 @@ impl MutableArray for MutableStructArray { fn as_box(&mut self) -> Box { StructArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.values) .into_iter() .map(|mut v| v.as_box()) @@ -214,7 +214,7 @@ impl MutableArray for MutableStructArray { fn as_arc(&mut self) -> Arc { StructArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.values) .into_iter() .map(|mut v| v.as_box()) @@ -224,8 +224,8 @@ impl MutableArray for MutableStructArray { .arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/array/union/data.rs b/crates/polars-arrow/src/array/union/data.rs index 4303ab7b4356..237f0393d5fe 100644 --- a/crates/polars-arrow/src/array/union/data.rs +++ b/crates/polars-arrow/src/array/union/data.rs @@ -6,15 +6,15 @@ use crate::datatypes::ArrowDataType; impl Arrow2Arrow for 
UnionArray { fn to_data(&self) -> ArrayData { - let data_type = arrow_schema::DataType::from(self.data_type.clone()); + let dtype = arrow_schema::DataType::from(self.dtype.clone()); let len = self.len(); let builder = match self.offsets.clone() { - Some(offsets) => ArrayDataBuilder::new(data_type) + Some(offsets) => ArrayDataBuilder::new(dtype) .len(len) .buffers(vec![self.types.clone().into(), offsets.into()]) .child_data(self.fields.iter().map(|x| to_data(x.as_ref())).collect()), - None => ArrayDataBuilder::new(data_type) + None => ArrayDataBuilder::new(dtype) .len(len) .buffers(vec![self.types.clone().into()]) .child_data( @@ -30,7 +30,7 @@ impl Arrow2Arrow for UnionArray { } fn from_data(data: &ArrayData) -> Self { - let data_type: ArrowDataType = data.data_type().clone().into(); + let dtype: ArrowDataType = data.dtype().clone().into(); let fields = data.child_data().iter().map(from_data).collect(); let buffers = data.buffers(); @@ -46,7 +46,7 @@ impl Arrow2Arrow for UnionArray { }; // Map from type id to array index - let map = match &data_type { + let map = match &dtype { ArrowDataType::Union(_, Some(ids), _) => { let mut map = [0; 127]; for (pos, &id) in ids.iter().enumerate() { @@ -63,7 +63,7 @@ impl Arrow2Arrow for UnionArray { map, fields, offsets, - data_type, + dtype, offset: data.offset(), } } diff --git a/crates/polars-arrow/src/array/union/ffi.rs b/crates/polars-arrow/src/array/union/ffi.rs index 1510b29e2588..d9a2601a6019 100644 --- a/crates/polars-arrow/src/array/union/ffi.rs +++ b/crates/polars-arrow/src/array/union/ffi.rs @@ -33,11 +33,11 @@ unsafe impl ToFfi for UnionArray { impl FromFfi for UnionArray { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); - let fields = Self::get_fields(&data_type); + let dtype = array.dtype().clone(); + let fields = Self::get_fields(&dtype); let mut types = unsafe { array.buffer::(0) }?; - let offsets = if Self::is_sparse(&data_type) { + let offsets = if Self::is_sparse(&dtype) { None } else { Some(unsafe { array.buffer::(1) }?) @@ -56,6 +56,6 @@ impl FromFfi for UnionArray { types.slice(offset, length); }; - Self::try_new(data_type, types, fields, offsets) + Self::try_new(dtype, types, fields, offsets) } } diff --git a/crates/polars-arrow/src/array/union/mod.rs b/crates/polars-arrow/src/array/union/mod.rs index d1221b812eae..e42d268f5c06 100644 --- a/crates/polars-arrow/src/array/union/mod.rs +++ b/crates/polars-arrow/src/array/union/mod.rs @@ -34,7 +34,7 @@ pub struct UnionArray { fields: Vec>, // Invariant: when set, `offsets.len() == types.len()` offsets: Option>, - data_type: ArrowDataType, + dtype: ArrowDataType, offset: usize, } @@ -42,17 +42,17 @@ impl UnionArray { /// Returns a new [`UnionArray`]. /// # Errors /// This function errors iff: - /// * `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Union`]. - /// * the fields's len is different from the `data_type`'s children's length + /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Union`]. 
+ /// * the fields's len is different from the `dtype`'s children's length /// * The number of `fields` is larger than `i8::MAX` /// * any of the values's data type is different from its corresponding children' data type pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, types: Buffer, fields: Vec>, offsets: Option>, ) -> PolarsResult { - let (f, ids, mode) = Self::try_get_all(&data_type)?; + let (f, ids, mode) = Self::try_get_all(&dtype)?; if f.len() != fields.len() { polars_bail!(ComputeError: "the number of `fields` must equal the number of children fields in DataType::Union") @@ -62,14 +62,14 @@ impl UnionArray { )?; f - .iter().map(|a| a.data_type()) - .zip(fields.iter().map(|a| a.data_type())) + .iter().map(|a| a.dtype()) + .zip(fields.iter().map(|a| a.dtype())) .enumerate() - .try_for_each(|(index, (data_type, child))| { - if data_type != child { + .try_for_each(|(index, (dtype, child))| { + if dtype != child { polars_bail!(ComputeError: "the children DataTypes of a UnionArray must equal the children data types. - However, the field {index} has data type {data_type:?} but the value has data type {child:?}" + However, the field {index} has data type {dtype:?} but the value has data type {child:?}" ) } else { Ok(()) @@ -147,7 +147,7 @@ impl UnionArray { }; Ok(Self { - data_type, + dtype, map, fields, offsets, @@ -159,24 +159,24 @@ impl UnionArray { /// Returns a new [`UnionArray`]. /// # Panics /// This function panics iff: - /// * `data_type`'s physical type is not [`crate::datatypes::PhysicalType::Union`]. - /// * the fields's len is different from the `data_type`'s children's length + /// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Union`]. + /// * the fields's len is different from the `dtype`'s children's length /// * any of the values's data type is different from its corresponding children' data type pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, types: Buffer, fields: Vec>, offsets: Option>, ) -> Self { - Self::try_new(data_type, types, fields, offsets).unwrap() + Self::try_new(dtype, types, fields, offsets).unwrap() } /// Creates a new null [`UnionArray`]. - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { - if let ArrowDataType::Union(f, _, mode) = &data_type { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { + if let ArrowDataType::Union(f, _, mode) = &dtype { let fields = f .iter() - .map(|x| new_null_array(x.data_type().clone(), length)) + .map(|x| new_null_array(x.dtype().clone(), length)) .collect(); let offsets = if mode.is_sparse() { @@ -188,18 +188,18 @@ impl UnionArray { // all from the same field let types = vec![0i8; length].into(); - Self::new(data_type, types, fields, offsets) + Self::new(dtype, types, fields, offsets) } else { panic!("Union struct must be created with the corresponding Union DataType") } } /// Creates a new empty [`UnionArray`]. 
- pub fn new_empty(data_type: ArrowDataType) -> Self { - if let ArrowDataType::Union(f, _, mode) = data_type.to_logical_type() { + pub fn new_empty(dtype: ArrowDataType) -> Self { + if let ArrowDataType::Union(f, _, mode) = dtype.to_logical_type() { let fields = f .iter() - .map(|x| new_empty_array(x.data_type().clone())) + .map(|x| new_empty_array(x.dtype().clone())) .collect(); let offsets = if mode.is_sparse() { @@ -209,7 +209,7 @@ impl UnionArray { }; Self { - data_type, + dtype, map: None, fields, offsets, @@ -351,8 +351,8 @@ impl Array for UnionArray { } impl UnionArray { - fn try_get_all(data_type: &ArrowDataType) -> PolarsResult { - match data_type.to_logical_type() { + fn try_get_all(dtype: &ArrowDataType) -> PolarsResult { + match dtype.to_logical_type() { ArrowDataType::Union(fields, ids, mode) => { Ok((fields, ids.as_ref().map(|x| x.as_ref()), *mode)) }, @@ -362,22 +362,22 @@ impl UnionArray { } } - fn get_all(data_type: &ArrowDataType) -> (&[Field], Option<&[i32]>, UnionMode) { - Self::try_get_all(data_type).unwrap() + fn get_all(dtype: &ArrowDataType) -> (&[Field], Option<&[i32]>, UnionMode) { + Self::try_get_all(dtype).unwrap() } /// Returns all fields from [`ArrowDataType::Union`]. /// # Panic - /// Panics iff `data_type`'s logical type is not [`ArrowDataType::Union`]. - pub fn get_fields(data_type: &ArrowDataType) -> &[Field] { - Self::get_all(data_type).0 + /// Panics iff `dtype`'s logical type is not [`ArrowDataType::Union`]. + pub fn get_fields(dtype: &ArrowDataType) -> &[Field] { + Self::get_all(dtype).0 } /// Returns whether the [`ArrowDataType::Union`] is sparse or not. /// # Panic - /// Panics iff `data_type`'s logical type is not [`ArrowDataType::Union`]. - pub fn is_sparse(data_type: &ArrowDataType) -> bool { - Self::get_all(data_type).2.is_sparse() + /// Panics iff `dtype`'s logical type is not [`ArrowDataType::Union`]. 
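// --- Editorial example (not part of this patch): the static helpers on UnionArray now take
// --- `dtype: &ArrowDataType`. The `Union(fields, ids, mode)` layout and `is_sparse`/`get_fields`
// --- come from this patch; the re-export paths and `UnionMode` location are assumptions.
use polars_arrow::array::UnionArray;
use polars_arrow::datatypes::{ArrowDataType, Field, UnionMode};

fn union_dtype_sketch() {
    let fields = vec![Field::new("i".into(), ArrowDataType::Int32, true)];
    let dtype = ArrowDataType::Union(fields, None, UnionMode::Sparse);
    assert!(UnionArray::is_sparse(&dtype));
    assert_eq!(UnionArray::get_fields(&dtype).len(), 1);
}
// --- End editorial example.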
+ pub fn is_sparse(dtype: &ArrowDataType) -> bool { + Self::get_all(dtype).2.is_sparse() } } @@ -399,7 +399,7 @@ impl Splitable for UnionArray { map: self.map, fields: self.fields.clone(), offsets: lhs_offsets, - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offset: self.offset, }, Self { @@ -407,7 +407,7 @@ impl Splitable for UnionArray { map: self.map, fields: self.fields.clone(), offsets: rhs_offsets, - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offset: self.offset + offset, }, ) diff --git a/crates/polars-arrow/src/array/utf8/data.rs b/crates/polars-arrow/src/array/utf8/data.rs index 577a43677c05..aa85c2742961 100644 --- a/crates/polars-arrow/src/array/utf8/data.rs +++ b/crates/polars-arrow/src/array/utf8/data.rs @@ -6,8 +6,8 @@ use crate::offset::{Offset, OffsetsBuffer}; impl Arrow2Arrow for Utf8Array { fn to_data(&self) -> ArrayData { - let data_type = self.data_type().clone().into(); - let builder = ArrayDataBuilder::new(data_type) + let dtype = self.dtype().clone().into(); + let builder = ArrayDataBuilder::new(dtype) .len(self.offsets().len_proxy()) .buffers(vec![ self.offsets.clone().into_inner().into(), @@ -20,10 +20,10 @@ impl Arrow2Arrow for Utf8Array { } fn from_data(data: &ArrayData) -> Self { - let data_type = data.data_type().clone().into(); + let dtype = data.dtype().clone().into(); if data.is_empty() { // Handle empty offsets - return Self::new_empty(data_type); + return Self::new_empty(dtype); } let buffers = data.buffers(); @@ -33,7 +33,7 @@ impl Arrow2Arrow for Utf8Array { offsets.slice(data.offset(), data.len() + 1); Self { - data_type, + dtype, offsets, values: buffers[1].clone().into(), validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), diff --git a/crates/polars-arrow/src/array/utf8/ffi.rs b/crates/polars-arrow/src/array/utf8/ffi.rs index 5bdced4df6f1..7181eba91286 100644 --- a/crates/polars-arrow/src/array/utf8/ffi.rs +++ b/crates/polars-arrow/src/array/utf8/ffi.rs @@ -40,7 +40,7 @@ unsafe impl ToFfi for Utf8Array { }); Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), validity, offsets: self.offsets.clone(), values: self.values.clone(), @@ -50,7 +50,7 @@ unsafe impl ToFfi for Utf8Array { impl FromFfi for Utf8Array { unsafe fn try_from_ffi(array: A) -> PolarsResult { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let values = unsafe { array.buffer::(2)? }; @@ -58,6 +58,6 @@ impl FromFfi for Utf8Array { // assumption that data from FFI is well constructed let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) }; - Ok(Self::new_unchecked(data_type, offsets, values, validity)) + Ok(Self::new_unchecked(dtype, offsets, values, validity)) } } diff --git a/crates/polars-arrow/src/array/utf8/mod.rs b/crates/polars-arrow/src/array/utf8/mod.rs index 03c4ca1caabb..0d7ea3eb7d8c 100644 --- a/crates/polars-arrow/src/array/utf8/mod.rs +++ b/crates/polars-arrow/src/array/utf8/mod.rs @@ -65,7 +65,7 @@ impl> AsRef<[u8]> for StrAsBytes { /// * `len` is equal to `validity.len()`, when defined. #[derive(Clone)] pub struct Utf8Array { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, @@ -79,12 +79,12 @@ impl Utf8Array { /// This function returns an error iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. 
- /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. /// * The `values` between two consecutive `offsets` are not valid utf8 /// # Implementation /// This function is `O(N)` - checking utf8 is `O(N)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, @@ -97,12 +97,12 @@ impl Utf8Array { polars_bail!(ComputeError: "validity mask length must match the number of values"); } - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + if dtype.to_physical_type() != Self::default_dtype().to_physical_type() { polars_bail!(ComputeError: "Utf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8") } Ok(Self { - data_type, + dtype, offsets, values, validity, @@ -186,8 +186,8 @@ impl Utf8Array { /// Returns the [`ArrowDataType`] of this array. #[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } /// Returns the values of this [`Utf8Array`]. @@ -244,12 +244,12 @@ impl Utf8Array { #[must_use] pub fn into_inner(self) -> (ArrowDataType, OffsetsBuffer, Buffer, Option) { let Self { - data_type, + dtype, offsets, values, validity, } = self; - (data_type, offsets, values, validity) + (dtype, offsets, values, validity) } /// Try to convert this `Utf8Array` to a `MutableUtf8Array` @@ -261,7 +261,7 @@ impl Utf8Array { // SAFETY: invariants are preserved Left(bitmap) => Left(unsafe { Utf8Array::new_unchecked( - self.data_type, + self.dtype, self.offsets, self.values, Some(bitmap), @@ -272,7 +272,7 @@ impl Utf8Array { // SAFETY: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( - self.data_type, + self.dtype, offsets, values, Some(mutable_bitmap.into()), @@ -283,7 +283,7 @@ impl Utf8Array { // SAFETY: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( - self.data_type, + self.dtype, offsets.into(), values, Some(mutable_bitmap.into()), @@ -294,7 +294,7 @@ impl Utf8Array { // SAFETY: invariants are preserved Left(unsafe { Utf8Array::new_unchecked( - self.data_type, + self.dtype, offsets, values.into(), Some(mutable_bitmap.into()), @@ -303,7 +303,7 @@ impl Utf8Array { }, (Right(values), Right(offsets)) => Right(unsafe { MutableUtf8Array::new_unchecked( - self.data_type, + self.dtype, offsets, values, Some(mutable_bitmap), @@ -314,16 +314,16 @@ impl Utf8Array { } else { match (self.values.into_mut(), self.offsets.into_mut()) { (Left(values), Left(offsets)) => { - Left(unsafe { Utf8Array::new_unchecked(self.data_type, offsets, values, None) }) + Left(unsafe { Utf8Array::new_unchecked(self.dtype, offsets, values, None) }) }, (Left(values), Right(offsets)) => Left(unsafe { - Utf8Array::new_unchecked(self.data_type, offsets.into(), values, None) + Utf8Array::new_unchecked(self.dtype, offsets.into(), values, None) }), (Right(values), Left(offsets)) => Left(unsafe { - Utf8Array::new_unchecked(self.data_type, offsets, values.into(), None) + Utf8Array::new_unchecked(self.dtype, offsets, values.into(), None) }), (Right(values), Right(offsets)) => Right(unsafe { - MutableUtf8Array::new_unchecked(self.data_type, offsets, values, None) + MutableUtf8Array::new_unchecked(self.dtype, offsets, values, None) }), } } @@ -333,15 +333,15 @@ impl Utf8Array { /// /// The array is guaranteed to have no elements nor validity. 
#[inline] - pub fn new_empty(data_type: ArrowDataType) -> Self { - unsafe { Self::new_unchecked(data_type, OffsetsBuffer::new(), Buffer::new(), None) } + pub fn new_empty(dtype: ArrowDataType) -> Self { + unsafe { Self::new_unchecked(dtype, OffsetsBuffer::new(), Buffer::new(), None) } } /// Returns a new [`Utf8Array`] whose all slots are null / `None`. #[inline] - pub fn new_null(data_type: ArrowDataType, length: usize) -> Self { + pub fn new_null(dtype: ArrowDataType, length: usize) -> Self { Self::new( - data_type, + dtype, Offsets::new_zeroed(length).into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), @@ -349,7 +349,7 @@ impl Utf8Array { } /// Returns a default [`ArrowDataType`] of this array, which depends on the generic parameter `O`: `DataType::Utf8` or `DataType::LargeUtf8` - pub fn default_data_type() -> ArrowDataType { + pub fn default_dtype() -> ArrowDataType { if O::IS_LARGE { ArrowDataType::LargeUtf8 } else { @@ -363,7 +363,7 @@ impl Utf8Array { /// This function panics (in debug mode only) iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. /// /// # Safety /// This function is unsound iff: @@ -371,7 +371,7 @@ impl Utf8Array { /// # Implementation /// This function is `O(1)` pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, @@ -387,12 +387,12 @@ impl Utf8Array { "validity mask length must match the number of values" ); debug_assert!( - data_type.to_physical_type() == Self::default_data_type().to_physical_type(), + dtype.to_physical_type() == Self::default_dtype().to_physical_type(), "Utf8Array can only be initialized with DataType::Utf8 or DataType::LargeUtf8" ); Self { - data_type, + dtype, offsets, values, validity, @@ -404,17 +404,17 @@ impl Utf8Array { /// This function panics iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. /// * The `values` between two consecutive `offsets` are not valid utf8 /// # Implementation /// This function is `O(N)` - checking utf8 is `O(N)` pub fn new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: OffsetsBuffer, values: Buffer, validity: Option, ) -> Self { - Self::try_new(data_type, offsets, values, validity).unwrap() + Self::try_new(dtype, offsets, values, validity).unwrap() } /// Returns a (non-null) [`Utf8Array`] created from a [`TrustedLen`] of `&str`. 
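Illustration only, not part of the patch: a doctest-style sketch of the renamed `Utf8Array` surface touched in the hunks above (`default_dtype`, `new_empty`, `dtype()`); the `polars_arrow` import paths are assumed.

```rust
use polars_arrow::array::Utf8Array;
use polars_arrow::datatypes::ArrowDataType;

// `default_dtype` picks Utf8 or LargeUtf8 from the offset width.
let dtype = Utf8Array::<i64>::default_dtype();
assert_eq!(dtype, ArrowDataType::LargeUtf8);

// Constructors take the type under the `dtype` name; the accessor is `dtype()`.
let empty = Utf8Array::<i64>::new_empty(dtype.clone());
assert_eq!(empty.dtype(), &ArrowDataType::LargeUtf8);
assert_eq!(empty.len(), 0);
```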
@@ -497,7 +497,7 @@ impl Utf8Array { pub fn to_binary(&self) -> BinaryArray { unsafe { BinaryArray::new_unchecked( - BinaryArray::::default_data_type(), + BinaryArray::::default_dtype(), self.offsets.clone(), self.values.clone(), self.validity.clone(), @@ -518,13 +518,13 @@ impl Splitable for Utf8Array { ( Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: lhs_offsets, values: self.values.clone(), validity: lhs_validity, }, Self { - data_type: self.data_type.clone(), + dtype: self.dtype.clone(), offsets: rhs_offsets, values: self.values.clone(), validity: rhs_validity, @@ -560,11 +560,11 @@ unsafe impl GenericBinaryArray for Utf8Array { impl Default for Utf8Array { fn default() -> Self { - let data_type = if O::IS_LARGE { + let dtype = if O::IS_LARGE { ArrowDataType::LargeUtf8 } else { ArrowDataType::Utf8 }; - Utf8Array::new(data_type, Default::default(), Default::default(), None) + Utf8Array::new(dtype, Default::default(), Default::default(), None) } } diff --git a/crates/polars-arrow/src/array/utf8/mutable.rs b/crates/polars-arrow/src/array/utf8/mutable.rs index af4845680428..570e795542ff 100644 --- a/crates/polars-arrow/src/array/utf8/mutable.rs +++ b/crates/polars-arrow/src/array/utf8/mutable.rs @@ -51,17 +51,17 @@ impl MutableUtf8Array { /// This function returns an error iff: /// * The last offset is not equal to the values' length. /// * the validity's length is not equal to `offsets.len()`. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. /// * The `values` between two consecutive `offsets` are not valid utf8 /// # Implementation /// This function is `O(N)` - checking utf8 is `O(N)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, validity: Option, ) -> PolarsResult { - let values = MutableUtf8ValuesArray::try_new(data_type, offsets, values)?; + let values = MutableUtf8ValuesArray::try_new(dtype, offsets, values)?; if validity .as_ref() @@ -82,12 +82,12 @@ impl MutableUtf8Array { /// * The `offsets` and `values` are inconsistent /// * The validity is not `None` and its length is different from `offsets`'s length minus one. pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, validity: Option, ) -> Self { - let values = MutableUtf8ValuesArray::new_unchecked(data_type, offsets, values); + let values = MutableUtf8ValuesArray::new_unchecked(dtype, offsets, values); if let Some(ref validity) = validity { assert_eq!(values.len(), validity.len()); } @@ -100,8 +100,8 @@ impl MutableUtf8Array { Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref())) } - fn default_data_type() -> ArrowDataType { - Utf8Array::::default_data_type() + fn default_dtype() -> ArrowDataType { + Utf8Array::::default_dtype() } /// Initializes a new [`MutableUtf8Array`] with a pre-allocated capacity of slots. @@ -198,8 +198,8 @@ impl MutableUtf8Array { /// Extract the low-end APIs from the [`MutableUtf8Array`]. 
pub fn into_data(self) -> (ArrowDataType, Offsets, Vec, Option) { - let (data_type, offsets, values) = self.values.into_inner(); - (data_type, offsets, values, self.validity) + let (dtype, offsets, values) = self.values.into_inner(); + (dtype, offsets, values, self.validity) } /// Returns an iterator of `&str` @@ -260,7 +260,7 @@ impl MutableArray for MutableUtf8Array { array.arced() } - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { if O::IS_LARGE { &ArrowDataType::LargeUtf8 } else { @@ -391,7 +391,7 @@ impl MutableUtf8Array { let (validity, offsets, values) = trusted_len_unzip(iterator); // soundness: P is `str` - Self::new_unchecked(Self::default_data_type(), offsets, values, validity) + Self::new_unchecked(Self::default_dtype(), offsets, values, validity) } /// Creates a [`MutableUtf8Array`] from an iterator of trusted length. @@ -462,7 +462,7 @@ impl MutableUtf8Array { // soundness: P is `str` Ok(Self::new_unchecked( - Self::default_data_type(), + Self::default_dtype(), offsets, values, validity, diff --git a/crates/polars-arrow/src/array/utf8/mutable_values.rs b/crates/polars-arrow/src/array/utf8/mutable_values.rs index ce3c2f71f20c..ec362a40a8db 100644 --- a/crates/polars-arrow/src/array/utf8/mutable_values.rs +++ b/crates/polars-arrow/src/array/utf8/mutable_values.rs @@ -15,7 +15,7 @@ use crate::trusted_len::TrustedLen; /// from [`MutableUtf8Array`] in that it builds non-null [`Utf8Array`]. #[derive(Debug, Clone)] pub struct MutableUtf8ValuesArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, } @@ -27,7 +27,7 @@ impl From> for Utf8Array { // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. unsafe { Utf8Array::::new_unchecked( - other.data_type, + other.dtype, other.offsets.into(), other.values.into(), None, @@ -41,7 +41,7 @@ impl From> for MutableUtf8Array { // SAFETY: // `MutableUtf8ValuesArray` has the same invariants as `MutableUtf8Array` unsafe { - MutableUtf8Array::::new_unchecked(other.data_type, other.offsets, other.values, None) + MutableUtf8Array::::new_unchecked(other.dtype, other.offsets, other.values, None) } } } @@ -56,7 +56,7 @@ impl MutableUtf8ValuesArray { /// Returns an empty [`MutableUtf8ValuesArray`]. pub fn new() -> Self { Self { - data_type: Self::default_data_type(), + dtype: Self::default_dtype(), offsets: Offsets::new(), values: Vec::::new(), } @@ -67,22 +67,22 @@ impl MutableUtf8ValuesArray { /// # Errors /// This function returns an error iff: /// * The last offset is not equal to the values' length. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Utf8` or `LargeUtf8`. 
/// * The `values` between two consecutive `offsets` are not valid utf8 /// # Implementation /// This function is `O(N)` - checking utf8 is `O(N)` pub fn try_new( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, ) -> PolarsResult { try_check_utf8(&offsets, &values)?; - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + if dtype.to_physical_type() != Self::default_dtype().to_physical_type() { polars_bail!(ComputeError: "MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8") } Ok(Self { - data_type, + dtype, offsets, values, }) @@ -93,7 +93,7 @@ impl MutableUtf8ValuesArray { /// # Panic /// This function does not panic iff: /// * The last offset is equal to the values' length. - /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is equal to either `Utf8` or `LargeUtf8`. + /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is equal to either `Utf8` or `LargeUtf8`. /// /// # Safety /// This function is safe iff: @@ -102,19 +102,19 @@ impl MutableUtf8ValuesArray { /// # Implementation /// This function is `O(1)` pub unsafe fn new_unchecked( - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Vec, ) -> Self { try_check_offsets_bounds(&offsets, values.len()) .expect("The length of the values must be equal to the last offset value"); - if data_type.to_physical_type() != Self::default_data_type().to_physical_type() { + if dtype.to_physical_type() != Self::default_dtype().to_physical_type() { panic!("MutableUtf8ValuesArray can only be initialized with DataType::Utf8 or DataType::LargeUtf8") } Self { - data_type, + dtype, offsets, values, } @@ -122,8 +122,8 @@ impl MutableUtf8ValuesArray { /// Returns the default [`ArrowDataType`] of this container: [`ArrowDataType::Utf8`] or [`ArrowDataType::LargeUtf8`] /// depending on the generic [`Offset`]. - pub fn default_data_type() -> ArrowDataType { - Utf8Array::::default_data_type() + pub fn default_dtype() -> ArrowDataType { + Utf8Array::::default_dtype() } /// Initializes a new [`MutableUtf8ValuesArray`] with a pre-allocated capacity of items. @@ -134,7 +134,7 @@ impl MutableUtf8ValuesArray { /// Initializes a new [`MutableUtf8ValuesArray`] with a pre-allocated capacity of items and values. pub fn with_capacities(capacity: usize, values: usize) -> Self { Self { - data_type: Self::default_data_type(), + dtype: Self::default_dtype(), offsets: Offsets::::with_capacity(capacity), values: Vec::::with_capacity(values), } @@ -229,7 +229,7 @@ impl MutableUtf8ValuesArray { /// Extract the low-end APIs from the [`MutableUtf8ValuesArray`]. pub fn into_inner(self) -> (ArrowDataType, Offsets, Vec) { - (self.data_type, self.offsets, self.values) + (self.dtype, self.offsets, self.values) } } @@ -252,8 +252,8 @@ impl MutableArray for MutableUtf8ValuesArray { array.arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { @@ -282,7 +282,7 @@ impl> FromIterator

for MutableUtf8ValuesArray { fn from_iter>(iter: I) -> Self { let (offsets, values) = values_iter(iter.into_iter().map(StrAsBytes)); // soundness: T: AsRef and offsets are monotonically increasing - unsafe { Self::new_unchecked(Self::default_data_type(), offsets, values) } + unsafe { Self::new_unchecked(Self::default_dtype(), offsets, values) } } } @@ -349,7 +349,7 @@ impl MutableUtf8ValuesArray { let (offsets, values) = trusted_len_values_iter(iterator); // soundness: P is `str` and offsets are monotonically increasing - Self::new_unchecked(Self::default_data_type(), offsets, values) + Self::new_unchecked(Self::default_dtype(), offsets, values) } /// Returns a new [`MutableUtf8ValuesArray`] from an iterator. diff --git a/crates/polars-arrow/src/array/values.rs b/crates/polars-arrow/src/array/values.rs index 9864e4f4c129..197d97f167eb 100644 --- a/crates/polars-arrow/src/array/values.rs +++ b/crates/polars-arrow/src/array/values.rs @@ -54,7 +54,7 @@ impl ValueSize for BinaryArray { impl ValueSize for ArrayRef { fn get_values_size(&self) -> usize { - match self.data_type() { + match self.dtype() { ArrowDataType::LargeUtf8 => self .as_any() .downcast_ref::>() diff --git a/crates/polars-arrow/src/compute/aggregate/memory.rs b/crates/polars-arrow/src/compute/aggregate/memory.rs index 8b59503b93e7..bd4ba7ab6384 100644 --- a/crates/polars-arrow/src/compute/aggregate/memory.rs +++ b/crates/polars-arrow/src/compute/aggregate/memory.rs @@ -42,7 +42,7 @@ fn binview_size(array: &BinaryViewArrayGeneric) -> usiz /// FFI buffers are included in this estimation. pub fn estimated_bytes_size(array: &dyn Array) -> usize { use PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => 0, Boolean => { let array = array.as_any().downcast_ref::().unwrap(); diff --git a/crates/polars-arrow/src/compute/aggregate/sum.rs b/crates/polars-arrow/src/compute/aggregate/sum.rs index 8ba9714f9521..9fbed5f8b1b6 100644 --- a/crates/polars-arrow/src/compute/aggregate/sum.rs +++ b/crates/polars-arrow/src/compute/aggregate/sum.rs @@ -102,9 +102,9 @@ where } } -/// Whether [`sum`] supports `data_type` -pub fn can_sum(data_type: &ArrowDataType) -> bool { - if let PhysicalType::Primitive(primitive) = data_type.to_physical_type() { +/// Whether [`sum`] supports `dtype` +pub fn can_sum(dtype: &ArrowDataType) -> bool { + if let PhysicalType::Primitive(primitive) = dtype.to_physical_type() { use PrimitiveType::*; matches!( primitive, @@ -120,11 +120,11 @@ pub fn can_sum(data_type: &ArrowDataType) -> bool { /// # Error /// Errors iff the operation is not supported. 
pub fn sum(array: &dyn Array) -> PolarsResult> { - Ok(match array.data_type().to_physical_type() { + Ok(match array.dtype().to_physical_type() { PhysicalType::Primitive(primitive) => with_match_primitive_type!(primitive, |$T| { - let data_type = array.data_type().clone(); + let dtype = array.dtype().clone(); let array = array.as_any().downcast_ref().unwrap(); - Box::new(PrimitiveScalar::new(data_type, sum_primitive::<$T>(array))) + Box::new(PrimitiveScalar::new(dtype, sum_primitive::<$T>(array))) }), _ => { unimplemented!() diff --git a/crates/polars-arrow/src/compute/arity.rs b/crates/polars-arrow/src/compute/arity.rs index 22ac733c2839..529d3242546a 100644 --- a/crates/polars-arrow/src/compute/arity.rs +++ b/crates/polars-arrow/src/compute/arity.rs @@ -21,7 +21,7 @@ use crate::types::NativeType; pub fn unary( array: &PrimitiveArray, op: F, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PrimitiveArray where I: NativeType, @@ -30,7 +30,7 @@ where { let values = array.values().iter().map(|v| op(*v)).collect::>(); - PrimitiveArray::::new(data_type, values.into(), array.validity().cloned()) + PrimitiveArray::::new(dtype, values.into(), array.validity().cloned()) } /// Version of unary that checks for errors in the closure used to create the @@ -38,7 +38,7 @@ where pub fn try_unary( array: &PrimitiveArray, op: F, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PolarsResult> where I: NativeType, @@ -53,7 +53,7 @@ where .into(); Ok(PrimitiveArray::::new( - data_type, + dtype, values, array.validity().cloned(), )) @@ -64,7 +64,7 @@ where pub fn unary_with_bitmap( array: &PrimitiveArray, op: F, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> (PrimitiveArray, Bitmap) where I: NativeType, @@ -85,7 +85,7 @@ where .into(); ( - PrimitiveArray::::new(data_type, values, array.validity().cloned()), + PrimitiveArray::::new(dtype, values, array.validity().cloned()), mut_bitmap.into(), ) } @@ -96,7 +96,7 @@ where pub fn unary_checked( array: &PrimitiveArray, op: F, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PrimitiveArray where I: NativeType, @@ -128,7 +128,7 @@ where let bitmap: Bitmap = mut_bitmap.into(); let validity = combine_validities_and(array.validity(), Some(&bitmap)); - PrimitiveArray::::new(data_type, values, validity) + PrimitiveArray::::new(dtype, values, validity) } /// Applies a binary operations to two primitive arrays. @@ -151,7 +151,7 @@ where pub fn binary( lhs: &PrimitiveArray, rhs: &PrimitiveArray, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> PrimitiveArray where @@ -171,7 +171,7 @@ where .collect::>() .into(); - PrimitiveArray::::new(data_type, values, validity) + PrimitiveArray::::new(dtype, values, validity) } /// Version of binary that checks for errors in the closure used to create the @@ -179,7 +179,7 @@ where pub fn try_binary( lhs: &PrimitiveArray, rhs: &PrimitiveArray, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> PolarsResult> where @@ -199,7 +199,7 @@ where .collect::>>()? .into(); - Ok(PrimitiveArray::::new(data_type, values, validity)) + Ok(PrimitiveArray::::new(dtype, values, validity)) } /// Version of binary that returns an array and bitmap. 
Used when working with @@ -207,7 +207,7 @@ where pub fn binary_with_bitmap( lhs: &PrimitiveArray, rhs: &PrimitiveArray, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> (PrimitiveArray, Bitmap) where @@ -234,7 +234,7 @@ where .into(); ( - PrimitiveArray::::new(data_type, values, validity), + PrimitiveArray::::new(dtype, values, validity), mut_bitmap.into(), ) } @@ -245,7 +245,7 @@ where pub fn binary_checked( lhs: &PrimitiveArray, rhs: &PrimitiveArray, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> PrimitiveArray where @@ -283,5 +283,5 @@ where // as Null let validity = combine_validities_and(validity.as_ref(), Some(&bitmap)); - PrimitiveArray::::new(data_type, values, validity) + PrimitiveArray::::new(dtype, values, validity) } diff --git a/crates/polars-arrow/src/compute/bitwise.rs b/crates/polars-arrow/src/compute/bitwise.rs index 37c26542b848..1762fb430e58 100644 --- a/crates/polars-arrow/src/compute/bitwise.rs +++ b/crates/polars-arrow/src/compute/bitwise.rs @@ -12,7 +12,7 @@ pub fn or(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArray where T: NativeType + BitOr, { - binary(lhs, rhs, lhs.data_type().clone(), |a, b| a | b) + binary(lhs, rhs, lhs.dtype().clone(), |a, b| a | b) } /// Performs `XOR` operation between two [`PrimitiveArray`]s. @@ -22,7 +22,7 @@ pub fn xor(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArra where T: NativeType + BitXor, { - binary(lhs, rhs, lhs.data_type().clone(), |a, b| a ^ b) + binary(lhs, rhs, lhs.dtype().clone(), |a, b| a ^ b) } /// Performs `AND` operation on two [`PrimitiveArray`]s. @@ -32,7 +32,7 @@ pub fn and(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveArra where T: NativeType + BitAnd, { - binary(lhs, rhs, lhs.data_type().clone(), |a, b| a & b) + binary(lhs, rhs, lhs.dtype().clone(), |a, b| a & b) } /// Returns a new [`PrimitiveArray`] with the bitwise `not`. @@ -41,7 +41,7 @@ where T: NativeType + Not, { let op = move |a: T| !a; - unary(array, op, array.data_type().clone()) + unary(array, op, array.dtype().clone()) } /// Performs `OR` operation between a [`PrimitiveArray`] and scalar. @@ -51,7 +51,7 @@ pub fn or_scalar(lhs: &PrimitiveArray, rhs: &T) -> PrimitiveArray where T: NativeType + BitOr, { - unary(lhs, |a| a | *rhs, lhs.data_type().clone()) + unary(lhs, |a| a | *rhs, lhs.dtype().clone()) } /// Performs `XOR` operation between a [`PrimitiveArray`] and scalar. @@ -61,7 +61,7 @@ pub fn xor_scalar(lhs: &PrimitiveArray, rhs: &T) -> PrimitiveArray where T: NativeType + BitXor, { - unary(lhs, |a| a ^ *rhs, lhs.data_type().clone()) + unary(lhs, |a| a ^ *rhs, lhs.dtype().clone()) } /// Performs `AND` operation between a [`PrimitiveArray`] and scalar. 
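Illustration only, not part of the patch: how the renamed `dtype` parameter of `compute::arity::unary` (shown in the arity.rs hunks above) is passed through from the input array; the `polars_arrow::compute::arity` module path is assumed.

```rust
use polars_arrow::array::PrimitiveArray;
use polars_arrow::compute::arity::unary;

// The output type is still supplied explicitly; only the parameter name changed.
let a = PrimitiveArray::<i32>::from_slice([1, 2, 3]);
let doubled = unary(&a, |v| v * 2, a.dtype().clone());
assert_eq!(doubled.values().as_slice(), &[2, 4, 6]);
```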
@@ -71,5 +71,5 @@ pub fn and_scalar(lhs: &PrimitiveArray, rhs: &T) -> PrimitiveArray where T: NativeType + BitAnd, { - unary(lhs, |a| a & *rhs, lhs.data_type().clone()) + unary(lhs, |a| a & *rhs, lhs.dtype().clone()) } diff --git a/crates/polars-arrow/src/compute/cast/binary_to.rs b/crates/polars-arrow/src/compute/cast/binary_to.rs index 72c5f20922b0..3c0acb006890 100644 --- a/crates/polars-arrow/src/compute/cast/binary_to.rs +++ b/crates/polars-arrow/src/compute/cast/binary_to.rs @@ -53,11 +53,11 @@ impl Parse for f64 { /// Conversion of binary pub fn binary_to_large_binary( from: &BinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> BinaryArray { let values = from.values().clone(); BinaryArray::::new( - to_data_type, + to_dtype, from.offsets().into(), values, from.validity().cloned(), @@ -67,12 +67,12 @@ pub fn binary_to_large_binary( /// Conversion of binary pub fn binary_large_to_binary( from: &BinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> PolarsResult> { let values = from.values().clone(); let offsets = from.offsets().try_into()?; Ok(BinaryArray::::new( - to_data_type, + to_dtype, offsets, values, from.validity().cloned(), @@ -82,10 +82,10 @@ pub fn binary_large_to_binary( /// Conversion to utf8 pub fn binary_to_utf8( from: &BinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> PolarsResult> { Utf8Array::::try_new( - to_data_type, + to_dtype, from.offsets().clone(), from.values().clone(), from.validity().cloned(), @@ -97,12 +97,12 @@ pub fn binary_to_utf8( /// This function errors if the values are not valid utf8 pub fn binary_to_large_utf8( from: &BinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> PolarsResult> { let values = from.values().clone(); let offsets = from.offsets().into(); - Utf8Array::::try_new(to_data_type, offsets, values, from.validity().cloned()) + Utf8Array::::try_new(to_dtype, offsets, values, from.validity().cloned()) } /// Casts a [`BinaryArray`] to a [`PrimitiveArray`], making any uncastable value a Null. @@ -169,12 +169,12 @@ fn fixed_size_to_offsets(values_len: usize, fixed_size: usize) -> Off /// Conversion of `FixedSizeBinary` to `Binary`. 
pub fn fixed_size_binary_binary( from: &FixedSizeBinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> BinaryArray { let values = from.values().clone(); let offsets = fixed_size_to_offsets(values.len(), from.size()); BinaryArray::::new( - to_data_type, + to_dtype, offsets.into(), values, from.validity().cloned(), @@ -250,12 +250,12 @@ pub fn fixed_size_binary_to_binview(from: &FixedSizeBinaryArray) -> BinaryViewAr /// Conversion of binary pub fn binary_to_list( from: &BinaryArray, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> ListArray { let values = from.values().clone(); let values = PrimitiveArray::new(ArrowDataType::UInt8, values, None); ListArray::::new( - to_data_type, + to_dtype, from.offsets().clone(), values.boxed(), from.validity().cloned(), diff --git a/crates/polars-arrow/src/compute/cast/binview_to.rs b/crates/polars-arrow/src/compute/cast/binview_to.rs index 5a12a14aaca7..406fdcc14e80 100644 --- a/crates/polars-arrow/src/compute/cast/binview_to.rs +++ b/crates/polars-arrow/src/compute/cast/binview_to.rs @@ -51,7 +51,7 @@ pub fn utf8view_to_utf8(array: &Utf8ViewArray) -> Utf8Array { let array = array.to_binview(); let out = view_to_binary::(&array); - let dtype = Utf8Array::::default_data_type(); + let dtype = Utf8Array::::default_dtype(); unsafe { Utf8Array::new_unchecked( dtype, diff --git a/crates/polars-arrow/src/compute/cast/decimal_to.rs b/crates/polars-arrow/src/compute/cast/decimal_to.rs index dd2f29e1a443..babd17158143 100644 --- a/crates/polars-arrow/src/compute/cast/decimal_to.rs +++ b/crates/polars-arrow/src/compute/cast/decimal_to.rs @@ -37,7 +37,7 @@ pub fn decimal_to_decimal( to_scale: usize, ) -> PrimitiveArray { let (from_precision, from_scale) = - if let ArrowDataType::Decimal(p, s) = from.data_type().to_logical_type() { + if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { panic!("internal error: i128 is always a decimal") @@ -86,7 +86,7 @@ where T: NativeType + Float, f64: AsPrimitive, { - let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.data_type().to_logical_type() { + let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { panic!("internal error: i128 is always a decimal") @@ -116,7 +116,7 @@ pub fn decimal_to_integer(from: &PrimitiveArray) -> PrimitiveArray where T: NativeType + NumCast, { - let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.data_type().to_logical_type() { + let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { panic!("internal error: i128 is always a decimal") @@ -139,7 +139,7 @@ where /// Returns a [`Utf8Array`] where every element is the utf8 representation of the decimal. 
#[cfg(feature = "dtype-decimal")] pub(super) fn decimal_to_utf8view(from: &PrimitiveArray) -> Utf8ViewArray { - let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.data_type().to_logical_type() { + let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { panic!("internal error: i128 is always a decimal") diff --git a/crates/polars-arrow/src/compute/cast/dictionary_to.rs b/crates/polars-arrow/src/compute/cast/dictionary_to.rs index 134c9af7991f..a253ea0e05ff 100644 --- a/crates/polars-arrow/src/compute/cast/dictionary_to.rs +++ b/crates/polars-arrow/src/compute/cast/dictionary_to.rs @@ -39,7 +39,7 @@ pub fn dictionary_to_dictionary_values( assert_eq!(values.len(), length); // this is guaranteed by `cast` unsafe { - DictionaryArray::try_new_unchecked(from.data_type().clone(), keys.clone(), values.clone()) + DictionaryArray::try_new_unchecked(from.dtype().clone(), keys.clone(), values.clone()) } } @@ -62,7 +62,7 @@ pub fn wrapping_dictionary_to_dictionary_values( )?; assert_eq!(values.len(), length); // this is guaranteed by `cast` unsafe { - DictionaryArray::try_new_unchecked(from.data_type().clone(), keys.clone(), values.clone()) + DictionaryArray::try_new_unchecked(from.dtype().clone(), keys.clone(), values.clone()) } } @@ -87,13 +87,13 @@ where if casted_keys.null_count() > keys.null_count() { polars_bail!(ComputeError: "overflow") } else { - let data_type = ArrowDataType::Dictionary( + let dtype = ArrowDataType::Dictionary( K2::KEY_TYPE, - Box::new(values.data_type().clone()), + Box::new(values.dtype().clone()), is_ordered, ); // SAFETY: this is safe because given a type `T` that fits in a `usize`, casting it to type `P` either overflows or also fits in a `usize` - unsafe { DictionaryArray::try_new_unchecked(data_type, casted_keys, values.clone()) } + unsafe { DictionaryArray::try_new_unchecked(dtype, casted_keys, values.clone()) } } } @@ -114,13 +114,13 @@ where if casted_keys.null_count() > keys.null_count() { polars_bail!(ComputeError: "overflow") } else { - let data_type = ArrowDataType::Dictionary( + let dtype = ArrowDataType::Dictionary( K2::KEY_TYPE, - Box::new(values.data_type().clone()), + Box::new(values.dtype().clone()), is_ordered, ); // some of the values may not fit in `usize` and thus this needs to be checked - DictionaryArray::try_new(data_type, casted_keys, values.clone()) + DictionaryArray::try_new(dtype, casted_keys, values.clone()) } } diff --git a/crates/polars-arrow/src/compute/cast/mod.rs b/crates/polars-arrow/src/compute/cast/mod.rs index 0afa67ec875a..9193abe8d476 100644 --- a/crates/polars-arrow/src/compute/cast/mod.rs +++ b/crates/polars-arrow/src/compute/cast/mod.rs @@ -89,7 +89,7 @@ fn cast_struct( let new_values = values .iter() .zip(fields) - .map(|(arr, field)| cast(arr.as_ref(), field.data_type(), options)) + .map(|(arr, field)| cast(arr.as_ref(), field.dtype(), options)) .collect::>>()?; Ok(StructArray::new( @@ -190,7 +190,7 @@ fn cast_list_to_fixed_size_list( list.offsets().first().to_usize(), list.offsets().range().to_usize(), ); - cast(sliced_values.as_ref(), inner.data_type(), options)? + cast(sliced_values.as_ref(), inner.dtype(), options)? }, } } else { @@ -230,7 +230,7 @@ fn cast_list_to_fixed_size_list( crate::compute::take::take_unchecked(list.values().as_ref(), &indices.freeze()) }; - cast(take_values.as_ref(), inner.data_type(), options)? + cast(take_values.as_ref(), inner.dtype(), options)? 
}; FixedSizeListArray::try_new( ArrowDataType::FixedSizeList(Box::new(inner.clone()), size), @@ -279,7 +279,7 @@ pub fn cast( options: CastOptionsImpl, ) -> PolarsResult> { use ArrowDataType::*; - let from_type = array.data_type(); + let from_type = array.dtype(); // clone array if types are the same if from_type == to_type { @@ -350,7 +350,7 @@ pub fn cast( Int64 => binview_to_primitive_dyn::(array, to_type, options), Float32 => binview_to_primitive_dyn::(array, to_type, options), Float64 => binview_to_primitive_dyn::(array, to_type, options), - LargeList(inner) if matches!(inner.data_type, ArrowDataType::UInt8) => { + LargeList(inner) if matches!(inner.dtype, ArrowDataType::UInt8) => { let bin_array = view_to_binary::(array.as_any().downcast_ref().unwrap()); Ok(binary_to_list(&bin_array, to_type.clone()).boxed()) }, @@ -371,7 +371,7 @@ pub fn cast( (_, List(to)) => { // cast primitive to list's primitive - let values = cast(array, &to.data_type, options)?; + let values = cast(array, &to.dtype, options)?; // create offsets, where if array.len() = 2, we have [0,1,2] let offsets = (0..=array.len() as i32).collect::>(); // SAFETY: offsets _are_ monotonically increasing @@ -384,7 +384,7 @@ pub fn cast( (_, LargeList(to)) if from_type != &LargeBinary => { // cast primitive to list's primitive - let values = cast(array, &to.data_type, options)?; + let values = cast(array, &to.dtype, options)?; // create offsets, where if array.len() = 2, we have [0,1,2] let offsets = (0..=array.len() as i64).collect::>(); // SAFETY: offsets _are_ monotonically increasing diff --git a/crates/polars-arrow/src/compute/cast/primitive_to.rs b/crates/polars-arrow/src/compute/cast/primitive_to.rs index 13fc8c8be3f0..6fa9b9fb01fd 100644 --- a/crates/polars-arrow/src/compute/cast/primitive_to.rs +++ b/crates/polars-arrow/src/compute/cast/primitive_to.rs @@ -123,7 +123,7 @@ pub(super) fn primitive_to_utf8( let (values, offsets) = primitive_to_values_and_offsets(from); unsafe { Utf8Array::::new_unchecked( - Utf8Array::::default_data_type(), + Utf8Array::::default_dtype(), offsets.into(), values.into(), from.validity().cloned(), @@ -317,7 +317,7 @@ pub fn primitive_to_dictionary( ) -> PolarsResult> { let iter = from.iter().map(|x| x.copied()); let mut array = MutableDictionaryArray::::try_empty(MutablePrimitiveArray::::from( - from.data_type().clone(), + from.dtype().clone(), ))?; array.reserve(from.len()); array.try_extend(iter)?; diff --git a/crates/polars-arrow/src/compute/cast/utf8_to.rs b/crates/polars-arrow/src/compute/cast/utf8_to.rs index 85b478c43817..e6ab12c3b2a1 100644 --- a/crates/polars-arrow/src/compute/cast/utf8_to.rs +++ b/crates/polars-arrow/src/compute/cast/utf8_to.rs @@ -35,35 +35,35 @@ pub fn utf8_to_dictionary( /// Conversion of utf8 pub fn utf8_to_large_utf8(from: &Utf8Array) -> Utf8Array { - let data_type = Utf8Array::::default_data_type(); + let dtype = Utf8Array::::default_dtype(); let validity = from.validity().cloned(); let values = from.values().clone(); let offsets = from.offsets().into(); // SAFETY: sound because `values` fulfills the same invariants as `from.values()` - unsafe { Utf8Array::::new_unchecked(data_type, offsets, values, validity) } + unsafe { Utf8Array::::new_unchecked(dtype, offsets, values, validity) } } /// Conversion of utf8 pub fn utf8_large_to_utf8(from: &Utf8Array) -> PolarsResult> { - let data_type = Utf8Array::::default_data_type(); + let dtype = Utf8Array::::default_dtype(); let validity = from.validity().cloned(); let values = from.values().clone(); let offsets = 
from.offsets().try_into()?; // SAFETY: sound because `values` fulfills the same invariants as `from.values()` - Ok(unsafe { Utf8Array::::new_unchecked(data_type, offsets, values, validity) }) + Ok(unsafe { Utf8Array::::new_unchecked(dtype, offsets, values, validity) }) } /// Conversion to binary pub fn utf8_to_binary( from: &Utf8Array, - to_data_type: ArrowDataType, + to_dtype: ArrowDataType, ) -> BinaryArray { // SAFETY: erasure of an invariant is always safe unsafe { BinaryArray::::new( - to_data_type, + to_dtype, from.offsets().clone(), from.values().clone(), from.validity().cloned(), diff --git a/crates/polars-arrow/src/compute/concatenate.rs b/crates/polars-arrow/src/compute/concatenate.rs index 001a183de653..1951cad9f4f5 100644 --- a/crates/polars-arrow/src/compute/concatenate.rs +++ b/crates/polars-arrow/src/compute/concatenate.rs @@ -13,7 +13,7 @@ pub fn concatenate(arrays: &[&dyn Array]) -> PolarsResult> { if arrays .iter() - .any(|array| array.data_type() != arrays[0].data_type()) + .any(|array| array.dtype() != arrays[0].dtype()) { polars_bail!(InvalidOperation: "It is not possible to concatenate arrays of different data types.") } diff --git a/crates/polars-arrow/src/compute/take/binary.rs b/crates/polars-arrow/src/compute/take/binary.rs index 8d2b971ced8f..576fbc8e4f37 100644 --- a/crates/polars-arrow/src/compute/take/binary.rs +++ b/crates/polars-arrow/src/compute/take/binary.rs @@ -25,7 +25,7 @@ pub unsafe fn take_unchecked( values: &BinaryArray, indices: &PrimitiveArray, ) -> BinaryArray { - let data_type = values.data_type().clone(); + let dtype = values.dtype().clone(); let indices_has_validity = indices.null_count() > 0; let values_has_validity = values.null_count() > 0; @@ -37,5 +37,5 @@ pub unsafe fn take_unchecked( (false, true) => take_indices_validity(values.offsets(), values.values(), indices), (true, true) => take_values_indices_validity(values, indices), }; - BinaryArray::::new_unchecked(data_type, offsets, values, validity) + BinaryArray::::new_unchecked(dtype, offsets, values, validity) } diff --git a/crates/polars-arrow/src/compute/take/binview.rs b/crates/polars-arrow/src/compute/take/binview.rs index 65ff633a080a..02b0272be873 100644 --- a/crates/polars-arrow/src/compute/take/binview.rs +++ b/crates/polars-arrow/src/compute/take/binview.rs @@ -12,7 +12,7 @@ pub(super) unsafe fn take_binview_unchecked( take_values_and_validity_unchecked(arr.views(), arr.validity(), indices); BinaryViewArray::new_unchecked_unknown_md( - arr.data_type().clone(), + arr.dtype().clone(), views.into(), arr.data_buffers().clone(), validity, diff --git a/crates/polars-arrow/src/compute/take/boolean.rs b/crates/polars-arrow/src/compute/take/boolean.rs index 3e6008d54652..745c7036c16b 100644 --- a/crates/polars-arrow/src/compute/take/boolean.rs +++ b/crates/polars-arrow/src/compute/take/boolean.rs @@ -63,7 +63,7 @@ pub unsafe fn take_unchecked( values: &BooleanArray, indices: &PrimitiveArray, ) -> BooleanArray { - let data_type = values.data_type().clone(); + let dtype = values.dtype().clone(); let indices_has_validity = indices.null_count() > 0; let values_has_validity = values.null_count() > 0; @@ -74,5 +74,5 @@ pub unsafe fn take_unchecked( (true, true) => take_values_indices_validity(values, indices), }; - BooleanArray::new(data_type, values, validity) + BooleanArray::new(dtype, values, validity) } diff --git a/crates/polars-arrow/src/compute/take/list.rs b/crates/polars-arrow/src/compute/take/list.rs index 547b738e0acb..36ca1f72131f 100644 --- 
a/crates/polars-arrow/src/compute/take/list.rs +++ b/crates/polars-arrow/src/compute/take/list.rs @@ -28,7 +28,7 @@ pub(super) unsafe fn take_unchecked( ) -> ListArray { // fast-path: all values to take are none if indices.null_count() == indices.len() { - return ListArray::::new_null(values.data_type().clone(), indices.len()); + return ListArray::::new_null(values.dtype().clone(), indices.len()); } let mut capacity = 0; diff --git a/crates/polars-arrow/src/compute/take/mod.rs b/crates/polars-arrow/src/compute/take/mod.rs index 34b62802dc12..aed14823af1e 100644 --- a/crates/polars-arrow/src/compute/take/mod.rs +++ b/crates/polars-arrow/src/compute/take/mod.rs @@ -40,12 +40,12 @@ use crate::with_match_primitive_type_full; /// Doesn't do bound checks pub unsafe fn take_unchecked(values: &dyn Array, indices: &IdxArr) -> Box { if indices.len() == 0 { - return new_empty_array(values.data_type().clone()); + return new_empty_array(values.dtype().clone()); } use crate::datatypes::PhysicalType::*; - match values.data_type().to_physical_type() { - Null => Box::new(NullArray::new(values.data_type().clone(), indices.len())), + match values.dtype().to_physical_type() { + Null => Box::new(NullArray::new(values.dtype().clone(), indices.len())), Boolean => { let values = values.as_any().downcast_ref().unwrap(); Box::new(boolean::take_unchecked(values, indices)) diff --git a/crates/polars-arrow/src/compute/take/primitive.rs b/crates/polars-arrow/src/compute/take/primitive.rs index c8686201fdbb..8997323b5c15 100644 --- a/crates/polars-arrow/src/compute/take/primitive.rs +++ b/crates/polars-arrow/src/compute/take/primitive.rs @@ -76,5 +76,5 @@ pub unsafe fn take_primitive_unchecked( ) -> PrimitiveArray { let (values, validity) = take_values_and_validity_unchecked(arr.values(), arr.validity(), indices); - PrimitiveArray::new_unchecked(arr.data_type().clone(), values.into(), validity) + PrimitiveArray::new_unchecked(arr.dtype().clone(), values.into(), validity) } diff --git a/crates/polars-arrow/src/compute/take/structure.rs b/crates/polars-arrow/src/compute/take/structure.rs index 3619dae307bb..caad9f4ee0a4 100644 --- a/crates/polars-arrow/src/compute/take/structure.rs +++ b/crates/polars-arrow/src/compute/take/structure.rs @@ -30,5 +30,5 @@ pub(super) unsafe fn take_unchecked(array: &StructArray, indices: &IdxArr) -> St .validity() .map(|b| super::bitmap::take_bitmap_nulls_unchecked(b, indices)); let validity = combine_validities_and(validity.as_ref(), indices.validity()); - StructArray::new(array.data_type().clone(), values, validity) + StructArray::new(array.dtype().clone(), values, validity) } diff --git a/crates/polars-arrow/src/compute/temporal.rs b/crates/polars-arrow/src/compute/temporal.rs index 6bc76aa0f9a3..309493fbbbdb 100644 --- a/crates/polars-arrow/src/compute/temporal.rs +++ b/crates/polars-arrow/src/compute/temporal.rs @@ -51,10 +51,10 @@ impl Int8IsoWeek for chrono::DateTime {} // Macro to avoid repetition in functions, that apply // `chrono::Datelike` methods on Arrays macro_rules! 
date_like { - ($extract:ident, $array:ident, $data_type:path) => { - match $array.data_type().to_logical_type() { + ($extract:ident, $array:ident, $dtype:path) => { + match $array.dtype().to_logical_type() { ArrowDataType::Date32 | ArrowDataType::Date64 | ArrowDataType::Timestamp(_, None) => { - date_variants($array, $data_type, |x| x.$extract().try_into().unwrap()) + date_variants($array, $dtype, |x| x.$extract().try_into().unwrap()) }, ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => { let array = $array.as_any().downcast_ref().unwrap(); @@ -116,10 +116,10 @@ pub fn iso_week(array: &dyn Array) -> PolarsResult> { // Macro to avoid repetition in functions, that apply // `chrono::Timelike` methods on Arrays macro_rules! time_like { - ($extract:ident, $array:ident, $data_type:path) => { - match $array.data_type().to_logical_type() { + ($extract:ident, $array:ident, $dtype:path) => { + match $array.dtype().to_logical_type() { ArrowDataType::Date32 | ArrowDataType::Date64 | ArrowDataType::Timestamp(_, None) => { - date_variants($array, $data_type, |x| x.$extract().try_into().unwrap()) + date_variants($array, $dtype, |x| x.$extract().try_into().unwrap()) }, ArrowDataType::Time32(_) | ArrowDataType::Time64(_) => { time_variants($array, ArrowDataType::UInt32, |x| { @@ -176,27 +176,27 @@ pub fn nanosecond(array: &dyn Array) -> PolarsResult> { fn date_variants( array: &dyn Array, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> PolarsResult> where O: NativeType, F: Fn(chrono::NaiveDateTime) -> O, { - match array.data_type().to_logical_type() { + match array.dtype().to_logical_type() { ArrowDataType::Date32 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) + Ok(unary(array, |x| op(date32_to_datetime(x)), dtype)) }, ArrowDataType::Date64 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) + Ok(unary(array, |x| op(date64_to_datetime(x)), dtype)) }, ArrowDataType::Timestamp(time_unit, None) => { let array = array @@ -219,41 +219,41 @@ where fn time_variants( array: &dyn Array, - data_type: ArrowDataType, + dtype: ArrowDataType, op: F, ) -> PolarsResult> where O: NativeType, F: Fn(chrono::NaiveTime) -> O, { - match array.data_type().to_logical_type() { + match array.dtype().to_logical_type() { ArrowDataType::Time32(TimeUnit::Second) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(time32s_to_time(x)), data_type)) + Ok(unary(array, |x| op(time32s_to_time(x)), dtype)) }, ArrowDataType::Time32(TimeUnit::Millisecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(time32ms_to_time(x)), data_type)) + Ok(unary(array, |x| op(time32ms_to_time(x)), dtype)) }, ArrowDataType::Time64(TimeUnit::Microsecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(time64us_to_time(x)), data_type)) + Ok(unary(array, |x| op(time64us_to_time(x)), dtype)) }, ArrowDataType::Time64(TimeUnit::Nanosecond) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary(array, |x| op(time64ns_to_time(x)), data_type)) + Ok(unary(array, |x| op(time64ns_to_time(x)), dtype)) }, _ => unreachable!(), } @@ -356,33 +356,33 @@ where /// assert_eq!(can_year(&ArrowDataType::Date32), true); /// assert_eq!(can_year(&ArrowDataType::Int8), false); /// ``` -pub fn can_year(data_type: &ArrowDataType) -> bool { - can_date(data_type) +pub fn can_year(dtype: 
&ArrowDataType) -> bool { + can_date(dtype) } /// Checks if an array of type `datatype` can perform month operation -pub fn can_month(data_type: &ArrowDataType) -> bool { - can_date(data_type) +pub fn can_month(dtype: &ArrowDataType) -> bool { + can_date(dtype) } /// Checks if an array of type `datatype` can perform day operation -pub fn can_day(data_type: &ArrowDataType) -> bool { - can_date(data_type) +pub fn can_day(dtype: &ArrowDataType) -> bool { + can_date(dtype) } -/// Checks if an array of type `data_type` can perform weekday operation -pub fn can_weekday(data_type: &ArrowDataType) -> bool { - can_date(data_type) +/// Checks if an array of type `dtype` can perform weekday operation +pub fn can_weekday(dtype: &ArrowDataType) -> bool { + can_date(dtype) } -/// Checks if an array of type `data_type` can perform ISO week operation -pub fn can_iso_week(data_type: &ArrowDataType) -> bool { - can_date(data_type) +/// Checks if an array of type `dtype` can perform ISO week operation +pub fn can_iso_week(dtype: &ArrowDataType) -> bool { + can_date(dtype) } -fn can_date(data_type: &ArrowDataType) -> bool { +fn can_date(dtype: &ArrowDataType) -> bool { matches!( - data_type, + dtype, ArrowDataType::Date32 | ArrowDataType::Date64 | ArrowDataType::Timestamp(_, _) ) } @@ -397,28 +397,28 @@ fn can_date(data_type: &ArrowDataType) -> bool { /// assert_eq!(can_hour(&ArrowDataType::Time32(TimeUnit::Second)), true); /// assert_eq!(can_hour(&ArrowDataType::Int8), false); /// ``` -pub fn can_hour(data_type: &ArrowDataType) -> bool { - can_time(data_type) +pub fn can_hour(dtype: &ArrowDataType) -> bool { + can_time(dtype) } /// Checks if an array of type `datatype` can perform minute operation -pub fn can_minute(data_type: &ArrowDataType) -> bool { - can_time(data_type) +pub fn can_minute(dtype: &ArrowDataType) -> bool { + can_time(dtype) } /// Checks if an array of type `datatype` can perform second operation -pub fn can_second(data_type: &ArrowDataType) -> bool { - can_time(data_type) +pub fn can_second(dtype: &ArrowDataType) -> bool { + can_time(dtype) } /// Checks if an array of type `datatype` can perform nanosecond operation -pub fn can_nanosecond(data_type: &ArrowDataType) -> bool { - can_time(data_type) +pub fn can_nanosecond(dtype: &ArrowDataType) -> bool { + can_time(dtype) } -fn can_time(data_type: &ArrowDataType) -> bool { +fn can_time(dtype: &ArrowDataType) -> bool { matches!( - data_type, + dtype, ArrowDataType::Time32(TimeUnit::Second) | ArrowDataType::Time32(TimeUnit::Millisecond) | ArrowDataType::Time64(TimeUnit::Microsecond) diff --git a/crates/polars-arrow/src/datatypes/field.rs b/crates/polars-arrow/src/datatypes/field.rs index 4f078ab9d569..cf26c100be8d 100644 --- a/crates/polars-arrow/src/datatypes/field.rs +++ b/crates/polars-arrow/src/datatypes/field.rs @@ -18,7 +18,7 @@ pub struct Field { /// Its name pub name: PlSmallStr, /// Its logical [`ArrowDataType`] - pub data_type: ArrowDataType, + pub dtype: ArrowDataType, /// Its nullability pub is_nullable: bool, /// Additional custom (opaque) metadata. @@ -34,10 +34,10 @@ impl From for (PlSmallStr, Field) { impl Field { /// Creates a new [`Field`]. 
- pub fn new(name: PlSmallStr, data_type: ArrowDataType, is_nullable: bool) -> Self { + pub fn new(name: PlSmallStr, dtype: ArrowDataType, is_nullable: bool) -> Self { Field { name, - data_type, + dtype, is_nullable, metadata: Default::default(), } @@ -48,7 +48,7 @@ impl Field { pub fn with_metadata(self, metadata: Metadata) -> Self { Self { name: self.name, - data_type: self.data_type, + dtype: self.dtype, is_nullable: self.is_nullable, metadata, } @@ -56,8 +56,8 @@ impl Field { /// Returns the [`Field`]'s [`ArrowDataType`]. #[inline] - pub fn data_type(&self) -> &ArrowDataType { - &self.data_type + pub fn dtype(&self) -> &ArrowDataType { + &self.dtype } } @@ -66,7 +66,7 @@ impl From for arrow_schema::Field { fn from(value: Field) -> Self { Self::new( value.name.to_string(), - value.data_type.into(), + value.dtype.into(), value.is_nullable, ) .with_metadata( @@ -89,7 +89,7 @@ impl From for Field { #[cfg(feature = "arrow_rs")] impl From<&arrow_schema::Field> for Field { fn from(value: &arrow_schema::Field) -> Self { - let data_type = value.data_type().clone().into(); + let dtype = value.dtype().clone().into(); let metadata = value .metadata() .iter() @@ -97,7 +97,7 @@ impl From<&arrow_schema::Field> for Field { .collect(); Self::new( PlSmallStr::from_str(value.name().as_str()), - data_type, + dtype, value.is_nullable(), ) .with_metadata(metadata) diff --git a/crates/polars-arrow/src/datatypes/mod.rs b/crates/polars-arrow/src/datatypes/mod.rs index 85a2df5ada54..6ef9687f146e 100644 --- a/crates/polars-arrow/src/datatypes/mod.rs +++ b/crates/polars-arrow/src/datatypes/mod.rs @@ -497,16 +497,16 @@ impl ArrowDataType { Interval(IntervalUnit::MonthDayNano) => unimplemented!(), Binary => Binary, List(field) => List(Box::new(Field { - data_type: field.data_type.underlying_physical_type(), + dtype: field.dtype.underlying_physical_type(), ..*field.clone() })), LargeList(field) => LargeList(Box::new(Field { - data_type: field.data_type.underlying_physical_type(), + dtype: field.dtype.underlying_physical_type(), ..*field.clone() })), FixedSizeList(field, width) => FixedSizeList( Box::new(Field { - data_type: field.data_type.underlying_physical_type(), + dtype: field.dtype.underlying_physical_type(), ..*field.clone() }), *width, @@ -515,7 +515,7 @@ impl ArrowDataType { fields .iter() .map(|field| Field { - data_type: field.data_type.underlying_physical_type(), + dtype: field.dtype.underlying_physical_type(), ..field.clone() }) .collect(), @@ -541,9 +541,9 @@ impl ArrowDataType { pub fn inner_dtype(&self) -> Option<&ArrowDataType> { match self { - ArrowDataType::List(inner) => Some(inner.data_type()), - ArrowDataType::LargeList(inner) => Some(inner.data_type()), - ArrowDataType::FixedSizeList(inner, _) => Some(inner.data_type()), + ArrowDataType::List(inner) => Some(inner.dtype()), + ArrowDataType::LargeList(inner) => Some(inner.dtype()), + ArrowDataType::FixedSizeList(inner, _) => Some(inner.dtype()), _ => None, } } diff --git a/crates/polars-arrow/src/ffi/array.rs b/crates/polars-arrow/src/ffi/array.rs index 34abe43704ed..806f8810d635 100644 --- a/crates/polars-arrow/src/ffi/array.rs +++ b/crates/polars-arrow/src/ffi/array.rs @@ -19,7 +19,7 @@ use crate::{match_integer_type, with_match_primitive_type_full}; /// * the interface is not valid (e.g. 
a null pointer) pub unsafe fn try_from(array: A) -> PolarsResult> { use PhysicalType::*; - Ok(match array.data_type().to_physical_type() { + Ok(match array.dtype().to_physical_type() { Null => Box::new(NullArray::try_from_ffi(array)?), Boolean => Box::new(BooleanArray::try_from_ffi(array)?), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { @@ -99,7 +99,7 @@ impl ArrowArray { /// releasing this struct, or contents in `buffers` leak. pub(crate) fn new(array: Box) -> Self { let needs_variadic_buffer_sizes = matches!( - array.data_type(), + array.dtype(), ArrowDataType::BinaryView | ArrowDataType::Utf8View ); @@ -207,12 +207,12 @@ impl ArrowArray { /// The caller must ensure that the buffer at index `i` is not mutably shared. unsafe fn get_buffer_ptr( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, index: usize, ) -> PolarsResult<*mut T> { if array.buffers.is_null() { polars_bail!( ComputeError: - "an ArrowArray of type {data_type:?} must have non-null buffers" + "an ArrowArray of type {dtype:?} must have non-null buffers" ); } @@ -222,7 +222,7 @@ unsafe fn get_buffer_ptr( != 0 { polars_bail!( ComputeError: - "an ArrowArray of type {data_type:?} + "an ArrowArray of type {dtype:?} must have buffer {index} aligned to type {}", std::any::type_name::<*mut *const u8>() ); @@ -231,7 +231,7 @@ unsafe fn get_buffer_ptr( if index >= array.n_buffers as usize { polars_bail!(ComputeError: - "An ArrowArray of type {data_type:?} + "An ArrowArray of type {dtype:?} must have buffer {index}." ) } @@ -239,7 +239,7 @@ unsafe fn get_buffer_ptr( let ptr = *buffers.add(index); if ptr.is_null() { polars_bail!(ComputeError: - "An array of type {data_type:?} + "An array of type {dtype:?} must have a non-null buffer {index}" ) } @@ -250,7 +250,7 @@ unsafe fn get_buffer_ptr( unsafe fn create_buffer_known_len( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, owner: InternalArrowArray, len: usize, index: usize, @@ -258,7 +258,7 @@ unsafe fn create_buffer_known_len( if len == 0 { return Ok(Buffer::new()); } - let ptr: *mut T = get_buffer_ptr(array, data_type, index)?; + let ptr: *mut T = get_buffer_ptr(array, dtype, index)?; let bytes = Bytes::from_foreign(ptr, len, BytesAllocator::InternalArrowArray(owner)); Ok(Buffer::from_bytes(bytes)) } @@ -270,18 +270,18 @@ unsafe fn create_buffer_known_len( /// * the buffers' pointers are not mutably shared for the lifetime of `owner` unsafe fn create_buffer( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, owner: InternalArrowArray, index: usize, ) -> PolarsResult> { - let len = buffer_len(array, data_type, index)?; + let len = buffer_len(array, dtype, index)?; if len == 0 { return Ok(Buffer::new()); } - let offset = buffer_offset(array, data_type, index); - let ptr: *mut T = get_buffer_ptr(array, data_type, index)?; + let offset = buffer_offset(array, dtype, index); + let ptr: *mut T = get_buffer_ptr(array, dtype, index)?; // We have to check alignment. // This is the zero-copy path. 
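Illustration only, not part of the patch: the `Field` and `inner_dtype` renames from the datatypes hunks above, as seen by a caller. `Field::new`, `dtype()`, and `inner_dtype` are taken from the patch; the `polars_utils::pl_str::PlSmallStr` import path is an assumption.

```rust
use polars_arrow::datatypes::{ArrowDataType, Field};
use polars_utils::pl_str::PlSmallStr;

// `Field::new` keeps its shape; the second argument and the public field are now `dtype`.
let item = Field::new(PlSmallStr::from_str("item"), ArrowDataType::Int64, true);
assert_eq!(item.dtype(), &ArrowDataType::Int64);

// `inner_dtype` exposes the child type of nested types.
let list = ArrowDataType::LargeList(Box::new(item));
assert_eq!(list.inner_dtype(), Some(&ArrowDataType::Int64));
```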
@@ -304,7 +304,7 @@ unsafe fn create_buffer( /// * the buffers' pointer is not mutable for the lifetime of `owner` unsafe fn create_bitmap( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, owner: InternalArrowArray, index: usize, // if this is the validity bitmap @@ -315,7 +315,7 @@ unsafe fn create_bitmap( if len == 0 { return Ok(Bitmap::new()); } - let ptr = get_buffer_ptr(array, data_type, index)?; + let ptr = get_buffer_ptr(array, dtype, index)?; // Pointer of u8 has alignment 1, so we don't have to check alignment. @@ -336,12 +336,12 @@ unsafe fn create_bitmap( )) } -fn buffer_offset(array: &ArrowArray, data_type: &ArrowDataType, i: usize) -> usize { +fn buffer_offset(array: &ArrowArray, dtype: &ArrowDataType, i: usize) -> usize { use PhysicalType::*; - match (data_type.to_physical_type(), i) { + match (dtype.to_physical_type(), i) { (LargeUtf8, 2) | (LargeBinary, 2) | (Utf8, 2) | (Binary, 2) => 0, (FixedSizeBinary, 1) => { - if let ArrowDataType::FixedSizeBinary(size) = data_type.to_logical_type() { + if let ArrowDataType::FixedSizeBinary(size) = dtype.to_logical_type() { let offset: usize = array.offset.try_into().expect("Offset to fit in `usize`"); offset * *size } else { @@ -355,19 +355,19 @@ fn buffer_offset(array: &ArrowArray, data_type: &ArrowDataType, i: usize) -> usi /// Returns the length, in slots, of the buffer `i` (indexed according to the C data interface) unsafe fn buffer_len( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, i: usize, ) -> PolarsResult { - Ok(match (data_type.to_physical_type(), i) { + Ok(match (dtype.to_physical_type(), i) { (PhysicalType::FixedSizeBinary, 1) => { - if let ArrowDataType::FixedSizeBinary(size) = data_type.to_logical_type() { + if let ArrowDataType::FixedSizeBinary(size) = dtype.to_logical_type() { *size * (array.offset as usize + array.length as usize) } else { unreachable!() } }, (PhysicalType::FixedSizeList, 1) => { - if let ArrowDataType::FixedSizeList(_, size) = data_type.to_logical_type() { + if let ArrowDataType::FixedSizeList(_, size) = dtype.to_logical_type() { *size * (array.offset as usize + array.length as usize) } else { unreachable!() @@ -388,7 +388,7 @@ unsafe fn buffer_len( }, (PhysicalType::Utf8, 2) | (PhysicalType::Binary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) - let len = buffer_len(array, data_type, 1)?; + let len = buffer_len(array, dtype, 1)?; // first buffer is the null buffer => add(1) let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) }; // interpret as i32 @@ -399,7 +399,7 @@ unsafe fn buffer_len( }, (PhysicalType::LargeUtf8, 2) | (PhysicalType::LargeBinary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) - let len = buffer_len(array, data_type, 1)?; + let len = buffer_len(array, dtype, 1)?; // first buffer is the null buffer => add(1) let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) }; // interpret as i64 @@ -421,20 +421,20 @@ unsafe fn buffer_len( /// * the pointer of `array.children` at `index` is not mutably shared for the lifetime of `parent` unsafe fn create_child( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, parent: InternalArrowArray, index: usize, ) -> PolarsResult> { - let data_type = get_child(data_type, index)?; + let dtype = get_child(dtype, index)?; // catch what we can if array.children.is_null() { - polars_bail!(ComputeError: "an ArrowArray of type 
{data_type:?} must have non-null children"); + polars_bail!(ComputeError: "an ArrowArray of type {dtype:?} must have non-null children"); } if index >= array.n_children as usize { polars_bail!(ComputeError: - "an ArrowArray of type {data_type:?} + "an ArrowArray of type {dtype:?} must have child {index}." ); } @@ -445,14 +445,14 @@ unsafe fn create_child( // catch what we can if arr_ptr.is_null() { polars_bail!(ComputeError: - "an array of type {data_type:?} + "an array of type {dtype:?} must have a non-null child {index}" ) } // SAFETY: invariant of this function let arr_ptr = unsafe { &*arr_ptr }; - Ok(ArrowArrayChild::new(arr_ptr, data_type, parent)) + Ok(ArrowArrayChild::new(arr_ptr, dtype, parent)) } /// # Safety @@ -462,22 +462,22 @@ unsafe fn create_child( /// * `array.dictionary` is not mutably shared for the lifetime of `parent` unsafe fn create_dictionary( array: &ArrowArray, - data_type: &ArrowDataType, + dtype: &ArrowDataType, parent: InternalArrowArray, ) -> PolarsResult>> { - if let ArrowDataType::Dictionary(_, values, _) = data_type { - let data_type = values.as_ref().clone(); + if let ArrowDataType::Dictionary(_, values, _) = dtype { + let dtype = values.as_ref().clone(); // catch what we can if array.dictionary.is_null() { polars_bail!(ComputeError: - "an array of type {data_type:?} + "an array of type {dtype:?} must have a non-null dictionary" ) } // SAFETY: part of the invariant let array = unsafe { &*array.dictionary }; - Ok(Some(ArrowArrayChild::new(array, data_type, parent))) + Ok(Some(ArrowArrayChild::new(array, dtype, parent))) } else { Ok(None) } @@ -499,7 +499,7 @@ pub trait ArrowArrayRef: std::fmt::Debug { if self.array().null_count() == 0 { Ok(None) } else { - create_bitmap(self.array(), self.data_type(), self.owner(), 0, true).map(Some) + create_bitmap(self.array(), self.dtype(), self.owner(), 0, true).map(Some) } } @@ -507,7 +507,7 @@ pub trait ArrowArrayRef: std::fmt::Debug { /// The caller must guarantee that the buffer `index` corresponds to a buffer. /// This function assumes that the buffer created from FFI is valid; this is impossible to prove. 
     unsafe fn buffer<T: NativeType>(&self, index: usize) -> PolarsResult<Buffer<T>> {
-        create_buffer::<T>(self.array(), self.data_type(), self.owner(), index)
+        create_buffer::<T>(self.array(), self.dtype(), self.owner(), index)
     }
 
     /// # Safety
@@ -518,7 +518,7 @@ pub trait ArrowArrayRef: std::fmt::Debug {
         index: usize,
         len: usize,
     ) -> PolarsResult<Buffer<T>> {
-        create_buffer_known_len::<T>(self.array(), self.data_type(), self.owner(), len, index)
+        create_buffer_known_len::<T>(self.array(), self.dtype(), self.owner(), len, index)
     }
 
     /// # Safety
@@ -526,7 +526,7 @@ pub trait ArrowArrayRef: std::fmt::Debug {
     /// * the buffer at position `index` is valid for the declared length
     /// * the buffers' pointer is not mutable for the lifetime of `owner`
     unsafe fn bitmap(&self, index: usize) -> PolarsResult<Bitmap> {
-        create_bitmap(self.array(), self.data_type(), self.owner(), index, false)
+        create_bitmap(self.array(), self.dtype(), self.owner(), index, false)
     }
 
     /// # Safety
@@ -535,11 +535,11 @@ pub trait ArrowArrayRef: std::fmt::Debug {
     /// * the pointer of `array.children` at `index` is valid
     /// * the pointer of `array.children` at `index` is not mutably shared for the lifetime of `parent`
     unsafe fn child(&self, index: usize) -> PolarsResult<ArrowArrayChild> {
-        create_child(self.array(), self.data_type(), self.parent().clone(), index)
+        create_child(self.array(), self.dtype(), self.parent().clone(), index)
     }
 
     unsafe fn dictionary(&self) -> PolarsResult<Option<ArrowArrayChild>> {
-        create_dictionary(self.array(), self.data_type(), self.parent().clone())
+        create_dictionary(self.array(), self.dtype(), self.parent().clone())
     }
 
     fn n_buffers(&self) -> usize;
@@ -549,7 +549,7 @@ pub trait ArrowArrayRef: std::fmt::Debug {
 
     fn parent(&self) -> &InternalArrowArray;
     fn array(&self) -> &ArrowArray;
-    fn data_type(&self) -> &ArrowDataType;
+    fn dtype(&self) -> &ArrowDataType;
 }
 
 /// Struct used to move an Array from and to the C Data Interface.
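Note on call sites: the practical effect of the `ArrowArrayRef` hunks above (and of the matching `Array`/`MutableArray` changes elsewhere in this patch) is that the accessor is now `dtype()` instead of `data_type()`. Below is a minimal sketch of a downstream caller after the rename; the concrete array, constructor, and prints are illustrative only and are not part of this patch:

    use polars_arrow::array::{Array, PrimitiveArray};

    fn main() {
        // A boxed dyn Array, as produced by e.g. the FFI import path above.
        let arr: Box<dyn Array> = Box::new(PrimitiveArray::<i32>::from_vec(vec![1, 2, 3]));

        // Renamed accessor: `dtype()` (previously `data_type()`).
        println!("logical:  {:?}", arr.dtype());
        println!("physical: {:?}", arr.dtype().to_physical_type());
    }

The logical/physical split relied on by the FFI code above is unchanged; only the accessor name differs.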
@@ -576,22 +576,22 @@ pub struct InternalArrowArray { // Arc is used for sharability since this is immutable array: Arc, // Arced to reduce cost of cloning - data_type: Arc, + dtype: Arc, } impl InternalArrowArray { - pub fn new(array: ArrowArray, data_type: ArrowDataType) -> Self { + pub fn new(array: ArrowArray, dtype: ArrowDataType) -> Self { Self { array: Arc::new(array), - data_type: Arc::new(data_type), + dtype: Arc::new(dtype), } } } impl ArrowArrayRef for InternalArrowArray { - /// the data_type as declared in the schema - fn data_type(&self) -> &ArrowDataType { - &self.data_type + /// the dtype as declared in the schema + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn parent(&self) -> &InternalArrowArray { @@ -618,14 +618,14 @@ impl ArrowArrayRef for InternalArrowArray { #[derive(Debug)] pub struct ArrowArrayChild<'a> { array: &'a ArrowArray, - data_type: ArrowDataType, + dtype: ArrowDataType, parent: InternalArrowArray, } impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { - /// the data_type as declared in the schema - fn data_type(&self) -> &ArrowDataType { - &self.data_type + /// the dtype as declared in the schema + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn parent(&self) -> &InternalArrowArray { @@ -650,10 +650,10 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { } impl<'a> ArrowArrayChild<'a> { - fn new(array: &'a ArrowArray, data_type: ArrowDataType, parent: InternalArrowArray) -> Self { + fn new(array: &'a ArrowArray, dtype: ArrowDataType, parent: InternalArrowArray) -> Self { Self { array, - data_type, + dtype, parent, } } diff --git a/crates/polars-arrow/src/ffi/bridge.rs b/crates/polars-arrow/src/ffi/bridge.rs index 7c45ad2faa12..c23c21643214 100644 --- a/crates/polars-arrow/src/ffi/bridge.rs +++ b/crates/polars-arrow/src/ffi/bridge.rs @@ -14,7 +14,7 @@ macro_rules! ffi_dyn { pub fn align_to_c_data_interface(array: Box) -> Box { use crate::datatypes::PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => ffi_dyn!(array, NullArray), Boolean => ffi_dyn!(array, BooleanArray), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { diff --git a/crates/polars-arrow/src/ffi/mod.rs b/crates/polars-arrow/src/ffi/mod.rs index 7308a3b8a59e..b7cf2b957b0a 100644 --- a/crates/polars-arrow/src/ffi/mod.rs +++ b/crates/polars-arrow/src/ffi/mod.rs @@ -40,7 +40,7 @@ pub unsafe fn import_field_from_c(field: &ArrowSchema) -> PolarsResult { /// being valid according to the [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). 
pub unsafe fn import_array_from_c( array: ArrowArray, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PolarsResult> { - try_from(InternalArrowArray::new(array, data_type)) + try_from(InternalArrowArray::new(array, dtype)) } diff --git a/crates/polars-arrow/src/ffi/schema.rs b/crates/polars-arrow/src/ffi/schema.rs index 3dbc749a369f..d602803c06c8 100644 --- a/crates/polars-arrow/src/ffi/schema.rs +++ b/crates/polars-arrow/src/ffi/schema.rs @@ -39,8 +39,8 @@ unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) { } /// allocate (and hold) the children -fn schema_children(data_type: &ArrowDataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> { - match data_type { +fn schema_children(dtype: &ArrowDataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> { + match dtype { ArrowDataType::List(field) | ArrowDataType::FixedSizeList(field, _) | ArrowDataType::LargeList(field) => { @@ -62,16 +62,16 @@ fn schema_children(data_type: &ArrowDataType, flags: &mut i64) -> Box<[*mut Arro impl ArrowSchema { /// creates a new [ArrowSchema] pub(crate) fn new(field: &Field) -> Self { - let format = to_format(field.data_type()); + let format = to_format(field.dtype()); let name = field.name.clone(); let mut flags = field.is_nullable as i64 * 2; // note: this cannot be done along with the above because the above is fallible and this op leaks. - let children_ptr = schema_children(field.data_type(), &mut flags); + let children_ptr = schema_children(field.dtype(), &mut flags); let n_children = children_ptr.len() as i64; - let dictionary = if let ArrowDataType::Dictionary(_, values, is_ordered) = field.data_type() + let dictionary = if let ArrowDataType::Dictionary(_, values, is_ordered) = field.dtype() { flags += *is_ordered as i64; // we do not store field info in the dict values, so can't recover it all :( @@ -84,7 +84,7 @@ impl ArrowSchema { let metadata = &field.metadata; let metadata = - if let ArrowDataType::Extension(name, _, extension_metadata) = field.data_type() { + if let ArrowDataType::Extension(name, _, extension_metadata) = field.dtype() { // append extension information. let mut metadata = metadata.clone(); @@ -204,25 +204,25 @@ impl Drop for ArrowSchema { pub(crate) unsafe fn to_field(schema: &ArrowSchema) -> PolarsResult { let dictionary = schema.dictionary(); - let data_type = if let Some(dictionary) = dictionary { + let dtype = if let Some(dictionary) = dictionary { let indices = to_integer_type(schema.format())?; let values = to_field(dictionary)?; let is_ordered = schema.flags & 1 == 1; - ArrowDataType::Dictionary(indices, Box::new(values.data_type().clone()), is_ordered) + ArrowDataType::Dictionary(indices, Box::new(values.dtype().clone()), is_ordered) } else { - to_data_type(schema)? + to_dtype(schema)? 
}; let (metadata, extension) = unsafe { metadata_from_bytes(schema.metadata) }; - let data_type = if let Some((name, extension_metadata)) = extension { - ArrowDataType::Extension(name, Box::new(data_type), extension_metadata) + let dtype = if let Some((name, extension_metadata)) = extension { + ArrowDataType::Extension(name, Box::new(dtype), extension_metadata) } else { - data_type + dtype }; Ok(Field::new( PlSmallStr::from_str(schema.name()), - data_type, + dtype, schema.nullable(), ) .with_metadata(metadata)) @@ -248,7 +248,7 @@ fn to_integer_type(format: &str) -> PolarsResult { }) } -unsafe fn to_data_type(schema: &ArrowSchema) -> PolarsResult { +unsafe fn to_dtype(schema: &ArrowSchema) -> PolarsResult { Ok(match schema.format() { "n" => ArrowDataType::Null, "b" => ArrowDataType::Boolean, @@ -414,8 +414,8 @@ unsafe fn to_data_type(schema: &ArrowSchema) -> PolarsResult { } /// the inverse of [to_field] -fn to_format(data_type: &ArrowDataType) -> String { - match data_type { +fn to_format(dtype: &ArrowDataType) -> String { + match dtype { ArrowDataType::Null => "n".to_string(), ArrowDataType::Boolean => "b".to_string(), ArrowDataType::Int8 => "c".to_string(), @@ -496,17 +496,17 @@ fn to_format(data_type: &ArrowDataType) -> String { } } -pub(super) fn get_child(data_type: &ArrowDataType, index: usize) -> PolarsResult { - match (index, data_type) { - (0, ArrowDataType::List(field)) => Ok(field.data_type().clone()), - (0, ArrowDataType::FixedSizeList(field, _)) => Ok(field.data_type().clone()), - (0, ArrowDataType::LargeList(field)) => Ok(field.data_type().clone()), - (0, ArrowDataType::Map(field, _)) => Ok(field.data_type().clone()), - (index, ArrowDataType::Struct(fields)) => Ok(fields[index].data_type().clone()), - (index, ArrowDataType::Union(fields, _, _)) => Ok(fields[index].data_type().clone()), +pub(super) fn get_child(dtype: &ArrowDataType, index: usize) -> PolarsResult { + match (index, dtype) { + (0, ArrowDataType::List(field)) => Ok(field.dtype().clone()), + (0, ArrowDataType::FixedSizeList(field, _)) => Ok(field.dtype().clone()), + (0, ArrowDataType::LargeList(field)) => Ok(field.dtype().clone()), + (0, ArrowDataType::Map(field, _)) => Ok(field.dtype().clone()), + (index, ArrowDataType::Struct(fields)) => Ok(fields[index].dtype().clone()), + (index, ArrowDataType::Union(fields, _, _)) => Ok(fields[index].dtype().clone()), (index, ArrowDataType::Extension(_, subtype, _)) => get_child(subtype, index), - (child, data_type) => polars_bail!(ComputeError: - "Requested child {child} to type {data_type:?} that has no such child", + (child, dtype) => polars_bail!(ComputeError: + "Requested child {child} to type {dtype:?} that has no such child", ), } } @@ -694,7 +694,7 @@ mod tests { for expected in dts { let field = Field::new(PlSmallStr::from_static("a"), expected.clone(), true); let schema = ArrowSchema::new(&field); - let result = unsafe { super::to_data_type(&schema).unwrap() }; + let result = unsafe { super::to_dtype(&schema).unwrap() }; assert_eq!(result, expected); } } diff --git a/crates/polars-arrow/src/ffi/stream.rs b/crates/polars-arrow/src/ffi/stream.rs index b894bc6748ab..2666d417ec48 100644 --- a/crates/polars-arrow/src/ffi/stream.rs +++ b/crates/polars-arrow/src/ffi/stream.rs @@ -120,7 +120,7 @@ impl> ArrowArrayStreamReader { array.release?; // SAFETY: assumed from the C stream interface - unsafe { import_array_from_c(array, self.field.data_type.clone()) } + unsafe { import_array_from_c(array, self.field.dtype.clone()) } .map(Some) .transpose() } @@ -140,9 +140,9 @@ 
unsafe extern "C" fn get_next(iter: *mut ArrowArrayStream, array: *mut ArrowArra match private.iter.next() { Some(Ok(item)) => { - // check that the array has the same data_type as field - let item_dt = item.data_type(); - let expected_dt = private.field.data_type(); + // check that the array has the same dtype as field + let item_dt = item.dtype(); + let expected_dt = private.field.dtype(); if item_dt != expected_dt { private.error = Some(CString::new(format!("The iterator produced an item of data type {item_dt:?} but the producer expects data type {expected_dt:?}").as_bytes().to_vec()).unwrap()); return 2001; // custom application specific error (since this is never a result of this interface) diff --git a/crates/polars-arrow/src/io/avro/read/deserialize.rs b/crates/polars-arrow/src/io/avro/read/deserialize.rs index c5d4b55b3f26..f9423f83305a 100644 --- a/crates/polars-arrow/src/io/avro/read/deserialize.rs +++ b/crates/polars-arrow/src/io/avro/read/deserialize.rs @@ -11,16 +11,16 @@ use crate::types::months_days_ns; use crate::with_match_primitive_type_full; fn make_mutable( - data_type: &ArrowDataType, + dtype: &ArrowDataType, avro_field: Option<&AvroSchema>, capacity: usize, ) -> PolarsResult> { - Ok(match data_type.to_physical_type() { + Ok(match dtype.to_physical_type() { PhysicalType::Boolean => { Box::new(MutableBooleanArray::with_capacity(capacity)) as Box }, PhysicalType::Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - Box::new(MutablePrimitiveArray::<$T>::with_capacity(capacity).to(data_type.clone())) + Box::new(MutablePrimitiveArray::<$T>::with_capacity(capacity).to(dtype.clone())) as Box }), PhysicalType::Binary => { @@ -38,12 +38,12 @@ fn make_mutable( unreachable!() } }, - _ => match data_type { + _ => match dtype { ArrowDataType::List(inner) => { - let values = make_mutable(inner.data_type(), None, 0)?; + let values = make_mutable(inner.dtype(), None, 0)?; Box::new(DynMutableListArray::::new_from( values, - data_type.clone(), + dtype.clone(), capacity, )) as Box }, @@ -54,10 +54,9 @@ fn make_mutable( ArrowDataType::Struct(fields) => { let values = fields .iter() - .map(|field| make_mutable(field.data_type(), None, capacity)) + .map(|field| make_mutable(field.dtype(), None, capacity)) .collect::>>()?; - Box::new(DynMutableStructArray::new(values, data_type.clone())) - as Box + Box::new(DynMutableStructArray::new(values, dtype.clone())) as Box }, other => { polars_bail!(nyi = "Deserializing type {other:#?} is still not implemented") @@ -96,8 +95,8 @@ fn deserialize_value<'a>( avro_field: &AvroSchema, mut block: &'a [u8], ) -> PolarsResult<&'a [u8]> { - let data_type = array.data_type(); - match data_type { + let dtype = array.dtype(); + match dtype { ArrowDataType::List(inner) => { let is_nullable = inner.is_nullable; let avro_inner = match avro_field { @@ -168,7 +167,7 @@ fn deserialize_value<'a>( } array.try_push_valid()?; }, - _ => match data_type.to_physical_type() { + _ => match dtype.to_physical_type() { PhysicalType::Boolean => { let is_valid = block[0] == 1; block = &block[1..]; @@ -331,7 +330,7 @@ fn skip_item<'a>( return Ok(block); } } - match &field.data_type { + match &field.dtype { ArrowDataType::List(inner) => { let avro_inner = match avro_field { AvroSchema::Array(inner) => inner.as_ref(), @@ -392,7 +391,7 @@ fn skip_item<'a>( block = skip_item(field, &avro_field.schema, block)?; } }, - _ => match field.data_type.to_physical_type() { + _ => match field.dtype.to_physical_type() { PhysicalType::Boolean => { let _ = block[0] == 1; block 
= &block[1..]; @@ -444,7 +443,7 @@ fn skip_item<'a>( block = &block[len..]; }, PhysicalType::FixedSizeBinary => { - let len = if let ArrowDataType::FixedSizeBinary(len) = &field.data_type { + let len = if let ArrowDataType::FixedSizeBinary(len) = &field.dtype { *len } else { unreachable!() @@ -484,7 +483,7 @@ pub fn deserialize( .zip(projection.iter()) .map(|((field, avro_field), projection)| { if *projection { - make_mutable(&field.data_type, Some(&avro_field.schema), rows) + make_mutable(&field.dtype, Some(&avro_field.schema), rows) } else { // just something; we are not going to use it make_mutable(&ArrowDataType::Int32, None, 0) diff --git a/crates/polars-arrow/src/io/avro/read/nested.rs b/crates/polars-arrow/src/io/avro/read/nested.rs index 7188e06ae873..fc7e07487d83 100644 --- a/crates/polars-arrow/src/io/avro/read/nested.rs +++ b/crates/polars-arrow/src/io/avro/read/nested.rs @@ -8,22 +8,18 @@ use crate::offset::{Offset, Offsets}; /// Auxiliary struct #[derive(Debug)] pub struct DynMutableListArray { - data_type: ArrowDataType, + dtype: ArrowDataType, offsets: Offsets, values: Box, validity: Option, } impl DynMutableListArray { - pub fn new_from( - values: Box, - data_type: ArrowDataType, - capacity: usize, - ) -> Self { + pub fn new_from(values: Box, dtype: ArrowDataType, capacity: usize) -> Self { assert_eq!(values.len(), 0); - ListArray::::get_child_field(&data_type); + ListArray::::get_child_field(&dtype); Self { - data_type, + dtype, offsets: Offsets::::with_capacity(capacity), values, validity: None, @@ -80,7 +76,7 @@ impl MutableArray for DynMutableListArray { fn as_box(&mut self) -> Box { ListArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.offsets).into(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), @@ -90,7 +86,7 @@ impl MutableArray for DynMutableListArray { fn as_arc(&mut self) -> std::sync::Arc { ListArray::new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.offsets).into(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), @@ -98,8 +94,8 @@ impl MutableArray for DynMutableListArray { .arced() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { @@ -126,7 +122,7 @@ impl MutableArray for DynMutableListArray { #[derive(Debug)] pub struct FixedItemsUtf8Dictionary { - data_type: ArrowDataType, + dtype: ArrowDataType, keys: MutablePrimitiveArray, values: Utf8Array, } @@ -134,9 +130,9 @@ pub struct FixedItemsUtf8Dictionary { impl FixedItemsUtf8Dictionary { pub fn with_capacity(values: Utf8Array, capacity: usize) -> Self { Self { - data_type: ArrowDataType::Dictionary( + dtype: ArrowDataType::Dictionary( IntegerType::Int32, - Box::new(values.data_type().clone()), + Box::new(values.dtype().clone()), false, ), keys: MutablePrimitiveArray::::with_capacity(capacity), @@ -166,7 +162,7 @@ impl MutableArray for FixedItemsUtf8Dictionary { fn as_box(&mut self) -> Box { Box::new( DictionaryArray::try_new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.keys).into(), Box::new(self.values.clone()), ) @@ -177,7 +173,7 @@ impl MutableArray for FixedItemsUtf8Dictionary { fn as_arc(&mut self) -> std::sync::Arc { std::sync::Arc::new( DictionaryArray::try_new( - self.data_type.clone(), + self.dtype.clone(), std::mem::take(&mut self.keys).into(), Box::new(self.values.clone()), ) @@ -185,8 +181,8 @@ impl MutableArray for FixedItemsUtf8Dictionary { ) } 
- fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { @@ -214,15 +210,15 @@ impl MutableArray for FixedItemsUtf8Dictionary { /// Auxiliary struct #[derive(Debug)] pub struct DynMutableStructArray { - data_type: ArrowDataType, + dtype: ArrowDataType, values: Vec>, validity: Option, } impl DynMutableStructArray { - pub fn new(values: Vec>, data_type: ArrowDataType) -> Self { + pub fn new(values: Vec>, dtype: ArrowDataType) -> Self { Self { - data_type, + dtype, values, validity: None, } @@ -273,7 +269,7 @@ impl MutableArray for DynMutableStructArray { let values = self.values.iter_mut().map(|x| x.as_box()).collect(); Box::new(StructArray::new( - self.data_type.clone(), + self.dtype.clone(), values, std::mem::take(&mut self.validity).map(|x| x.into()), )) @@ -283,14 +279,14 @@ impl MutableArray for DynMutableStructArray { let values = self.values.iter_mut().map(|x| x.as_box()).collect(); std::sync::Arc::new(StructArray::new( - self.data_type.clone(), + self.dtype.clone(), values, std::mem::take(&mut self.validity).map(|x| x.into()), )) } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } fn as_any(&self) -> &dyn std::any::Any { diff --git a/crates/polars-arrow/src/io/avro/read/schema.rs b/crates/polars-arrow/src/io/avro/read/schema.rs index 854362aa0c2f..ae9660496c7f 100644 --- a/crates/polars-arrow/src/io/avro/read/schema.rs +++ b/crates/polars-arrow/src/io/avro/read/schema.rs @@ -47,7 +47,7 @@ fn schema_to_field( props: Metadata, ) -> PolarsResult { let mut nullable = false; - let data_type = match schema { + let dtype = match schema { AvroSchema::Null => ArrowDataType::Null, AvroSchema::Boolean => ArrowDataType::Boolean, AvroSchema::Int(logical) => match logical { @@ -107,7 +107,7 @@ fn schema_to_field( .iter() .find(|&schema| !matches!(schema, AvroSchema::Null)) { - schema_to_field(schema, None, Metadata::default())?.data_type + schema_to_field(schema, None, Metadata::default())?.dtype } else { polars_bail!(nyi = "Can't read avro union {schema:?}"); } @@ -157,5 +157,5 @@ fn schema_to_field( let name = name.unwrap_or_default(); - Ok(Field::new(PlSmallStr::from_str(name), data_type, nullable).with_metadata(props)) + Ok(Field::new(PlSmallStr::from_str(name), dtype, nullable).with_metadata(props)) } diff --git a/crates/polars-arrow/src/io/avro/write/schema.rs b/crates/polars-arrow/src/io/avro/write/schema.rs index f7845d624881..e0d71c5611c3 100644 --- a/crates/polars-arrow/src/io/avro/write/schema.rs +++ b/crates/polars-arrow/src/io/avro/write/schema.rs @@ -23,22 +23,22 @@ pub fn to_record(schema: &ArrowSchema, name: String) -> PolarsResult { } fn field_to_field(field: &Field, name_counter: &mut i32) -> PolarsResult { - let schema = type_to_schema(field.data_type(), field.is_nullable, name_counter)?; + let schema = type_to_schema(field.dtype(), field.is_nullable, name_counter)?; Ok(AvroField::new(field.name.to_string(), schema)) } fn type_to_schema( - data_type: &ArrowDataType, + dtype: &ArrowDataType, is_nullable: bool, name_counter: &mut i32, ) -> PolarsResult { Ok(if is_nullable { AvroSchema::Union(vec![ AvroSchema::Null, - _type_to_schema(data_type, name_counter)?, + _type_to_schema(dtype, name_counter)?, ]) } else { - _type_to_schema(data_type, name_counter)? + _type_to_schema(dtype, name_counter)? 
}) } @@ -47,8 +47,8 @@ fn _get_field_name(name_counter: &mut i32) -> String { format!("r{name_counter}") } -fn _type_to_schema(data_type: &ArrowDataType, name_counter: &mut i32) -> PolarsResult { - Ok(match data_type.to_logical_type() { +fn _type_to_schema(dtype: &ArrowDataType, name_counter: &mut i32) -> PolarsResult { + Ok(match dtype.to_logical_type() { ArrowDataType::Null => AvroSchema::Null, ArrowDataType::Boolean => AvroSchema::Boolean, ArrowDataType::Int32 => AvroSchema::Int(None), @@ -61,7 +61,7 @@ fn _type_to_schema(data_type: &ArrowDataType, name_counter: &mut i32) -> PolarsR ArrowDataType::LargeUtf8 => AvroSchema::String(None), ArrowDataType::LargeList(inner) | ArrowDataType::List(inner) => { AvroSchema::Array(Box::new(type_to_schema( - &inner.data_type, + &inner.dtype, inner.is_nullable, name_counter, )?)) diff --git a/crates/polars-arrow/src/io/avro/write/serialize.rs b/crates/polars-arrow/src/io/avro/write/serialize.rs index 36519acbf493..ba287521b677 100644 --- a/crates/polars-arrow/src/io/avro/write/serialize.rs +++ b/crates/polars-arrow/src/io/avro/write/serialize.rs @@ -207,14 +207,14 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer /// Creates a [`StreamingIterator`] trait object that presents items from `array` /// encoded according to `schema`. /// # Panic -/// This function panics iff the `data_type` is not supported (use [`can_serialize`] to check) +/// This function panics iff the `dtype` is not supported (use [`can_serialize`] to check) /// # Implementation /// This function performs minimal CPU work: it dynamically dispatches based on the schema /// and arrow type. pub fn new_serializer<'a>(array: &'a dyn Array, schema: &AvroSchema) -> BoxSerializer<'a> { - let data_type = array.data_type().to_physical_type(); + let dtype = array.dtype().to_physical_type(); - match (data_type, schema) { + match (dtype, schema) { (PhysicalType::Boolean, AvroSchema::Boolean) => { let values = array.as_any().downcast_ref::().unwrap(); Box::new(BufStreamingIterator::new( @@ -497,18 +497,18 @@ pub fn new_serializer<'a>(array: &'a dyn Array, schema: &AvroSchema) -> BoxSeria } } -/// Whether [`new_serializer`] supports `data_type`. -pub fn can_serialize(data_type: &ArrowDataType) -> bool { +/// Whether [`new_serializer`] supports `dtype`. 
+pub fn can_serialize(dtype: &ArrowDataType) -> bool { use ArrowDataType::*; - match data_type.to_logical_type() { - List(inner) => return can_serialize(&inner.data_type), - LargeList(inner) => return can_serialize(&inner.data_type), - Struct(inner) => return inner.iter().all(|inner| can_serialize(&inner.data_type)), + match dtype.to_logical_type() { + List(inner) => return can_serialize(&inner.dtype), + LargeList(inner) => return can_serialize(&inner.dtype), + Struct(inner) => return inner.iter().all(|inner| can_serialize(&inner.dtype)), _ => {}, }; matches!( - data_type, + dtype, Boolean | Int32 | Int64 diff --git a/crates/polars-arrow/src/io/ipc/read/array/binary.rs b/crates/polars-arrow/src/io/ipc/read/array/binary.rs index 9553212ec5c4..d46f5ca102ac 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/binary.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/binary.rs @@ -14,7 +14,7 @@ use crate::offset::Offset; #[allow(clippy::too_many_arguments)] pub fn read_binary( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -23,7 +23,7 @@ pub fn read_binary( limit: Option, scratch: &mut Vec, ) -> PolarsResult> { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -61,7 +61,7 @@ pub fn read_binary( scratch, )?; - BinaryArray::::try_new(data_type, offsets.try_into()?, values, validity) + BinaryArray::::try_new(dtype, offsets.try_into()?, values, validity) } pub fn skip_binary( diff --git a/crates/polars-arrow/src/io/ipc/read/array/binview.rs b/crates/polars-arrow/src/io/ipc/read/array/binview.rs index 8d5725023791..4423cdaab6e4 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/binview.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/binview.rs @@ -12,7 +12,7 @@ use crate::buffer::Buffer; pub fn read_binview( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -21,7 +21,7 @@ pub fn read_binview( limit: Option, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -62,7 +62,7 @@ pub fn read_binview( }) .collect::>>>()?; - BinaryViewArrayGeneric::::try_new(data_type, views, Arc::from(variadic_buffers), validity) + BinaryViewArrayGeneric::::try_new(dtype, views, Arc::from(variadic_buffers), validity) .map(|arr| arr.boxed()) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/boolean.rs b/crates/polars-arrow/src/io/ipc/read/array/boolean.rs index 16443b0b8af0..ebc9ed510380 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/boolean.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/boolean.rs @@ -12,7 +12,7 @@ use crate::io::ipc::read::array::{try_get_array_length, try_get_field_node}; #[allow(clippy::too_many_arguments)] pub fn read_boolean( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -21,7 +21,7 @@ pub fn read_boolean( limit: Option, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -45,7 +45,7 @@ pub fn read_boolean( compression, scratch, )?; - 
BooleanArray::try_new(data_type, values, validity) + BooleanArray::try_new(dtype, values, validity) } pub fn skip_boolean( diff --git a/crates/polars-arrow/src/io/ipc/read/array/dictionary.rs b/crates/polars-arrow/src/io/ipc/read/array/dictionary.rs index e578fefee321..88f9ef46de89 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/dictionary.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/dictionary.rs @@ -12,7 +12,7 @@ use crate::datatypes::ArrowDataType; #[allow(clippy::too_many_arguments)] pub fn read_dictionary( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, id: Option, buffers: &mut VecDeque, reader: &mut R, @@ -53,7 +53,7 @@ where scratch, )?; - DictionaryArray::::try_new(data_type, keys, values) + DictionaryArray::::try_new(dtype, keys, values) } pub fn skip_dictionary( diff --git a/crates/polars-arrow/src/io/ipc/read/array/fixed_size_binary.rs b/crates/polars-arrow/src/io/ipc/read/array/fixed_size_binary.rs index 9683952c6d6c..61a8055528e7 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/fixed_size_binary.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/fixed_size_binary.rs @@ -12,7 +12,7 @@ use crate::io::ipc::read::array::{try_get_array_length, try_get_field_node}; #[allow(clippy::too_many_arguments)] pub fn read_fixed_size_binary( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -21,7 +21,7 @@ pub fn read_fixed_size_binary( limit: Option, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -36,7 +36,7 @@ pub fn read_fixed_size_binary( let length = try_get_array_length(field_node, limit)?; - let length = length.saturating_mul(FixedSizeBinaryArray::maybe_get_size(&data_type)?); + let length = length.saturating_mul(FixedSizeBinaryArray::maybe_get_size(&dtype)?); let values = read_buffer( buffers, length, @@ -47,7 +47,7 @@ pub fn read_fixed_size_binary( scratch, )?; - FixedSizeBinaryArray::try_new(data_type, values, validity) + FixedSizeBinaryArray::try_new(dtype, values, validity) } pub fn skip_fixed_size_binary( diff --git a/crates/polars-arrow/src/io/ipc/read/array/fixed_size_list.rs b/crates/polars-arrow/src/io/ipc/read/array/fixed_size_list.rs index 1f303a156787..eac68f9fda54 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/fixed_size_list.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/fixed_size_list.rs @@ -15,7 +15,7 @@ use crate::io::ipc::read::array::try_get_field_node; pub fn read_fixed_size_list( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, buffers: &mut VecDeque, reader: &mut R, @@ -27,7 +27,7 @@ pub fn read_fixed_size_list( version: Version, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -40,7 +40,7 @@ pub fn read_fixed_size_list( scratch, )?; - let (field, size) = FixedSizeListArray::get_child_and_size(&data_type); + let (field, size) = FixedSizeListArray::get_child_and_size(&dtype); let limit = limit.map(|x| x.saturating_mul(size)); @@ -59,12 +59,12 @@ pub fn read_fixed_size_list( version, scratch, )?; - FixedSizeListArray::try_new(data_type, values, validity) + FixedSizeListArray::try_new(dtype, values, validity) } 
pub fn skip_fixed_size_list( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { @@ -78,12 +78,7 @@ pub fn skip_fixed_size_list( .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing validity buffer."))?; - let (field, _) = FixedSizeListArray::get_child_and_size(data_type); + let (field, _) = FixedSizeListArray::get_child_and_size(dtype); - skip( - field_nodes, - field.data_type(), - buffers, - variadic_buffer_counts, - ) + skip(field_nodes, field.dtype(), buffers, variadic_buffer_counts) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/list.rs b/crates/polars-arrow/src/io/ipc/read/array/list.rs index 45566fd5df9f..b89a03cf552a 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/list.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/list.rs @@ -17,7 +17,7 @@ use crate::offset::Offset; pub fn read_list( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, buffers: &mut VecDeque, reader: &mut R, @@ -32,7 +32,7 @@ pub fn read_list( where Vec: TryInto, { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -61,7 +61,7 @@ where let last_offset = offsets.last().unwrap().to_usize(); - let field = ListArray::::get_child_field(&data_type); + let field = ListArray::::get_child_field(&dtype); let values = read( field_nodes, @@ -78,12 +78,12 @@ where version, scratch, )?; - ListArray::try_new(data_type, offsets.try_into()?, values, validity) + ListArray::try_new(dtype, offsets.try_into()?, values, validity) } pub fn skip_list( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { @@ -100,7 +100,7 @@ pub fn skip_list( .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing offsets buffer."))?; - let data_type = ListArray::::get_child_type(data_type); + let dtype = ListArray::::get_child_type(dtype); - skip(field_nodes, data_type, buffers, variadic_buffer_counts) + skip(field_nodes, dtype, buffers, variadic_buffer_counts) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/map.rs b/crates/polars-arrow/src/io/ipc/read/array/map.rs index 741d496a5a63..17e963f5dfa4 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/map.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/map.rs @@ -16,7 +16,7 @@ use crate::io::ipc::read::array::{try_get_array_length, try_get_field_node}; pub fn read_map( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, buffers: &mut VecDeque, reader: &mut R, @@ -28,7 +28,7 @@ pub fn read_map( version: Version, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -55,7 +55,7 @@ pub fn read_map( // Older versions of the IPC format sometimes do not report an offset .or_else(|_| PolarsResult::Ok(Buffer::::from(vec![0i32])))?; - let field = MapArray::get_field(&data_type); + let field = MapArray::get_field(&dtype); let last_offset: usize = offsets.last().copied().unwrap() as usize; @@ -74,12 +74,12 @@ pub fn read_map( version, scratch, )?; - MapArray::try_new(data_type, offsets.try_into()?, field, 
validity) + MapArray::try_new(dtype, offsets.try_into()?, field, validity) } pub fn skip_map( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { @@ -96,7 +96,7 @@ pub fn skip_map( .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing offsets buffer."))?; - let data_type = MapArray::get_field(data_type).data_type(); + let dtype = MapArray::get_field(dtype).dtype(); - skip(field_nodes, data_type, buffers, variadic_buffer_counts) + skip(field_nodes, dtype, buffers, variadic_buffer_counts) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/mod.rs b/crates/polars-arrow/src/io/ipc/read/array/mod.rs index 2ffe1a369c25..21c393a2869e 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/mod.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/mod.rs @@ -34,10 +34,10 @@ use crate::datatypes::ArrowDataType; fn try_get_field_node<'a>( field_nodes: &mut VecDeque>, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ) -> PolarsResult> { field_nodes.pop_front().ok_or_else(|| { - polars_err!(ComputeError: "IPC: unable to fetch the field for {:?}\n\nThe file or stream is corrupted.", data_type) + polars_err!(ComputeError: "IPC: unable to fetch the field for {:?}\n\nThe file or stream is corrupted.", dtype) }) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/null.rs b/crates/polars-arrow/src/io/ipc/read/array/null.rs index f9df4d254900..6fac4ae2d7bb 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/null.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/null.rs @@ -9,14 +9,14 @@ use crate::io::ipc::read::array::{try_get_array_length, try_get_field_node}; pub fn read_null( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, limit: Option, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let length = try_get_array_length(field_node, limit)?; - NullArray::try_new(data_type, length) + NullArray::try_new(dtype, length) } pub fn skip_null(field_nodes: &mut VecDeque) -> PolarsResult<()> { diff --git a/crates/polars-arrow/src/io/ipc/read/array/primitive.rs b/crates/polars-arrow/src/io/ipc/read/array/primitive.rs index 04304aadca90..a530cba97bb1 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/primitive.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/primitive.rs @@ -13,7 +13,7 @@ use crate::types::NativeType; #[allow(clippy::too_many_arguments)] pub fn read_primitive( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -25,7 +25,7 @@ pub fn read_primitive( where Vec: TryInto, { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -49,7 +49,7 @@ where compression, scratch, )?; - PrimitiveArray::::try_new(data_type, values, validity) + PrimitiveArray::::try_new(dtype, values, validity) } pub fn skip_primitive( diff --git a/crates/polars-arrow/src/io/ipc/read/array/struct_.rs b/crates/polars-arrow/src/io/ipc/read/array/struct_.rs index 6dc716ab368b..5cf68f1d1d95 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/struct_.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/struct_.rs @@ -15,7 +15,7 @@ use crate::io::ipc::read::array::try_get_field_node; pub fn read_struct( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - 
data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, buffers: &mut VecDeque, reader: &mut R, @@ -27,7 +27,7 @@ pub fn read_struct( version: Version, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -40,7 +40,7 @@ pub fn read_struct( scratch, )?; - let fields = StructArray::get_fields(&data_type); + let fields = StructArray::get_fields(&dtype); let values = fields .iter() @@ -64,12 +64,12 @@ pub fn read_struct( }) .collect::>>()?; - StructArray::try_new(data_type, values, validity) + StructArray::try_new(dtype, values, validity) } pub fn skip_struct( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { @@ -83,14 +83,9 @@ pub fn skip_struct( .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing validity buffer."))?; - let fields = StructArray::get_fields(data_type); + let fields = StructArray::get_fields(dtype); - fields.iter().try_for_each(|field| { - skip( - field_nodes, - field.data_type(), - buffers, - variadic_buffer_counts, - ) - }) + fields + .iter() + .try_for_each(|field| skip(field_nodes, field.dtype(), buffers, variadic_buffer_counts)) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/union.rs b/crates/polars-arrow/src/io/ipc/read/array/union.rs index 192d9582ed21..b84ff3349aed 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/union.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/union.rs @@ -16,7 +16,7 @@ use crate::io::ipc::read::array::{try_get_array_length, try_get_field_node}; pub fn read_union( field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, buffers: &mut VecDeque, reader: &mut R, @@ -28,7 +28,7 @@ pub fn read_union( version: Version, scratch: &mut Vec, ) -> PolarsResult { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; if version != Version::V5 { let _ = buffers @@ -48,7 +48,7 @@ pub fn read_union( scratch, )?; - let offsets = if let ArrowDataType::Union(_, _, mode) = data_type { + let offsets = if let ArrowDataType::Union(_, _, mode) = dtype { if !mode.is_sparse() { Some(read_buffer( buffers, @@ -66,7 +66,7 @@ pub fn read_union( unreachable!() }; - let fields = UnionArray::get_fields(&data_type); + let fields = UnionArray::get_fields(&dtype); let fields = fields .iter() @@ -90,12 +90,12 @@ pub fn read_union( }) .collect::>>()?; - UnionArray::try_new(data_type, types, fields, offsets) + UnionArray::try_new(dtype, types, fields, offsets) } pub fn skip_union( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { @@ -108,7 +108,7 @@ pub fn skip_union( let _ = buffers .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing validity buffer."))?; - if let ArrowDataType::Union(_, _, Dense) = data_type { + if let ArrowDataType::Union(_, _, Dense) = dtype { let _ = buffers .pop_front() .ok_or_else(|| polars_err!(oos = "IPC: missing offsets buffer."))?; @@ -116,14 +116,9 @@ pub fn skip_union( unreachable!() }; - let fields = UnionArray::get_fields(data_type); + let fields = UnionArray::get_fields(dtype); - fields.iter().try_for_each(|field| { - skip( - field_nodes, - field.data_type(), - 
buffers, - variadic_buffer_counts, - ) - }) + fields + .iter() + .try_for_each(|field| skip(field_nodes, field.dtype(), buffers, variadic_buffer_counts)) } diff --git a/crates/polars-arrow/src/io/ipc/read/array/utf8.rs b/crates/polars-arrow/src/io/ipc/read/array/utf8.rs index f29f8d8cdb26..33f43baf7f1b 100644 --- a/crates/polars-arrow/src/io/ipc/read/array/utf8.rs +++ b/crates/polars-arrow/src/io/ipc/read/array/utf8.rs @@ -11,7 +11,7 @@ use crate::offset::Offset; #[allow(clippy::too_many_arguments)] pub fn read_utf8( field_nodes: &mut VecDeque, - data_type: ArrowDataType, + dtype: ArrowDataType, buffers: &mut VecDeque, reader: &mut R, block_offset: u64, @@ -20,7 +20,7 @@ pub fn read_utf8( limit: Option, scratch: &mut Vec, ) -> PolarsResult> { - let field_node = try_get_field_node(field_nodes, &data_type)?; + let field_node = try_get_field_node(field_nodes, &dtype)?; let validity = read_validity( buffers, @@ -58,7 +58,7 @@ pub fn read_utf8( scratch, )?; - Utf8Array::::try_new(data_type, offsets.try_into()?, values, validity) + Utf8Array::::try_new(dtype, offsets.try_into()?, values, validity) } pub fn skip_utf8( diff --git a/crates/polars-arrow/src/io/ipc/read/common.rs b/crates/polars-arrow/src/io/ipc/read/common.rs index ba47192e83a7..2458cba702b9 100644 --- a/crates/polars-arrow/src/io/ipc/read/common.rs +++ b/crates/polars-arrow/src/io/ipc/read/common.rs @@ -154,7 +154,7 @@ pub fn read_record_batch( ProjectionResult::NotSelected((field, _)) => { skip( &mut field_nodes, - &field.data_type, + &field.dtype, &mut buffers, &mut variadic_buffer_counts, )?; @@ -193,11 +193,11 @@ pub fn read_record_batch( fn find_first_dict_field_d<'a>( id: i64, - data_type: &'a ArrowDataType, + dtype: &'a ArrowDataType, ipc_field: &'a IpcField, ) -> Option<(&'a Field, &'a IpcField)> { use ArrowDataType::*; - match data_type { + match dtype { Dictionary(_, inner, _) => find_first_dict_field_d(id, inner.as_ref(), ipc_field), List(field) | LargeList(field) | FixedSizeList(field, ..) | Map(field, ..) => { find_first_dict_field(id, field.as_ref(), &ipc_field.fields[0]) @@ -224,7 +224,7 @@ fn find_first_dict_field<'a>( return Some((field, ipc_field)); } } - find_first_dict_field_d(id, &field.data_type, ipc_field) + find_first_dict_field_d(id, &field.dtype, ipc_field) } pub(crate) fn first_dict_field<'a>( @@ -273,13 +273,12 @@ pub fn read_dictionary( .map_err(|err| polars_err!(oos = OutOfSpecKind::InvalidFlatbufferData(err)))? .ok_or_else(|| polars_err!(oos = OutOfSpecKind::MissingData))?; - let value_type = if let ArrowDataType::Dictionary(_, value_type, _) = - first_field.data_type.to_logical_type() - { - value_type.as_ref() - } else { - polars_bail!(oos = OutOfSpecKind::InvalidIdDataType { requested_id: id }) - }; + let value_type = + if let ArrowDataType::Dictionary(_, value_type, _) = first_field.dtype.to_logical_type() { + value_type.as_ref() + } else { + polars_bail!(oos = OutOfSpecKind::InvalidIdDataType { requested_id: id }) + }; // Make a fake schema for the dictionary batch. 
let fields = std::iter::once(( diff --git a/crates/polars-arrow/src/io/ipc/read/deserialize.rs b/crates/polars-arrow/src/io/ipc/read/deserialize.rs index f27d9b58100e..1a57ac487c70 100644 --- a/crates/polars-arrow/src/io/ipc/read/deserialize.rs +++ b/crates/polars-arrow/src/io/ipc/read/deserialize.rs @@ -28,13 +28,13 @@ pub fn read( scratch: &mut Vec, ) -> PolarsResult> { use PhysicalType::*; - let data_type = field.data_type.clone(); + let dtype = field.dtype.clone(); - match data_type.to_physical_type() { - Null => read_null(field_nodes, data_type, limit).map(|x| x.boxed()), + match dtype.to_physical_type() { + Null => read_null(field_nodes, dtype, limit).map(|x| x.boxed()), Boolean => read_boolean( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -47,7 +47,7 @@ pub fn read( Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { read_primitive::<$T, _>( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -60,7 +60,7 @@ pub fn read( }), Binary => read_binary::( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -72,7 +72,7 @@ pub fn read( .map(|x| x.boxed()), LargeBinary => read_binary::( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -84,7 +84,7 @@ pub fn read( .map(|x| x.boxed()), FixedSizeBinary => read_fixed_size_binary( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -96,7 +96,7 @@ pub fn read( .map(|x| x.boxed()), Utf8 => read_utf8::( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -108,7 +108,7 @@ pub fn read( .map(|x| x.boxed()), LargeUtf8 => read_utf8::( field_nodes, - data_type, + dtype, buffers, reader, block_offset, @@ -121,7 +121,7 @@ pub fn read( List => read_list::( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -137,7 +137,7 @@ pub fn read( LargeList => read_list::( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -153,7 +153,7 @@ pub fn read( FixedSizeList => read_fixed_size_list( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -169,7 +169,7 @@ pub fn read( Struct => read_struct( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -186,7 +186,7 @@ pub fn read( match_integer_type!(key_type, |$T| { read_dictionary::<$T, _>( field_nodes, - data_type, + dtype, ipc_field.dictionary_id, buffers, reader, @@ -203,7 +203,7 @@ pub fn read( Union => read_union( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -219,7 +219,7 @@ pub fn read( Map => read_map( field_nodes, variadic_buffer_counts, - data_type, + dtype, ipc_field, buffers, reader, @@ -235,7 +235,7 @@ pub fn read( Utf8View => read_binview::( field_nodes, variadic_buffer_counts, - data_type, + dtype, buffers, reader, block_offset, @@ -247,7 +247,7 @@ pub fn read( BinaryView => read_binview::<[u8], _>( field_nodes, variadic_buffer_counts, - data_type, + dtype, buffers, reader, block_offset, @@ -261,27 +261,25 @@ pub fn read( pub fn skip( field_nodes: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, buffers: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, ) -> PolarsResult<()> { use PhysicalType::*; - match data_type.to_physical_type() { + match dtype.to_physical_type() { Null => skip_null(field_nodes), Boolean => skip_boolean(field_nodes, buffers), Primitive(_) => skip_primitive(field_nodes, buffers), LargeBinary | Binary => skip_binary(field_nodes, buffers), 
LargeUtf8 | Utf8 => skip_utf8(field_nodes, buffers), FixedSizeBinary => skip_fixed_size_binary(field_nodes, buffers), - List => skip_list::(field_nodes, data_type, buffers, variadic_buffer_counts), - LargeList => skip_list::(field_nodes, data_type, buffers, variadic_buffer_counts), - FixedSizeList => { - skip_fixed_size_list(field_nodes, data_type, buffers, variadic_buffer_counts) - }, - Struct => skip_struct(field_nodes, data_type, buffers, variadic_buffer_counts), + List => skip_list::(field_nodes, dtype, buffers, variadic_buffer_counts), + LargeList => skip_list::(field_nodes, dtype, buffers, variadic_buffer_counts), + FixedSizeList => skip_fixed_size_list(field_nodes, dtype, buffers, variadic_buffer_counts), + Struct => skip_struct(field_nodes, dtype, buffers, variadic_buffer_counts), Dictionary(_) => skip_dictionary(field_nodes, buffers), - Union => skip_union(field_nodes, data_type, buffers, variadic_buffer_counts), - Map => skip_map(field_nodes, data_type, buffers, variadic_buffer_counts), + Union => skip_union(field_nodes, dtype, buffers, variadic_buffer_counts), + Map => skip_map(field_nodes, dtype, buffers, variadic_buffer_counts), BinaryView | Utf8View => skip_binview(field_nodes, buffers, variadic_buffer_counts), } } diff --git a/crates/polars-arrow/src/io/ipc/read/schema.rs b/crates/polars-arrow/src/io/ipc/read/schema.rs index 091c21f14c1c..7fe6141e9b14 100644 --- a/crates/polars-arrow/src/io/ipc/read/schema.rs +++ b/crates/polars-arrow/src/io/ipc/read/schema.rs @@ -29,7 +29,7 @@ fn deserialize_field(ipc_field: arrow_format::ipc::FieldRef) -> PolarsResult<(Fi let extension = get_extension(&metadata); - let (data_type, ipc_field_) = get_data_type(ipc_field, extension, true)?; + let (dtype, ipc_field_) = get_dtype(ipc_field, extension, true)?; let field = Field { name: PlSmallStr::from_str( @@ -37,7 +37,7 @@ fn deserialize_field(ipc_field: arrow_format::ipc::FieldRef) -> PolarsResult<(Fi .name()? 
.ok_or_else(|| polars_err!(oos = "Every field in IPC must have a name"))?, ), - data_type, + dtype, is_nullable: ipc_field.nullable()?, metadata, }; @@ -87,7 +87,7 @@ fn deserialize_timeunit(time_unit: arrow_format::ipc::TimeUnit) -> PolarsResult< fn deserialize_time(time: TimeRef) -> PolarsResult<(ArrowDataType, IpcField)> { let unit = deserialize_timeunit(time.unit()?)?; - let data_type = match (time.bit_width()?, unit) { + let dtype = match (time.bit_width()?, unit) { (32, TimeUnit::Second) => ArrowDataType::Time32(TimeUnit::Second), (32, TimeUnit::Millisecond) => ArrowDataType::Time32(TimeUnit::Millisecond), (64, TimeUnit::Microsecond) => ArrowDataType::Time64(TimeUnit::Microsecond), @@ -98,7 +98,7 @@ fn deserialize_time(time: TimeRef) -> PolarsResult<(ArrowDataType, IpcField)> { ) }, }; - Ok((data_type, IpcField::default())) + Ok((dtype, IpcField::default())) } fn deserialize_timestamp(timestamp: TimestampRef) -> PolarsResult<(ArrowDataType, IpcField)> { @@ -143,9 +143,9 @@ fn deserialize_map(map: MapRef, field: FieldRef) -> PolarsResult<(ArrowDataType, .ok_or_else(|| polars_err!(oos = "IPC: Map must contain one child"))??; let (field, ipc_field) = deserialize_field(inner)?; - let data_type = ArrowDataType::Map(Box::new(field), is_sorted); + let dtype = ArrowDataType::Map(Box::new(field), is_sorted); Ok(( - data_type, + dtype, IpcField { fields: vec![ipc_field], dictionary_id: None, @@ -234,7 +234,7 @@ fn deserialize_fixed_size_list( } /// Get the Arrow data type from the flatbuffer Field table -fn get_data_type( +fn get_dtype( field: arrow_format::ipc::FieldRef, extension: Extension, may_be_dictionary: bool, @@ -245,7 +245,7 @@ fn get_data_type( .index_type()? .ok_or_else(|| polars_err!(oos = "indexType is mandatory in Dictionary."))?; let index_type = deserialize_integer(int)?; - let (inner, mut ipc_field) = get_data_type(field, extension, false)?; + let (inner, mut ipc_field) = get_dtype(field, extension, false)?; ipc_field.dictionary_id = Some(dictionary.id()?); return Ok(( ArrowDataType::Dictionary(index_type, Box::new(inner), dictionary.is_ordered()?), @@ -256,9 +256,9 @@ fn get_data_type( if let Some(extension) = extension { let (name, metadata) = extension; - let (data_type, fields) = get_data_type(field, None, false)?; + let (dtype, fields) = get_dtype(field, None, false)?; return Ok(( - ArrowDataType::Extension(name, Box::new(data_type), metadata), + ArrowDataType::Extension(name, Box::new(dtype), metadata), fields, )); } @@ -272,8 +272,8 @@ fn get_data_type( Null(_) => (ArrowDataType::Null, IpcField::default()), Bool(_) => (ArrowDataType::Boolean, IpcField::default()), Int(int) => { - let data_type = deserialize_integer(int)?.into(); - (data_type, IpcField::default()) + let dtype = deserialize_integer(int)?.into(); + (dtype, IpcField::default()) }, Binary(_) => (ArrowDataType::Binary, IpcField::default()), LargeBinary(_) => (ArrowDataType::LargeBinary, IpcField::default()), @@ -291,24 +291,24 @@ fn get_data_type( IpcField::default(), ), FloatingPoint(float) => { - let data_type = match float.precision()? { + let dtype = match float.precision()? { arrow_format::ipc::Precision::Half => ArrowDataType::Float16, arrow_format::ipc::Precision::Single => ArrowDataType::Float32, arrow_format::ipc::Precision::Double => ArrowDataType::Float64, }; - (data_type, IpcField::default()) + (dtype, IpcField::default()) }, Date(date) => { - let data_type = match date.unit()? { + let dtype = match date.unit()? 
{ arrow_format::ipc::DateUnit::Day => ArrowDataType::Date32, arrow_format::ipc::DateUnit::Millisecond => ArrowDataType::Date64, }; - (data_type, IpcField::default()) + (dtype, IpcField::default()) }, Time(time) => deserialize_time(time)?, Timestamp(timestamp) => deserialize_timestamp(timestamp)?, Interval(interval) => { - let data_type = match interval.unit()? { + let dtype = match interval.unit()? { arrow_format::ipc::IntervalUnit::YearMonth => { ArrowDataType::Interval(IntervalUnit::YearMonth) }, @@ -319,7 +319,7 @@ fn get_data_type( ArrowDataType::Interval(IntervalUnit::MonthDayNano) }, }; - (data_type, IpcField::default()) + (dtype, IpcField::default()) }, Duration(duration) => { let time_unit = deserialize_timeunit(duration.unit()?)?; @@ -339,13 +339,13 @@ fn get_data_type( .try_into() .map_err(|_| polars_err!(oos = OutOfSpecKind::NegativeFooterLength))?; - let data_type = match bit_width { + let dtype = match bit_width { 128 => ArrowDataType::Decimal(precision, scale), 256 => ArrowDataType::Decimal256(precision, scale), _ => return Err(polars_err!(oos = OutOfSpecKind::NegativeFooterLength)), }; - (data_type, IpcField::default()) + (dtype, IpcField::default()) }, List(_) => deserialize_list(field)?, LargeList(_) => deserialize_large_list(field)?, diff --git a/crates/polars-arrow/src/io/ipc/write/common.rs b/crates/polars-arrow/src/io/ipc/write/common.rs index 30312bf7f19d..2aebf1ec5d50 100644 --- a/crates/polars-arrow/src/io/ipc/write/common.rs +++ b/crates/polars-arrow/src/io/ipc/write/common.rs @@ -38,7 +38,7 @@ fn encode_dictionary( encoded_dictionaries: &mut Vec, ) -> PolarsResult<()> { use PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Utf8 | LargeUtf8 | Binary | LargeBinary | Primitive(_) | Boolean | Null | FixedSizeBinary | BinaryView | Utf8View => Ok(()), Dictionary(key_type) => match_integer_type!(key_type, |$T| { @@ -231,7 +231,7 @@ fn serialize_compression( } fn set_variadic_buffer_counts(counts: &mut Vec, array: &dyn Array) { - match array.data_type() { + match array.dtype() { ArrowDataType::Utf8View => { let array = array.as_any().downcast_ref::().unwrap(); counts.push(array.data_buffers().len() as i64); @@ -297,7 +297,7 @@ fn chunk_to_bytes_amortized( let mut variadic_buffer_counts = vec![]; for array in chunk.arrays() { // We don't want to write all buffers in sliced arrays. - let array = match array.data_type() { + let array = match array.dtype() { ArrowDataType::BinaryView => { let concrete_arr = array.as_any().downcast_ref::().unwrap(); gc_bin_view(array, concrete_arr) @@ -432,7 +432,7 @@ impl DictionaryTracker { /// has never been seen before, return `Ok(true)` to indicate that the dictionary was just /// inserted. 
pub fn insert(&mut self, dict_id: i64, array: &dyn Array) -> PolarsResult { - let values = match array.data_type() { + let values = match array.dtype() { ArrowDataType::Dictionary(key_type, _, _) => { match_integer_type!(key_type, |$T| { let array = array diff --git a/crates/polars-arrow/src/io/ipc/write/mod.rs b/crates/polars-arrow/src/io/ipc/write/mod.rs index 7dd2cf2a583d..d8afc1571721 100644 --- a/crates/polars-arrow/src/io/ipc/write/mod.rs +++ b/crates/polars-arrow/src/io/ipc/write/mod.rs @@ -27,28 +27,28 @@ pub mod file_async; use super::IpcField; use crate::datatypes::{ArrowDataType, Field}; -fn default_ipc_field(data_type: &ArrowDataType, current_id: &mut i64) -> IpcField { +fn default_ipc_field(dtype: &ArrowDataType, current_id: &mut i64) -> IpcField { use crate::datatypes::ArrowDataType::*; - match data_type.to_logical_type() { + match dtype.to_logical_type() { // single child => recurse Map(inner, ..) | FixedSizeList(inner, _) | LargeList(inner) | List(inner) => IpcField { - fields: vec![default_ipc_field(inner.data_type(), current_id)], + fields: vec![default_ipc_field(inner.dtype(), current_id)], dictionary_id: None, }, // multiple children => recurse Union(fields, ..) | Struct(fields) => IpcField { fields: fields .iter() - .map(|f| default_ipc_field(f.data_type(), current_id)) + .map(|f| default_ipc_field(f.dtype(), current_id)) .collect(), dictionary_id: None, }, // dictionary => current_id - Dictionary(_, data_type, _) => { + Dictionary(_, dtype, _) => { let dictionary_id = Some(*current_id); *current_id += 1; IpcField { - fields: vec![default_ipc_field(data_type, current_id)], + fields: vec![default_ipc_field(dtype, current_id)], dictionary_id, } }, @@ -64,6 +64,6 @@ fn default_ipc_field(data_type: &ArrowDataType, current_id: &mut i64) -> IpcFiel pub fn default_ipc_fields<'a>(fields: impl ExactSizeIterator) -> Vec { let mut dictionary_id = 0i64; fields - .map(|field| default_ipc_field(field.data_type().to_logical_type(), &mut dictionary_id)) + .map(|field| default_ipc_field(field.dtype().to_logical_type(), &mut dictionary_id)) .collect() } diff --git a/crates/polars-arrow/src/io/ipc/write/schema.rs b/crates/polars-arrow/src/io/ipc/write/schema.rs index d9ae41652c7d..e8ed25c5c77e 100644 --- a/crates/polars-arrow/src/io/ipc/write/schema.rs +++ b/crates/polars-arrow/src/io/ipc/write/schema.rs @@ -78,7 +78,7 @@ fn write_extension( pub(crate) fn serialize_field(field: &Field, ipc_field: &IpcField) -> arrow_format::ipc::Field { // custom metadata. 
let mut kv_vec = vec![]; - if let ArrowDataType::Extension(name, _, metadata) = field.data_type() { + if let ArrowDataType::Extension(name, _, metadata) = field.dtype() { write_extension( name.as_str(), metadata.as_ref().map(|x| x.as_str()), @@ -86,28 +86,28 @@ pub(crate) fn serialize_field(field: &Field, ipc_field: &IpcField) -> arrow_form ); } - let type_ = serialize_type(field.data_type()); - let children = serialize_children(field.data_type(), ipc_field); + let type_ = serialize_type(field.dtype()); + let children = serialize_children(field.dtype(), ipc_field); - let dictionary = - if let ArrowDataType::Dictionary(index_type, inner, is_ordered) = field.data_type() { - if let ArrowDataType::Extension(name, _, metadata) = inner.as_ref() { - write_extension( - name.as_str(), - metadata.as_ref().map(|x| x.as_str()), - &mut kv_vec, - ); - } - Some(serialize_dictionary( - index_type, - ipc_field - .dictionary_id - .expect("All Dictionary types have `dict_id`"), - *is_ordered, - )) - } else { - None - }; + let dictionary = if let ArrowDataType::Dictionary(index_type, inner, is_ordered) = field.dtype() + { + if let ArrowDataType::Extension(name, _, metadata) = inner.as_ref() { + write_extension( + name.as_str(), + metadata.as_ref().map(|x| x.as_str()), + &mut kv_vec, + ); + } + Some(serialize_dictionary( + index_type, + ipc_field + .dictionary_id + .expect("All Dictionary types have `dict_id`"), + *is_ordered, + )) + } else { + None + }; write_metadata(&field.metadata, &mut kv_vec); @@ -136,10 +136,10 @@ fn serialize_time_unit(unit: &TimeUnit) -> arrow_format::ipc::TimeUnit { } } -fn serialize_type(data_type: &ArrowDataType) -> arrow_format::ipc::Type { +fn serialize_type(dtype: &ArrowDataType) -> arrow_format::ipc::Type { use arrow_format::ipc; use ArrowDataType::*; - match data_type { + match dtype { Null => ipc::Type::Null(Box::new(ipc::Null {})), Boolean => ipc::Type::Bool(Box::new(ipc::Bool {})), UInt8 => ipc::Type::Int(Box::new(ipc::Int { @@ -253,11 +253,11 @@ fn serialize_type(data_type: &ArrowDataType) -> arrow_format::ipc::Type { } fn serialize_children( - data_type: &ArrowDataType, + dtype: &ArrowDataType, ipc_field: &IpcField, ) -> Vec { use ArrowDataType::*; - match data_type { + match dtype { Null | Boolean | Int8 diff --git a/crates/polars-arrow/src/io/ipc/write/serialize/mod.rs b/crates/polars-arrow/src/io/ipc/write/serialize/mod.rs index b33f50b2277a..f13098477d4d 100644 --- a/crates/polars-arrow/src/io/ipc/write/serialize/mod.rs +++ b/crates/polars-arrow/src/io/ipc/write/serialize/mod.rs @@ -50,7 +50,7 @@ pub fn write( null_count: array.null_count() as i64, }); use PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => (), Boolean => write_boolean( array.as_any().downcast_ref().unwrap(), diff --git a/crates/polars-arrow/src/legacy/array/fixed_size_list.rs b/crates/polars-arrow/src/legacy/array/fixed_size_list.rs index 31bc5880c68a..99382b0b6407 100644 --- a/crates/polars-arrow/src/legacy/array/fixed_size_list.rs +++ b/crates/polars-arrow/src/legacy/array/fixed_size_list.rs @@ -51,12 +51,12 @@ impl AnonymousBuilder { } pub fn finish(self, inner_dtype: Option<&ArrowDataType>) -> PolarsResult { - let mut inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type()); + let mut inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].dtype()); if is_nested_null(inner_dtype) { for arr in &self.arrays { - if !is_nested_null(arr.data_type()) { - inner_dtype = arr.data_type(); + if !is_nested_null(arr.dtype()) { + 
inner_dtype = arr.dtype(); break; } } @@ -67,9 +67,9 @@ impl AnonymousBuilder { .arrays .iter() .map(|arr| { - if matches!(arr.data_type(), ArrowDataType::Null) { + if matches!(arr.dtype(), ArrowDataType::Null) { new_null_array(inner_dtype.clone(), arr.len()) - } else if is_nested_null(arr.data_type()) { + } else if is_nested_null(arr.dtype()) { convert_inner_type(&**arr, inner_dtype) } else { arr.to_boxed() @@ -79,9 +79,9 @@ impl AnonymousBuilder { let values = concatenate_owned_unchecked(&arrays)?; - let data_type = FixedSizeListArray::default_datatype(inner_dtype.clone(), self.width); + let dtype = FixedSizeListArray::default_datatype(inner_dtype.clone(), self.width); Ok(FixedSizeListArray::new( - data_type, + dtype, values, self.validity.map(|validity| validity.into()), )) diff --git a/crates/polars-arrow/src/legacy/array/list.rs b/crates/polars-arrow/src/legacy/array/list.rs index ff02011663cb..3e3118f25248 100644 --- a/crates/polars-arrow/src/legacy/array/list.rs +++ b/crates/polars-arrow/src/legacy/array/list.rs @@ -118,7 +118,7 @@ impl<'a> AnonymousBuilder<'a> { }, } } else { - let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type()); + let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].dtype()); // check if there is a dtype that is not `Null` // if we find it, we will convert the null arrays @@ -126,8 +126,8 @@ impl<'a> AnonymousBuilder<'a> { let mut non_null_dtype = None; if is_nested_null(inner_dtype) { for arr in &self.arrays { - if !is_nested_null(arr.data_type()) { - non_null_dtype = Some(arr.data_type()); + if !is_nested_null(arr.dtype()) { + non_null_dtype = Some(arr.dtype()); break; } } @@ -139,7 +139,7 @@ impl<'a> AnonymousBuilder<'a> { .arrays .iter() .map(|arr| { - if is_nested_null(arr.data_type()) { + if is_nested_null(arr.dtype()) { convert_inner_type(&**arr, dtype) } else { arr.to_boxed() diff --git a/crates/polars-arrow/src/legacy/array/mod.rs b/crates/polars-arrow/src/legacy/array/mod.rs index 18e5a386df0e..ee6c743fbdcf 100644 --- a/crates/polars-arrow/src/legacy/array/mod.rs +++ b/crates/polars-arrow/src/legacy/array/mod.rs @@ -50,7 +50,7 @@ pub trait ListFromIter { /// Will produce incorrect arrays if size hint is incorrect. 
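Note: the doc line above belongs to `from_iter_primitive_trusted_len`, which continues just below; its safety rests on the iterator's reported length being exact, because buffers are sized from that hint. A tiny std-only illustration of the contract (not the crate's code), assuming a well-behaved `ExactSizeIterator`:

    /// The pattern behind trusted-len construction: pre-size the buffer from
    /// `size_hint` and fill it in one pass. With an exact hint this never
    /// reallocates; unchecked builders that also record the length up front
    /// would produce a malformed result if the hint lied.
    fn collect_trusting_hint<I: Iterator<Item = u32>>(iter: I) -> Vec<u32> {
        let (lower, _) = iter.size_hint();
        let mut out = Vec::with_capacity(lower);
        out.extend(iter);
        out
    }

    fn main() {
        assert_eq!(collect_trusting_hint(0..5u32), vec![0, 1, 2, 3, 4]);
    }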
unsafe fn from_iter_primitive_trusted_len( iter: I, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> ListArray where T: NativeType, @@ -70,9 +70,9 @@ pub trait ListFromIter { // SAFETY: // offsets are monotonically increasing ListArray::new( - ListArray::::default_datatype(data_type.clone()), + ListArray::::default_datatype(dtype.clone()), Offsets::new_unchecked(offsets).into(), - Box::new(values.to(data_type)), + Box::new(values.to(dtype)), Some(validity.into()), ) } @@ -185,13 +185,13 @@ pub trait ListFromIter { } impl ListFromIter for ListArray {} -fn is_nested_null(data_type: &ArrowDataType) -> bool { - match data_type { +fn is_nested_null(dtype: &ArrowDataType) -> bool { + match dtype { ArrowDataType::Null => true, - ArrowDataType::LargeList(field) => is_nested_null(field.data_type()), - ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.data_type()), + ArrowDataType::LargeList(field) => is_nested_null(field.dtype()), + ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()), ArrowDataType::Struct(fields) => { - fields.iter().all(|field| is_nested_null(field.data_type())) + fields.iter().all(|field| is_nested_null(field.dtype())) }, _ => false, } @@ -203,8 +203,8 @@ pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box { let array = array.as_any().downcast_ref::().unwrap(); let inner = array.values(); - let new_values = convert_inner_type(inner.as_ref(), field.data_type()); - let dtype = LargeListArray::default_datatype(new_values.data_type().clone()); + let new_values = convert_inner_type(inner.as_ref(), field.dtype()); + let dtype = LargeListArray::default_datatype(new_values.dtype().clone()); LargeListArray::new( dtype, array.offsets().clone(), @@ -216,9 +216,9 @@ pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box { let array = array.as_any().downcast_ref::().unwrap(); let inner = array.values(); - let new_values = convert_inner_type(inner.as_ref(), field.data_type()); + let new_values = convert_inner_type(inner.as_ref(), field.dtype()); let dtype = - FixedSizeListArray::default_datatype(new_values.data_type().clone(), *width); + FixedSizeListArray::default_datatype(new_values.dtype().clone(), *width); FixedSizeListArray::new(dtype, new_values, array.validity().cloned()).boxed() }, ArrowDataType::Struct(fields) => { @@ -227,7 +227,7 @@ pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box>(); StructArray::new(dtype.clone(), new_values, array.validity().cloned()).boxed() }, diff --git a/crates/polars-arrow/src/legacy/array/null.rs b/crates/polars-arrow/src/legacy/array/null.rs index 6a802540db83..ec630250e0c5 100644 --- a/crates/polars-arrow/src/legacy/array/null.rs +++ b/crates/polars-arrow/src/legacy/array/null.rs @@ -10,7 +10,7 @@ pub struct MutableNullArray { } impl MutableArray for MutableNullArray { - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { &ArrowDataType::Null } diff --git a/crates/polars-arrow/src/legacy/kernels/atan2.rs b/crates/polars-arrow/src/legacy/kernels/atan2.rs index 7884ab18d09f..40d3d527b24a 100644 --- a/crates/polars-arrow/src/legacy/kernels/atan2.rs +++ b/crates/polars-arrow/src/legacy/kernels/atan2.rs @@ -8,5 +8,5 @@ pub fn atan2(arr_1: &PrimitiveArray, arr_2: &PrimitiveArray) -> Primiti where T: Float + NativeType, { - binary(arr_1, arr_2, arr_1.data_type().clone(), |a, b| a.atan2(b)) + binary(arr_1, arr_2, arr_1.dtype().clone(), |a, b| a.atan2(b)) } diff --git a/crates/polars-arrow/src/legacy/kernels/list.rs 
b/crates/polars-arrow/src/legacy/kernels/list.rs index 2b21d872c948..4f3f332dac28 100644 --- a/crates/polars-arrow/src/legacy/kernels/list.rs +++ b/crates/polars-arrow/src/legacy/kernels/list.rs @@ -139,7 +139,7 @@ pub fn array_to_unit_list(array: ArrayRef) -> ListArray { // offsets are monotonically increasing unsafe { let offsets: OffsetsBuffer = Offsets::new_unchecked(offsets).into(); - let dtype = ListArray::::default_datatype(array.data_type().clone()); + let dtype = ListArray::::default_datatype(array.dtype().clone()); ListArray::::new(dtype, offsets, array, None) } } diff --git a/crates/polars-arrow/src/legacy/kernels/pow.rs b/crates/polars-arrow/src/legacy/kernels/pow.rs index 35ab21bcf7f8..4d2842aeccb0 100644 --- a/crates/polars-arrow/src/legacy/kernels/pow.rs +++ b/crates/polars-arrow/src/legacy/kernels/pow.rs @@ -9,7 +9,7 @@ where T: Pow + NativeType, F: NativeType, { - binary(arr_1, arr_2, arr_1.data_type().clone(), |a, b| { + binary(arr_1, arr_2, arr_1.dtype().clone(), |a, b| { Pow::pow(a, b) }) } diff --git a/crates/polars-arrow/src/legacy/kernels/set.rs b/crates/polars-arrow/src/legacy/kernels/set.rs index 41f3dbcf5c3d..338fc4b1a17c 100644 --- a/crates/polars-arrow/src/legacy/kernels/set.rs +++ b/crates/polars-arrow/src/legacy/kernels/set.rs @@ -34,7 +34,7 @@ where } }); - PrimitiveArray::new(array.data_type().clone(), av.into(), None) + PrimitiveArray::new(array.dtype().clone(), av.into(), None) } /// Set values in a primitive array based on a mask array. This is fast when large chunks of bits are set or unset. @@ -42,7 +42,7 @@ pub fn set_with_mask( array: &PrimitiveArray, mask: &BooleanArray, value: T, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PrimitiveArray { let values = array.values(); @@ -61,7 +61,7 @@ pub fn set_with_mask( valid.bitor(mask_bitmap) }); - PrimitiveArray::new(data_type, buf.into(), validity) + PrimitiveArray::new(dtype, buf.into(), validity) } /// Efficiently sets value at the indices from the iterator to `set_value`. 
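Note on the `set.rs` hunks above: `set_with_mask` takes the replacement `value`, a boolean mask, and the output `dtype`; every slot where the mask is set becomes `value` and is marked valid (the old validity is OR-ed with the mask), while unmasked slots keep both their value and their null-ness. A std-only sketch of that behaviour with `Option` standing in for the validity bitmap (illustration only, not the crate's kernel):

    /// Wherever `mask` is true the output takes `value` and becomes valid;
    /// everywhere else both the element and its null-ness are kept.
    fn set_with_mask_sketch(values: &[Option<i64>], mask: &[bool], value: i64) -> Vec<Option<i64>> {
        values
            .iter()
            .zip(mask)
            .map(|(v, &m)| if m { Some(value) } else { *v })
            .collect()
    }

    fn main() {
        let out = set_with_mask_sketch(&[Some(1), None, Some(3)], &[false, true, false], 9);
        assert_eq!(out, vec![Some(1), Some(9), Some(3)]);
    }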
@@ -70,7 +70,7 @@ pub fn scatter_single_non_null( array: &PrimitiveArray, idx: I, set_value: T, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PolarsResult> where T: NativeType, @@ -89,7 +89,7 @@ where })?; Ok(PrimitiveArray::new( - data_type, + dtype, buf.into(), array.validity().cloned(), )) diff --git a/crates/polars-arrow/src/mmap/array.rs b/crates/polars-arrow/src/mmap/array.rs index d22705c63b63..ae9af81f1bbc 100644 --- a/crates/polars-arrow/src/mmap/array.rs +++ b/crates/polars-arrow/src/mmap/array.rs @@ -187,9 +187,9 @@ fn mmap_fixed_size_binary>( node: &Node, block_offset: usize, buffers: &mut VecDeque, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ) -> PolarsResult { - let bytes_per_row = if let ArrowDataType::FixedSizeBinary(bytes_per_row) = data_type { + let bytes_per_row = if let ArrowDataType::FixedSizeBinary(bytes_per_row) = dtype { bytes_per_row } else { polars_bail!(ComputeError: "out-of-spec {:?}", OutOfSpecKind::InvalidDataType); @@ -337,14 +337,14 @@ fn mmap_list>( data: Arc, node: &Node, block_offset: usize, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ipc_field: &IpcField, dictionaries: &Dictionaries, field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, buffers: &mut VecDeque, ) -> PolarsResult { - let child = ListArray::::try_get_child(data_type)?.data_type(); + let child = ListArray::::try_get_child(dtype)?.dtype(); let (num_rows, null_count) = get_num_rows_and_null_count(node)?; let data_ref = data.as_ref().as_ref(); @@ -383,16 +383,16 @@ fn mmap_fixed_size_list>( data: Arc, node: &Node, block_offset: usize, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ipc_field: &IpcField, dictionaries: &Dictionaries, field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, buffers: &mut VecDeque, ) -> PolarsResult { - let child = FixedSizeListArray::try_child_and_size(data_type)? + let child = FixedSizeListArray::try_child_and_size(dtype)? 
.0 - .data_type(); + .dtype(); let (num_rows, null_count) = get_num_rows_and_null_count(node)?; let data_ref = data.as_ref().as_ref(); @@ -428,14 +428,14 @@ fn mmap_struct>( data: Arc, node: &Node, block_offset: usize, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ipc_field: &IpcField, dictionaries: &Dictionaries, field_nodes: &mut VecDeque, variadic_buffer_counts: &mut VecDeque, buffers: &mut VecDeque, ) -> PolarsResult { - let children = StructArray::try_get_fields(data_type)?; + let children = StructArray::try_get_fields(dtype)?; let (num_rows, null_count) = get_num_rows_and_null_count(node)?; let data_ref = data.as_ref().as_ref(); @@ -444,7 +444,7 @@ fn mmap_struct>( let values = children .iter() - .map(|f| &f.data_type) + .map(|f| &f.dtype) .zip(ipc_field.fields.iter()) .map(|(child, ipc)| { get_array( @@ -514,7 +514,7 @@ fn mmap_dict>( fn get_array>( data: Arc, block_offset: usize, - data_type: &ArrowDataType, + dtype: &ArrowDataType, ipc_field: &IpcField, dictionaries: &Dictionaries, field_nodes: &mut VecDeque, @@ -526,7 +526,7 @@ fn get_array>( || polars_err!(ComputeError: "out-of-spec: {:?}", OutOfSpecKind::ExpectedBuffer), )?; - match data_type.to_physical_type() { + match dtype.to_physical_type() { Null => mmap_null(data, &node, block_offset, buffers), Boolean => mmap_boolean(data, &node, block_offset, buffers), Primitive(p) => with_match_primitive_type_full!(p, |$T| { @@ -536,13 +536,13 @@ fn get_array>( Utf8View | BinaryView => { mmap_binview(data, &node, block_offset, buffers, variadic_buffer_counts) }, - FixedSizeBinary => mmap_fixed_size_binary(data, &node, block_offset, buffers, data_type), + FixedSizeBinary => mmap_fixed_size_binary(data, &node, block_offset, buffers, dtype), LargeBinary | LargeUtf8 => mmap_binary::(data, &node, block_offset, buffers), List => mmap_list::( data, &node, block_offset, - data_type, + dtype, ipc_field, dictionaries, field_nodes, @@ -553,7 +553,7 @@ fn get_array>( data, &node, block_offset, - data_type, + dtype, ipc_field, dictionaries, field_nodes, @@ -564,7 +564,7 @@ fn get_array>( data, &node, block_offset, - data_type, + dtype, ipc_field, dictionaries, field_nodes, @@ -575,7 +575,7 @@ fn get_array>( data, &node, block_offset, - data_type, + dtype, ipc_field, dictionaries, field_nodes, @@ -587,7 +587,7 @@ fn get_array>( data, &node, block_offset, - data_type, + dtype, ipc_field, dictionaries, field_nodes, @@ -603,7 +603,7 @@ fn get_array>( pub(crate) unsafe fn mmap>( data: Arc, block_offset: usize, - data_type: ArrowDataType, + dtype: ArrowDataType, ipc_field: &IpcField, dictionaries: &Dictionaries, field_nodes: &mut VecDeque, @@ -613,7 +613,7 @@ pub(crate) unsafe fn mmap>( let array = get_array( data, block_offset, - &data_type, + &dtype, ipc_field, dictionaries, field_nodes, @@ -622,5 +622,5 @@ pub(crate) unsafe fn mmap>( )?; // The unsafety comes from the fact that `array` is not necessarily valid - // the IPC file may be corrupted (e.g. 
invalid offsets or non-utf8 data) - unsafe { try_from(InternalArrowArray::new(array, data_type)) } + unsafe { try_from(InternalArrowArray::new(array, dtype)) } } diff --git a/crates/polars-arrow/src/mmap/mod.rs b/crates/polars-arrow/src/mmap/mod.rs index 8d1ed5f89155..b934c31de563 100644 --- a/crates/polars-arrow/src/mmap/mod.rs +++ b/crates/polars-arrow/src/mmap/mod.rs @@ -88,14 +88,14 @@ unsafe fn _mmap_record>( fields .iter_values() - .map(|f| &f.data_type) + .map(|f| &f.dtype) .cloned() .zip(ipc_fields) - .map(|(data_type, ipc_field)| { + .map(|(dtype, ipc_field)| { array::mmap( data.clone(), offset, - data_type, + dtype, ipc_field, dictionaries, &mut field_nodes, @@ -178,7 +178,7 @@ unsafe fn mmap_dictionary>( .ok_or_else(|| polars_err!(ComputeError: "out-of-spec {:?}", OutOfSpecKind::MissingData))?; let value_type = if let ArrowDataType::Dictionary(_, value_type, _) = - first_field.data_type.to_logical_type() + first_field.dtype.to_logical_type() { value_type.as_ref() } else { diff --git a/crates/polars-arrow/src/scalar/README.md b/crates/polars-arrow/src/scalar/README.md index b780081b6131..ea6c3791d6be 100644 --- a/crates/polars-arrow/src/scalar/README.md +++ b/crates/polars-arrow/src/scalar/README.md @@ -19,7 +19,7 @@ Specifically, a `Scalar` is a trait object that can be downcasted to concrete im Like `Array`, `Scalar` implements -- `data_type`, which is used to perform the correct downcast +- `dtype`, which is used to perform the correct downcast - `is_valid`, to tell whether the scalar is null or not ### There is one implementation per arrows' physical type diff --git a/crates/polars-arrow/src/scalar/binary.rs b/crates/polars-arrow/src/scalar/binary.rs index bdc1f8b8243a..f758cf021b1c 100644 --- a/crates/polars-arrow/src/scalar/binary.rs +++ b/crates/polars-arrow/src/scalar/binary.rs @@ -45,7 +45,7 @@ impl Scalar for BinaryScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { if O::IS_LARGE { &ArrowDataType::LargeBinary } else { diff --git a/crates/polars-arrow/src/scalar/binview.rs b/crates/polars-arrow/src/scalar/binview.rs index e96c90c04adb..958037041623 100644 --- a/crates/polars-arrow/src/scalar/binview.rs +++ b/crates/polars-arrow/src/scalar/binview.rs @@ -62,7 +62,7 @@ impl Scalar for BinaryViewScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { if T::IS_UTF8 { &ArrowDataType::Utf8View } else { diff --git a/crates/polars-arrow/src/scalar/boolean.rs b/crates/polars-arrow/src/scalar/boolean.rs index 82d1e9c6e7ed..44158d8c3636 100644 --- a/crates/polars-arrow/src/scalar/boolean.rs +++ b/crates/polars-arrow/src/scalar/boolean.rs @@ -33,7 +33,7 @@ impl Scalar for BooleanScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { &ArrowDataType::Boolean } } diff --git a/crates/polars-arrow/src/scalar/dictionary.rs b/crates/polars-arrow/src/scalar/dictionary.rs index f9559009a1c6..b92a99355559 100644 --- a/crates/polars-arrow/src/scalar/dictionary.rs +++ b/crates/polars-arrow/src/scalar/dictionary.rs @@ -9,12 +9,12 @@ use crate::datatypes::ArrowDataType; pub struct DictionaryScalar { value: Option>, phantom: std::marker::PhantomData, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PartialEq for DictionaryScalar { fn eq(&self, other: &Self) -> bool { - (self.data_type == other.data_type) && (self.value.as_ref() == other.value.as_ref()) + (self.dtype == other.dtype) && (self.value.as_ref() == other.value.as_ref()) } } @@ -22,14 
+22,14 @@ impl DictionaryScalar { /// returns a new [`DictionaryScalar`] /// # Panics /// iff - /// * the `data_type` is not `List` or `LargeList` (depending on this scalar's offset `O`) - /// * the child of the `data_type` is not equal to the `values` + /// * the `dtype` is not `List` or `LargeList` (depending on this scalar's offset `O`) + /// * the child of the `dtype` is not equal to the `values` #[inline] - pub fn new(data_type: ArrowDataType, value: Option>) -> Self { + pub fn new(dtype: ArrowDataType, value: Option>) -> Self { Self { value, phantom: std::marker::PhantomData, - data_type, + dtype, } } @@ -48,7 +48,7 @@ impl Scalar for DictionaryScalar { self.value.is_some() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/equal.rs b/crates/polars-arrow/src/scalar/equal.rs index 78055671b32e..3978765fe73a 100644 --- a/crates/polars-arrow/src/scalar/equal.rs +++ b/crates/polars-arrow/src/scalar/equal.rs @@ -30,12 +30,12 @@ macro_rules! dyn_eq { } fn equal(lhs: &dyn Scalar, rhs: &dyn Scalar) -> bool { - if lhs.data_type() != rhs.data_type() { + if lhs.dtype() != rhs.dtype() { return false; } use PhysicalType::*; - match lhs.data_type().to_physical_type() { + match lhs.dtype().to_physical_type() { Null => dyn_eq!(NullScalar, lhs, rhs), Boolean => dyn_eq!(BooleanScalar, lhs, rhs), Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { diff --git a/crates/polars-arrow/src/scalar/fixed_size_binary.rs b/crates/polars-arrow/src/scalar/fixed_size_binary.rs index 0c6c5602c2bc..a14d2886d75d 100644 --- a/crates/polars-arrow/src/scalar/fixed_size_binary.rs +++ b/crates/polars-arrow/src/scalar/fixed_size_binary.rs @@ -5,31 +5,31 @@ use crate::datatypes::ArrowDataType; /// The [`Scalar`] implementation of fixed size binary ([`Option>`]). pub struct FixedSizeBinaryScalar { value: Option>, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl FixedSizeBinaryScalar { /// Returns a new [`FixedSizeBinaryScalar`]. /// # Panics /// iff - /// * the `data_type` is not `FixedSizeBinary` + /// * the `dtype` is not `FixedSizeBinary` /// * the size of child binary is not equal #[inline] - pub fn new>>(data_type: ArrowDataType, value: Option
) -> Self { + pub fn new>>(dtype: ArrowDataType, value: Option
) -> Self { assert_eq!( - data_type.to_physical_type(), + dtype.to_physical_type(), crate::datatypes::PhysicalType::FixedSizeBinary ); Self { value: value.map(|x| { let x: Vec = x.into(); assert_eq!( - data_type.to_logical_type(), + dtype.to_logical_type(), &ArrowDataType::FixedSizeBinary(x.len()) ); x.into_boxed_slice() }), - data_type, + dtype, } } @@ -52,7 +52,7 @@ impl Scalar for FixedSizeBinaryScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/fixed_size_list.rs b/crates/polars-arrow/src/scalar/fixed_size_list.rs index 0ef0f083943c..5810eeab2dfc 100644 --- a/crates/polars-arrow/src/scalar/fixed_size_list.rs +++ b/crates/polars-arrow/src/scalar/fixed_size_list.rs @@ -9,12 +9,12 @@ use crate::datatypes::ArrowDataType; #[derive(Debug, Clone)] pub struct FixedSizeListScalar { values: Option>, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PartialEq for FixedSizeListScalar { fn eq(&self, other: &Self) -> bool { - (self.data_type == other.data_type) + (self.dtype == other.dtype) && (self.values.is_some() == other.values.is_some()) && ((self.values.is_none()) | (self.values.as_ref() == other.values.as_ref())) } @@ -24,18 +24,18 @@ impl FixedSizeListScalar { /// returns a new [`FixedSizeListScalar`] /// # Panics /// iff - /// * the `data_type` is not `FixedSizeList` - /// * the child of the `data_type` is not equal to the `values` + /// * the `dtype` is not `FixedSizeList` + /// * the child of the `dtype` is not equal to the `values` /// * the size of child array is not equal #[inline] - pub fn new(data_type: ArrowDataType, values: Option>) -> Self { - let (field, size) = FixedSizeListArray::get_child_and_size(&data_type); - let inner_data_type = field.data_type(); + pub fn new(dtype: ArrowDataType, values: Option>) -> Self { + let (field, size) = FixedSizeListArray::get_child_and_size(&dtype); + let inner_dtype = field.dtype(); let values = values.inspect(|x| { - assert_eq!(inner_data_type, x.data_type()); + assert_eq!(inner_dtype, x.dtype()); assert_eq!(size, x.len()); }); - Self { values, data_type } + Self { values, dtype } } /// The values of the [`FixedSizeListScalar`] @@ -53,7 +53,7 @@ impl Scalar for FixedSizeListScalar { self.values.is_some() } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/list.rs b/crates/polars-arrow/src/scalar/list.rs index c58a11150e08..6978c6e61860 100644 --- a/crates/polars-arrow/src/scalar/list.rs +++ b/crates/polars-arrow/src/scalar/list.rs @@ -12,12 +12,12 @@ pub struct ListScalar { values: Box, is_valid: bool, phantom: std::marker::PhantomData, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PartialEq for ListScalar { fn eq(&self, other: &Self) -> bool { - (self.data_type == other.data_type) + (self.dtype == other.dtype) && (self.is_valid == other.is_valid) && ((!self.is_valid) | (self.values.as_ref() == other.values.as_ref())) } @@ -27,23 +27,23 @@ impl ListScalar { /// returns a new [`ListScalar`] /// # Panics /// iff - /// * the `data_type` is not `List` or `LargeList` (depending on this scalar's offset `O`) - /// * the child of the `data_type` is not equal to the `values` + /// * the `dtype` is not `List` or `LargeList` (depending on this scalar's offset `O`) + /// * the child of the `dtype` is not equal to the `values` #[inline] - pub fn new(data_type: ArrowDataType, values: Option>) -> 
Self { - let inner_data_type = ListArray::::get_child_type(&data_type); + pub fn new(dtype: ArrowDataType, values: Option>) -> Self { + let inner_dtype = ListArray::::get_child_type(&dtype); let (is_valid, values) = match values { Some(values) => { - assert_eq!(inner_data_type, values.data_type()); + assert_eq!(inner_dtype, values.dtype()); (true, values) }, - None => (false, new_empty_array(inner_data_type.clone())), + None => (false, new_empty_array(inner_dtype.clone())), }; Self { values, is_valid, phantom: std::marker::PhantomData, - data_type, + dtype, } } @@ -62,7 +62,7 @@ impl Scalar for ListScalar { self.is_valid } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/map.rs b/crates/polars-arrow/src/scalar/map.rs index 6dd204a83e02..f9e7b238c481 100644 --- a/crates/polars-arrow/src/scalar/map.rs +++ b/crates/polars-arrow/src/scalar/map.rs @@ -10,12 +10,12 @@ use crate::datatypes::ArrowDataType; pub struct MapScalar { values: Box, is_valid: bool, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PartialEq for MapScalar { fn eq(&self, other: &Self) -> bool { - (self.data_type == other.data_type) + (self.dtype == other.dtype) && (self.is_valid == other.is_valid) && ((!self.is_valid) | (self.values.as_ref() == other.values.as_ref())) } @@ -25,23 +25,23 @@ impl MapScalar { /// returns a new [`MapScalar`] /// # Panics /// iff - /// * the `data_type` is not `Map` - /// * the child of the `data_type` is not equal to the `values` + /// * the `dtype` is not `Map` + /// * the child of the `dtype` is not equal to the `values` #[inline] - pub fn new(data_type: ArrowDataType, values: Option>) -> Self { - let inner_field = MapArray::try_get_field(&data_type).unwrap(); - let inner_data_type = inner_field.data_type(); + pub fn new(dtype: ArrowDataType, values: Option>) -> Self { + let inner_field = MapArray::try_get_field(&dtype).unwrap(); + let inner_dtype = inner_field.dtype(); let (is_valid, values) = match values { Some(values) => { - assert_eq!(inner_data_type, values.data_type()); + assert_eq!(inner_dtype, values.dtype()); (true, values) }, - None => (false, new_empty_array(inner_data_type.clone())), + None => (false, new_empty_array(inner_dtype.clone())), }; Self { values, is_valid, - data_type, + dtype, } } @@ -60,7 +60,7 @@ impl Scalar for MapScalar { self.is_valid } - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/mod.rs b/crates/polars-arrow/src/scalar/mod.rs index 54bd0705bf54..adcf006862db 100644 --- a/crates/polars-arrow/src/scalar/mod.rs +++ b/crates/polars-arrow/src/scalar/mod.rs @@ -46,7 +46,7 @@ pub trait Scalar: std::fmt::Debug + Send + Sync + dyn_clone::DynClone + 'static fn is_valid(&self) -> bool; /// the logical type. - fn data_type(&self) -> &ArrowDataType; + fn dtype(&self) -> &ArrowDataType; } dyn_clone::clone_trait_object!(Scalar); @@ -101,14 +101,14 @@ macro_rules! dyn_new_list { } else { None }; - Box::new(ListScalar::<$type>::new(array.data_type().clone(), value)) + Box::new(ListScalar::<$type>::new(array.dtype().clone(), value)) }}; } /// creates a new [`Scalar`] from an [`Array`]. 
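Note: as the scalar README quoted earlier in this patch says, `dtype` (formerly `data_type`) is what drives the downcast from a `&dyn Scalar` to a concrete scalar before its value is read; `new_scalar`, which performs the reverse step from an array, follows below. A stripped-down, std-only illustration of that dispatch pattern; the enum and trait here are simplified stand-ins, not the crate's actual definitions:

    use std::any::Any;

    // Simplified stand-ins for the crate's ArrowDataType and Scalar trait.
    #[derive(Debug)]
    enum DType {
        Boolean,
        Int32,
    }

    trait Scalar {
        fn dtype(&self) -> &DType;
        fn as_any(&self) -> &dyn Any;
    }

    struct BooleanScalar(Option<bool>);

    impl Scalar for BooleanScalar {
        fn dtype(&self) -> &DType {
            &DType::Boolean
        }
        fn as_any(&self) -> &dyn Any {
            self
        }
    }

    fn describe(s: &dyn Scalar) -> String {
        // The dtype tells us which concrete scalar to downcast to.
        match s.dtype() {
            DType::Boolean => {
                let b = s.as_any().downcast_ref::<BooleanScalar>().unwrap();
                format!("boolean scalar: {:?}", b.0)
            },
            DType::Int32 => "int32 scalar".to_string(),
        }
    }

    fn main() {
        println!("{}", describe(&BooleanScalar(Some(true))));
    }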
pub fn new_scalar(array: &dyn Array, index: usize) -> Box { use PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Null => Box::new(NullScalar::new()), Boolean => { let array = array.as_any().downcast_ref::().unwrap(); @@ -129,7 +129,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { } else { None }; - Box::new(PrimitiveScalar::new(array.data_type().clone(), value)) + Box::new(PrimitiveScalar::new(array.dtype().clone(), value)) }), BinaryView => dyn_new_binview!(array, index, [u8]), Utf8View => dyn_new_binview!(array, index, str), @@ -147,9 +147,9 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { .iter() .map(|x| new_scalar(x.as_ref(), index)) .collect(); - Box::new(StructScalar::new(array.data_type().clone(), Some(values))) + Box::new(StructScalar::new(array.dtype().clone(), Some(values))) } else { - Box::new(StructScalar::new(array.data_type().clone(), None)) + Box::new(StructScalar::new(array.dtype().clone(), None)) } }, FixedSizeBinary => { @@ -162,7 +162,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { } else { None }; - Box::new(FixedSizeBinaryScalar::new(array.data_type().clone(), value)) + Box::new(FixedSizeBinaryScalar::new(array.dtype().clone(), value)) }, FixedSizeList => { let array = array.as_any().downcast_ref::().unwrap(); @@ -171,12 +171,12 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { } else { None }; - Box::new(FixedSizeListScalar::new(array.data_type().clone(), value)) + Box::new(FixedSizeListScalar::new(array.dtype().clone(), value)) }, Union => { let array = array.as_any().downcast_ref::().unwrap(); Box::new(UnionScalar::new( - array.data_type().clone(), + array.dtype().clone(), array.types()[index], array.value(index), )) @@ -188,7 +188,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { } else { None }; - Box::new(MapScalar::new(array.data_type().clone(), value)) + Box::new(MapScalar::new(array.dtype().clone(), value)) }, Dictionary(key_type) => match_integer_type!(key_type, |$T| { let array = array @@ -201,7 +201,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { None }; Box::new(DictionaryScalar::<$T>::new( - array.data_type().clone(), + array.dtype().clone(), value, )) }), diff --git a/crates/polars-arrow/src/scalar/null.rs b/crates/polars-arrow/src/scalar/null.rs index 3559d6cc8290..2071f0d4584e 100644 --- a/crates/polars-arrow/src/scalar/null.rs +++ b/crates/polars-arrow/src/scalar/null.rs @@ -31,7 +31,7 @@ impl Scalar for NullScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { &ArrowDataType::Null } } diff --git a/crates/polars-arrow/src/scalar/primitive.rs b/crates/polars-arrow/src/scalar/primitive.rs index b25b09b3ec91..35214b270032 100644 --- a/crates/polars-arrow/src/scalar/primitive.rs +++ b/crates/polars-arrow/src/scalar/primitive.rs @@ -7,21 +7,21 @@ use crate::types::NativeType; #[derive(Debug, Clone, PartialEq, Eq)] pub struct PrimitiveScalar { value: Option, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PrimitiveScalar { /// Returns a new [`PrimitiveScalar`]. 
#[inline] - pub fn new(data_type: ArrowDataType, value: Option) -> Self { - if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { + pub fn new(dtype: ArrowDataType, value: Option) -> Self { + if !dtype.to_physical_type().eq_primitive(T::PRIMITIVE) { panic!( "Type {} does not support logical type {:?}", std::any::type_name::(), - data_type + dtype ) } - Self { value, data_type } + Self { value, dtype } } /// Returns the optional value. @@ -32,9 +32,9 @@ impl PrimitiveScalar { /// Returns a new `PrimitiveScalar` with the same value but different [`ArrowDataType`] /// # Panic - /// This function panics if the `data_type` is not valid for self's physical type `T`. - pub fn to(self, data_type: ArrowDataType) -> Self { - Self::new(data_type, self.value) + /// This function panics if the `dtype` is not valid for self's physical type `T`. + pub fn to(self, dtype: ArrowDataType) -> Self { + Self::new(dtype, self.value) } } @@ -57,7 +57,7 @@ impl Scalar for PrimitiveScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/struct_.rs b/crates/polars-arrow/src/scalar/struct_.rs index c3e249a45d47..c9ba6a8e66c0 100644 --- a/crates/polars-arrow/src/scalar/struct_.rs +++ b/crates/polars-arrow/src/scalar/struct_.rs @@ -6,12 +6,12 @@ use crate::datatypes::ArrowDataType; pub struct StructScalar { values: Vec>, is_valid: bool, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PartialEq for StructScalar { fn eq(&self, other: &Self) -> bool { - (self.data_type == other.data_type) + (self.dtype == other.dtype) && (self.is_valid == other.is_valid) && ((!self.is_valid) | (self.values == other.values)) } @@ -20,12 +20,12 @@ impl PartialEq for StructScalar { impl StructScalar { /// Returns a new [`StructScalar`] #[inline] - pub fn new(data_type: ArrowDataType, values: Option>>) -> Self { + pub fn new(dtype: ArrowDataType, values: Option>>) -> Self { let is_valid = values.is_some(); Self { values: values.unwrap_or_default(), is_valid, - data_type, + dtype, } } @@ -48,7 +48,7 @@ impl Scalar for StructScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/union.rs b/crates/polars-arrow/src/scalar/union.rs index bf22c0cfede2..95f4ebba6e3e 100644 --- a/crates/polars-arrow/src/scalar/union.rs +++ b/crates/polars-arrow/src/scalar/union.rs @@ -6,17 +6,17 @@ use crate::datatypes::ArrowDataType; pub struct UnionScalar { value: Box, type_: i8, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl UnionScalar { /// Returns a new [`UnionScalar`] #[inline] - pub fn new(data_type: ArrowDataType, type_: i8, value: Box) -> Self { + pub fn new(dtype: ArrowDataType, type_: i8, value: Box) -> Self { Self { value, type_, - data_type, + dtype, } } @@ -45,7 +45,7 @@ impl Scalar for UnionScalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { - &self.data_type + fn dtype(&self) -> &ArrowDataType { + &self.dtype } } diff --git a/crates/polars-arrow/src/scalar/utf8.rs b/crates/polars-arrow/src/scalar/utf8.rs index e31c778631fe..986477d5bb5c 100644 --- a/crates/polars-arrow/src/scalar/utf8.rs +++ b/crates/polars-arrow/src/scalar/utf8.rs @@ -45,7 +45,7 @@ impl Scalar for Utf8Scalar { } #[inline] - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { if O::IS_LARGE { &ArrowDataType::LargeUtf8 } else { diff --git 
a/crates/polars-compute/src/arithmetic/signed.rs b/crates/polars-compute/src/arithmetic/signed.rs index a19f6b231526..c77057b78a47 100644 --- a/crates/polars-compute/src/arithmetic/signed.rs +++ b/crates/polars-compute/src/arithmetic/signed.rs @@ -106,7 +106,7 @@ macro_rules! impl_signed_arith_kernel { fn prim_wrapping_floor_div_scalar(lhs: PArr<$T>, rhs: $T) -> PArr<$T> { if rhs == 0 { - PArr::full_null(lhs.len(), lhs.data_type().clone()) + PArr::full_null(lhs.len(), lhs.dtype().clone()) } else if rhs == -1 { Self::prim_wrapping_neg(lhs) } else if rhs == 1 { @@ -145,7 +145,7 @@ macro_rules! impl_signed_arith_kernel { fn prim_wrapping_trunc_div_scalar(lhs: PArr<$T>, rhs: $T) -> PArr<$T> { if rhs == 0 { - PArr::full_null(lhs.len(), lhs.data_type().clone()) + PArr::full_null(lhs.len(), lhs.dtype().clone()) } else if rhs == -1 { Self::prim_wrapping_neg(lhs) } else if rhs == 1 { @@ -177,7 +177,7 @@ macro_rules! impl_signed_arith_kernel { fn prim_wrapping_mod_scalar(lhs: PArr<$T>, rhs: $T) -> PArr<$T> { if rhs == 0 { - PArr::full_null(lhs.len(), lhs.data_type().clone()) + PArr::full_null(lhs.len(), lhs.dtype().clone()) } else if rhs == -1 || rhs == 1 { lhs.fill_with(0) } else { diff --git a/crates/polars-compute/src/arithmetic/unsigned.rs b/crates/polars-compute/src/arithmetic/unsigned.rs index 2ae40332e820..db71590989bd 100644 --- a/crates/polars-compute/src/arithmetic/unsigned.rs +++ b/crates/polars-compute/src/arithmetic/unsigned.rs @@ -85,7 +85,7 @@ macro_rules! impl_unsigned_arith_kernel { fn prim_wrapping_floor_div_scalar(lhs: PArr<$T>, rhs: $T) -> PArr<$T> { if rhs == 0 { - PArr::full_null(lhs.len(), lhs.data_type().clone()) + PArr::full_null(lhs.len(), lhs.dtype().clone()) } else if rhs == 1 { lhs } else { @@ -115,7 +115,7 @@ macro_rules! impl_unsigned_arith_kernel { fn prim_wrapping_mod_scalar(lhs: PArr<$T>, rhs: $T) -> PArr<$T> { if rhs == 0 { - PArr::full_null(lhs.len(), lhs.data_type().clone()) + PArr::full_null(lhs.len(), lhs.dtype().clone()) } else if rhs == 1 { lhs.fill_with(0) } else { diff --git a/crates/polars-compute/src/comparisons/array.rs b/crates/polars-compute/src/comparisons/array.rs index b981a50b3547..e8b926739d8e 100644 --- a/crates/polars-compute/src/comparisons/array.rs +++ b/crates/polars-compute/src/comparisons/array.rs @@ -35,16 +35,16 @@ impl TotalEqKernel for FixedSizeListArray { assert_eq!(self.len(), other.len()); let ArrowDataType::FixedSizeList(self_type, self_width) = - self.data_type().to_logical_type() + self.dtype().to_logical_type() else { panic!("array comparison called with non-array type"); }; let ArrowDataType::FixedSizeList(other_type, other_width) = - other.data_type().to_logical_type() + other.dtype().to_logical_type() else { panic!("array comparison called with non-array type"); }; - assert_eq!(self_type.data_type(), other_type.data_type()); + assert_eq!(self_type.dtype(), other_type.dtype()); if self_width != other_width { return Bitmap::new_with_value(false, self.len()); @@ -58,16 +58,16 @@ impl TotalEqKernel for FixedSizeListArray { fn tot_ne_kernel(&self, other: &Self) -> Bitmap { assert_eq!(self.len(), other.len()); let ArrowDataType::FixedSizeList(self_type, self_width) = - self.data_type().to_logical_type() + self.dtype().to_logical_type() else { panic!("array comparison called with non-array type"); }; let ArrowDataType::FixedSizeList(other_type, other_width) = - other.data_type().to_logical_type() + other.dtype().to_logical_type() else { panic!("array comparison called with non-array type"); }; - assert_eq!(self_type.data_type(), 
other_type.data_type()); + assert_eq!(self_type.dtype(), other_type.dtype()); if self_width != other_width { return Bitmap::new_with_value(true, self.len()); diff --git a/crates/polars-compute/src/comparisons/dyn_array.rs b/crates/polars-compute/src/comparisons/dyn_array.rs index 693293f4e2c5..3ee3d802f09f 100644 --- a/crates/polars-compute/src/comparisons/dyn_array.rs +++ b/crates/polars-compute/src/comparisons/dyn_array.rs @@ -20,10 +20,10 @@ macro_rules! compare { let lhs = $lhs; let rhs = $rhs; - assert_eq!(lhs.data_type(), rhs.data_type()); + assert_eq!(lhs.dtype(), rhs.dtype()); use arrow::datatypes::{IntegerType as I, PhysicalType as PH, PrimitiveType as PR}; - match lhs.data_type().to_physical_type() { + match lhs.dtype().to_physical_type() { PH::Boolean => call_binary!(BooleanArray, lhs, rhs, $op), PH::BinaryView => call_binary!(BinaryViewArray, lhs, rhs, $op), PH::Utf8View => call_binary!(Utf8ViewArray, lhs, rhs, $op), diff --git a/crates/polars-compute/src/filter/mod.rs b/crates/polars-compute/src/filter/mod.rs index 2ac66243fb8e..6de1afbab2ed 100644 --- a/crates/polars-compute/src/filter/mod.rs +++ b/crates/polars-compute/src/filter/mod.rs @@ -28,14 +28,14 @@ pub fn filter_with_bitmap(array: &dyn Array, mask: &Bitmap) -> Box { // Fast-path: completely empty or completely full mask. let false_count = mask.unset_bits(); if false_count == mask.len() { - return new_empty_array(array.data_type().clone()); + return new_empty_array(array.dtype().clone()); } if false_count == 0 { return array.to_boxed(); } use arrow::datatypes::PhysicalType::*; - match array.data_type().to_physical_type() { + match array.dtype().to_physical_type() { Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { let array: &PrimitiveArray<$T> = array.as_any().downcast_ref().unwrap(); let (values, validity) = primitive::filter_values_and_validity::<$T>(array.values(), array.validity(), mask); @@ -45,7 +45,7 @@ pub fn filter_with_bitmap(array: &dyn Array, mask: &Bitmap) -> Box { let array = array.as_any().downcast_ref::().unwrap(); let (values, validity) = boolean::filter_bitmap_and_validity(array.values(), array.validity(), mask); - BooleanArray::new(array.data_type().clone(), values, validity).boxed() + BooleanArray::new(array.dtype().clone(), values, validity).boxed() }, BinaryView => { let array = array.as_any().downcast_ref::().unwrap(); @@ -54,7 +54,7 @@ pub fn filter_with_bitmap(array: &dyn Array, mask: &Bitmap) -> Box { let (views, validity) = primitive::filter_values_and_validity(views, validity, mask); unsafe { BinaryViewArray::new_unchecked_unknown_md( - array.data_type().clone(), + array.dtype().clone(), views.into(), array.data_buffers().clone(), validity, diff --git a/crates/polars-compute/src/if_then_else/array.rs b/crates/polars-compute/src/if_then_else/array.rs index a15349bf1e2c..67f9b450ec7c 100644 --- a/crates/polars-compute/src/if_then_else/array.rs +++ b/crates/polars-compute/src/if_then_else/array.rs @@ -26,7 +26,7 @@ impl IfThenElseKernel for FixedSizeListArray { if_false: &Self, ) -> Self { let if_true_list: FixedSizeListArray = - std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.data_type().clone()); + std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.dtype().clone()); let mut growable = GrowableFixedSizeList::new(vec![&if_true_list, if_false], false, mask.len()); unsafe { @@ -46,7 +46,7 @@ impl IfThenElseKernel for FixedSizeListArray { if_false: Self::Scalar<'_>, ) -> Self { let if_false_list: FixedSizeListArray = - 
std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.data_type().clone()); + std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.dtype().clone()); let mut growable = GrowableFixedSizeList::new(vec![if_true, &if_false_list], false, mask.len()); unsafe { diff --git a/crates/polars-compute/src/if_then_else/list.rs b/crates/polars-compute/src/if_then_else/list.rs index aa3096c6f07e..284d6b7f0420 100644 --- a/crates/polars-compute/src/if_then_else/list.rs +++ b/crates/polars-compute/src/if_then_else/list.rs @@ -26,7 +26,7 @@ impl IfThenElseKernel for ListArray { if_false: &Self, ) -> Self { let if_true_list: ListArray = - std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.data_type().clone()); + std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.dtype().clone()); let mut growable = GrowableList::new(vec![&if_true_list, if_false], false, mask.len()); unsafe { if_then_else_extend( @@ -45,7 +45,7 @@ impl IfThenElseKernel for ListArray { if_false: Self::Scalar<'_>, ) -> Self { let if_false_list: ListArray = - std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.data_type().clone()); + std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.dtype().clone()); let mut growable = GrowableList::new(vec![if_true, &if_false_list], false, mask.len()); unsafe { if_then_else_extend( diff --git a/crates/polars-compute/src/if_then_else/view.rs b/crates/polars-compute/src/if_then_else/view.rs index 9324fbb8d7a7..5b3fd8fc4df9 100644 --- a/crates/polars-compute/src/if_then_else/view.rs +++ b/crates/polars-compute/src/if_then_else/view.rs @@ -87,7 +87,7 @@ impl IfThenElseKernel for BinaryViewArray { }; } builder - .freeze_with_dtype(if_true.data_type().clone()) + .freeze_with_dtype(if_true.dtype().clone()) .with_validity(validity) } @@ -125,7 +125,7 @@ impl IfThenElseKernel for BinaryViewArray { } } builder - .freeze_with_dtype(if_false.data_type().clone()) + .freeze_with_dtype(if_false.dtype().clone()) .with_validity(validity) } @@ -163,7 +163,7 @@ impl IfThenElseKernel for BinaryViewArray { } }; builder - .freeze_with_dtype(if_true.data_type().clone()) + .freeze_with_dtype(if_true.dtype().clone()) .with_validity(validity) } diff --git a/crates/polars-compute/src/min_max/dyn_array.rs b/crates/polars-compute/src/min_max/dyn_array.rs index e988bbd0ef54..119c60cf035b 100644 --- a/crates/polars-compute/src/min_max/dyn_array.rs +++ b/crates/polars-compute/src/min_max/dyn_array.rs @@ -13,7 +13,7 @@ macro_rules! call_op { (dt: $T:ty, $scalar:ty, $arr:expr, $op:path) => {{ let arr: &$T = $arr.as_any().downcast_ref().unwrap(); $op(arr) - .map(|v| Box::new(<$scalar>::new(arr.data_type().clone(), Some(v))) as Box) + .map(|v| Box::new(<$scalar>::new(arr.dtype().clone(), Some(v))) as Box) }}; ($T:ty, $scalar:ty, $arr:expr, $op:path, ret_two) => {{ let arr: &$T = $arr.as_any().downcast_ref().unwrap(); @@ -28,8 +28,8 @@ macro_rules! call_op { let arr: &$T = $arr.as_any().downcast_ref().unwrap(); $op(arr).map(|(l, r)| { ( - Box::new(<$scalar>::new(arr.data_type().clone(), Some(l))) as Box, - Box::new(<$scalar>::new(arr.data_type().clone(), Some(r))) as Box, + Box::new(<$scalar>::new(arr.dtype().clone(), Some(l))) as Box, + Box::new(<$scalar>::new(arr.dtype().clone(), Some(r))) as Box, ) }) }}; @@ -42,7 +42,7 @@ macro_rules! 
call { use arrow::datatypes::{PhysicalType as PH, PrimitiveType as PR}; use PrimitiveArray as PArr; use PrimitiveScalar as PScalar; - match arr.data_type().to_physical_type() { + match arr.dtype().to_physical_type() { PH::Boolean => call_op!(BooleanArray, BooleanScalar, arr, $op$(, $variant)?), PH::Primitive(PR::Int8) => call_op!(dt: PArr, PScalar, arr, $op$(, $variant)?), PH::Primitive(PR::Int16) => call_op!(dt: PArr, PScalar, arr, $op$(, $variant)?), @@ -65,7 +65,7 @@ macro_rules! call { PH::Utf8 => call_op!(Utf8Array, BinaryScalar, arr, $op$(, $variant)?), PH::LargeUtf8 => call_op!(Utf8Array, BinaryScalar, arr, $op$(, $variant)?), - _ => todo!("Dynamic MinMax is not yet implemented for {:?}", arr.data_type()), + _ => todo!("Dynamic MinMax is not yet implemented for {:?}", arr.dtype()), } }}; } diff --git a/crates/polars-compute/src/unique/boolean.rs b/crates/polars-compute/src/unique/boolean.rs index 511a45bcea00..48da9a66c762 100644 --- a/crates/polars-compute/src/unique/boolean.rs +++ b/crates/polars-compute/src/unique/boolean.rs @@ -7,7 +7,7 @@ use super::{GenericUniqueKernel, RangedUniqueKernel}; pub struct BooleanUniqueKernelState { seen: u32, has_null: bool, - data_type: ArrowDataType, + dtype: ArrowDataType, } const fn to_value(scalar: Option) -> u8 { @@ -19,11 +19,11 @@ const fn to_value(scalar: Option) -> u8 { } impl BooleanUniqueKernelState { - pub fn new(has_null: bool, data_type: ArrowDataType) -> Self { + pub fn new(has_null: bool, dtype: ArrowDataType) -> Self { Self { seen: 0, has_null, - data_type, + dtype, } } @@ -91,7 +91,7 @@ impl RangedUniqueKernel for BooleanUniqueKernelState { let values = values.freeze(); - BooleanArray::new(self.data_type, values, validity) + BooleanArray::new(self.dtype, values, validity) } fn finalize_n_unique(self) -> usize { @@ -106,21 +106,21 @@ impl RangedUniqueKernel for BooleanUniqueKernelState { impl GenericUniqueKernel for BooleanArray { fn unique(&self) -> Self { let mut state = - BooleanUniqueKernelState::new(self.null_count() > 0, self.data_type().clone()); + BooleanUniqueKernelState::new(self.null_count() > 0, self.dtype().clone()); state.append(self); state.finalize_unique() } fn n_unique(&self) -> usize { let mut state = - BooleanUniqueKernelState::new(self.null_count() > 0, self.data_type().clone()); + BooleanUniqueKernelState::new(self.null_count() > 0, self.dtype().clone()); state.append(self); state.finalize_n_unique() } fn n_unique_non_null(&self) -> usize { let mut state = - BooleanUniqueKernelState::new(self.null_count() > 0, self.data_type().clone()); + BooleanUniqueKernelState::new(self.null_count() > 0, self.dtype().clone()); state.append(self); state.finalize_n_unique_non_null() } diff --git a/crates/polars-compute/src/unique/primitive.rs b/crates/polars-compute/src/unique/primitive.rs index 9a1e4ff933bb..fc8a9f043f57 100644 --- a/crates/polars-compute/src/unique/primitive.rs +++ b/crates/polars-compute/src/unique/primitive.rs @@ -16,7 +16,7 @@ pub struct PrimitiveRangedUniqueState { seen: u128, range: RangeInclusive, has_null: bool, - data_type: ArrowDataType, + dtype: ArrowDataType, } impl PrimitiveRangedUniqueState @@ -27,7 +27,7 @@ where min_value: T, max_value: T, has_null: bool, - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> Option { // We cannot really do this for floating point number as these are not as discrete as // integers. 
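Note: the comment above is the reason the ranged-unique fast path is integer-only; within a small inclusive range every possible value maps to one bit of the `seen: u128` field used in the hunk below, which does not transfer to floats. A minimal std-only sketch of that bitset idea (the function name is illustrative, not the crate's API):

    /// Tracks every observed value of a small inclusive integer range as one bit
    /// in a u128, so counting distinct values needs no hash table.
    fn n_unique_in_range(values: &[i32], min: i32, max: i32) -> usize {
        assert!(max >= min && (max as i64 - min as i64) < 128, "range must fit in a u128");
        let mut seen: u128 = 0;
        for &v in values {
            debug_assert!((min..=max).contains(&v));
            seen |= 1u128 << (v as i64 - min as i64) as u32;
        }
        seen.count_ones() as usize
    }

    fn main() {
        assert_eq!(n_unique_in_range(&[3, 5, 3, 7, 5], 0, 10), 3);
    }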
@@ -46,7 +46,7 @@ where seen: 0, range: min_value..=max_value, has_null, - data_type, + dtype, }) } @@ -163,7 +163,7 @@ where (values, None) }; - PrimitiveArray::new(self.data_type, values.into(), validity) + PrimitiveArray::new(self.dtype, values.into(), validity) } fn finalize_n_unique(self) -> usize { diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs index e327449e4124..59bdd92b67cc 100644 --- a/crates/polars-core/src/chunked_array/array/mod.rs +++ b/crates/polars-core/src/chunked_array/array/mod.rs @@ -55,7 +55,7 @@ impl ArrayChunked { Series::_try_from_arrow_unchecked_with_md( self.name().clone(), vec![(*arr.values()).clone()], - &field.data_type, + &field.dtype, Some(&field.metadata), ) .unwrap() diff --git a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs index 845c53c08b29..80305ca043fb 100644 --- a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs +++ b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs @@ -87,7 +87,7 @@ impl<'a> AnonymousListBuilder<'a> { let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { - None => DataType::from(arr.data_type()), + None => DataType::from(arr.dtype()), Some(dt) => DataType::List(Box::new(dt)), }; @@ -147,7 +147,7 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder { let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { - None => DataType::from_arrow(arr.data_type(), false), + None => DataType::from_arrow(arr.dtype(), false), Some(dt) => DataType::List(Box::new(dt)), }; diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index a7c9884ed604..cfabee18dcf5 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -132,20 +132,20 @@ impl ChunkedArray where T: PolarsNumericType, { - fn cast_impl(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - if self.dtype() == data_type { + fn cast_impl(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + if self.dtype() == dtype { // SAFETY: chunks are correct dtype let mut out = unsafe { Series::from_chunks_and_dtype_unchecked( self.name().clone(), self.chunks.clone(), - data_type, + dtype, ) }; out.set_sorted_flag(self.is_sorted_flag()); return Ok(out); } - match data_type { + match dtype { #[cfg(feature = "dtype-categorical")] DataType::Categorical(_, ordering) => { polars_ensure!( @@ -201,7 +201,7 @@ where DataType::Struct(fields) => { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, - _ => cast_impl_inner(self.name().clone(), &self.chunks, data_type, options).map( + _ => cast_impl_inner(self.name().clone(), &self.chunks, dtype, options).map( |mut s| { // maintain sorted if data types // - remain signed @@ -209,15 +209,15 @@ where // this may still fail with overflow? 
let dtype = self.dtype(); - let to_signed = data_type.is_signed_integer(); + let to_signed = dtype.is_signed_integer(); let unsigned2unsigned = - dtype.is_unsigned_integer() && data_type.is_unsigned_integer(); + dtype.is_unsigned_integer() && dtype.is_unsigned_integer(); let allowed = to_signed || unsigned2unsigned; if (allowed) && (s.null_count() == self.null_count()) // physical to logicals - || (self.dtype().to_physical() == data_type.to_physical()) + || (self.dtype().to_physical() == dtype.to_physical()) { let is_sorted = self.is_sorted_flag(); s.set_sorted_flag(is_sorted) @@ -235,14 +235,14 @@ where { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { - self.cast_impl(data_type, options) + self.cast_impl(dtype, options) } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - match data_type { + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + match dtype { #[cfg(feature = "dtype-categorical")] DataType::Categorical(Some(rev_map), ordering) | DataType::Enum(Some(rev_map), ordering) => { @@ -254,7 +254,7 @@ where CategoricalChunked::from_cats_and_rev_map_unchecked( ca.clone(), rev_map.clone(), - matches!(data_type, DataType::Enum(_, _)), + matches!(dtype, DataType::Enum(_, _)), *ordering, ) } @@ -263,7 +263,7 @@ where polars_bail!(ComputeError: "cannot cast numeric types to 'Categorical'"); } }, - _ => self.cast_impl(data_type, CastOptions::Overflowing), + _ => self.cast_impl(dtype, CastOptions::Overflowing), } } } @@ -271,10 +271,10 @@ where impl ChunkCast for StringChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { - match data_type { + match dtype { #[cfg(feature = "dtype-categorical")] DataType::Categorical(rev_map, ordering) => match rev_map { None => { @@ -327,7 +327,7 @@ impl ChunkCast for StringChunked { }, #[cfg(feature = "dtype-date")] DataType::Date => { - let result = cast_chunks(&self.chunks, data_type, options)?; + let result = cast_chunks(&self.chunks, dtype, options)?; let out = Series::try_from((self.name().clone(), result))?; Ok(out) }, @@ -355,12 +355,12 @@ impl ChunkCast for StringChunked { }; out }, - _ => cast_impl(self.name().clone(), &self.chunks, data_type, options), + _ => cast_impl(self.name().clone(), &self.chunks, dtype, options), } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - self.cast_with_options(data_type, CastOptions::Overflowing) + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::Overflowing) } } @@ -403,22 +403,22 @@ impl StringChunked { impl ChunkCast for BinaryChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { - match data_type { + match dtype { #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, - _ => cast_impl(self.name().clone(), &self.chunks, data_type, options), + _ => cast_impl(self.name().clone(), &self.chunks, dtype, options), } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - match data_type { + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + match dtype { DataType::String => unsafe { Ok(self.to_string_unchecked().into_series()) }, - _ => self.cast_with_options(data_type, CastOptions::Overflowing), + _ => self.cast_with_options(dtype, 
CastOptions::Overflowing), } } } @@ -426,40 +426,40 @@ impl ChunkCast for BinaryChunked { impl ChunkCast for BinaryOffsetChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { - match data_type { + match dtype { #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, - _ => cast_impl(self.name().clone(), &self.chunks, data_type, options), + _ => cast_impl(self.name().clone(), &self.chunks, dtype, options), } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - self.cast_with_options(data_type, CastOptions::Overflowing) + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::Overflowing) } } impl ChunkCast for BooleanChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { - match data_type { + match dtype { #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, - _ => cast_impl(self.name().clone(), &self.chunks, data_type, options), + _ => cast_impl(self.name().clone(), &self.chunks, dtype, options), } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - self.cast_with_options(data_type, CastOptions::Overflowing) + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::Overflowing) } } @@ -468,11 +468,11 @@ impl ChunkCast for BooleanChunked { impl ChunkCast for ListChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { use DataType::*; - match data_type { + match dtype { List(child_type) => { match (self.inner_dtype(), &**child_type) { (old, new) if old == new => Ok(self.clone().into_series()), @@ -499,7 +499,7 @@ impl ChunkCast for ListChunked { }, #[cfg(feature = "dtype-array")] Array(child_type, width) => { - let physical_type = data_type.to_physical(); + let physical_type = dtype.to_physical(); // TODO!: properly implement this recursively. #[cfg(feature = "dtype-categorical")] @@ -521,17 +521,17 @@ impl ChunkCast for ListChunked { polars_bail!( InvalidOperation: "cannot cast List type (inner: '{:?}', to: '{:?}')", self.inner_dtype(), - data_type, + dtype, ) }, } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { use DataType::*; - match data_type { + match dtype { List(child_type) => cast_list_unchecked(self, child_type), - _ => self.cast_with_options(data_type, CastOptions::Overflowing), + _ => self.cast_with_options(dtype, CastOptions::Overflowing), } } } @@ -542,11 +542,11 @@ impl ChunkCast for ListChunked { impl ChunkCast for ArrayChunked { fn cast_with_options( &self, - data_type: &DataType, + dtype: &DataType, options: CastOptions, ) -> PolarsResult { use DataType::*; - match data_type { + match dtype { Array(child_type, width) => { polars_ensure!( *width == self.width(), @@ -575,7 +575,7 @@ impl ChunkCast for ArrayChunked { } }, List(child_type) => { - let physical_type = data_type.to_physical(); + let physical_type = dtype.to_physical(); // cast to the physical type to avoid logical chunks. let chunks = cast_chunks(self.chunks(), &physical_type, options)?; // SAFETY: we just casted so the dtype matches. 
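Note: the `ChunkCast` hunks around this point only rename the `data_type` parameters; user-facing casting behaviour is meant to be unchanged. A small usage sketch against the public `polars` crate, assuming it is available with default features and the string-to-name conversions used in this era of the codebase (the column contents are made up for illustration):

    use polars::prelude::*;

    fn main() -> PolarsResult<()> {
        let s = Series::new("ints".into(), &[1i32, 2, 3]);
        // `cast` routes through the ChunkCast implementations touched above.
        let as_f64 = s.cast(&DataType::Float64)?;
        assert_eq!(as_f64.dtype(), &DataType::Float64);
        Ok(())
    }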
@@ -592,14 +592,14 @@ impl ChunkCast for ArrayChunked { polars_bail!( InvalidOperation: "cannot cast Array type (inner: '{:?}', to: '{:?}')", self.inner_dtype(), - data_type, + dtype, ) }, } } - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult { - self.cast_with_options(data_type, CastOptions::Overflowing) + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::Overflowing) } } @@ -629,9 +629,9 @@ fn cast_list( let new_values = new_inner.array_ref(0).clone(); - let data_type = ListArray::::default_datatype(new_values.data_type().clone()); + let dtype = ListArray::::default_datatype(new_values.dtype().clone()); let new_arr = ListArray::::new( - data_type, + dtype, arr.offsets().clone(), new_values, arr.validity().cloned(), @@ -654,9 +654,9 @@ unsafe fn cast_list_unchecked(ca: &ListChunked, child_type: &DataType) -> Polars let new_inner = s.cast_unchecked(child_type)?; let new_values = new_inner.array_ref(0).clone(); - let data_type = ListArray::::default_datatype(new_values.data_type().clone()); + let dtype = ListArray::::default_datatype(new_values.dtype().clone()); let new_arr = ListArray::::new( - data_type, + dtype, arr.offsets().clone(), new_values, arr.validity().cloned(), @@ -694,9 +694,9 @@ fn cast_fixed_size_list( let new_values = new_inner.array_ref(0).clone(); - let data_type = - FixedSizeListArray::default_datatype(new_values.data_type().clone(), ca.width()); - let new_arr = FixedSizeListArray::new(data_type, new_values, arr.validity().cloned()); + let dtype = + FixedSizeListArray::default_datatype(new_values.dtype().clone(), ca.width()); + let new_arr = FixedSizeListArray::new(dtype, new_values, arr.validity().cloned()); Ok((Box::new(new_arr), inner_dtype)) } diff --git a/crates/polars-core/src/chunked_array/from.rs b/crates/polars-core/src/chunked_array/from.rs index 5528189038f0..83acbc2c71ff 100644 --- a/crates/polars-core/src/chunked_array/from.rs +++ b/crates/polars-core/src/chunked_array/from.rs @@ -6,7 +6,7 @@ use super::*; fn from_chunks_list_dtype(chunks: &mut Vec, dtype: DataType) -> DataType { // ensure we don't get List let dtype = if let Some(arr) = chunks.get(0) { - arr.data_type().into() + arr.dtype().into() } else { dtype }; @@ -29,7 +29,7 @@ fn from_chunks_list_dtype(chunks: &mut Vec, dtype: DataType) -> DataTy Series::_try_from_arrow_unchecked( PlSmallStr::EMPTY, vec![values_arr.clone()], - values_arr.data_type(), + values_arr.dtype(), ) .unwrap() }; @@ -61,7 +61,7 @@ fn from_chunks_list_dtype(chunks: &mut Vec, dtype: DataType) -> DataTy Series::_try_from_arrow_unchecked( PlSmallStr::EMPTY, vec![values_arr.clone()], - values_arr.data_type(), + values_arr.dtype(), ) .unwrap() }; @@ -222,7 +222,7 @@ where { if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() { assert_eq!( - chunks[0].data_type(), + chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()) ) } diff --git a/crates/polars-core/src/chunked_array/list/mod.rs b/crates/polars-core/src/chunked_array/list/mod.rs index 903fcf6cab14..8b730966b1bc 100644 --- a/crates/polars-core/src/chunked_array/list/mod.rs +++ b/crates/polars-core/src/chunked_array/list/mod.rs @@ -93,7 +93,7 @@ impl ListChunked { let out = out.rechunk(); let values = out.chunks()[0].clone(); - let inner_dtype = LargeListArray::default_datatype(values.data_type().clone()); + let inner_dtype = LargeListArray::default_datatype(values.dtype().clone()); let arr = LargeListArray::new( inner_dtype, (*arr.offsets()).clone(), diff --git 
a/crates/polars-core/src/chunked_array/logical/categorical/merge.rs b/crates/polars-core/src/chunked_array/logical/categorical/merge.rs index 375f8cc3e72f..0e72de7a903f 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/merge.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/merge.rs @@ -240,7 +240,7 @@ pub fn make_list_categoricals_compatible( .zip(cat_physical.chunks()) .for_each(|(arr, new_phys)| { *arr = ListArray::new( - arr.data_type().clone(), + arr.dtype().clone(), arr.offsets().clone(), new_phys.clone(), arr.validity().cloned(), diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index 64134d5e62ad..f723bc3b7e70 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -11,7 +11,7 @@ impl Int128Chunked { // physical i128 type doesn't exist // so we update the decimal dtype for arr in self.chunks.iter_mut() { - let mut default = PrimitiveArray::new_empty(arr.data_type().clone()); + let mut default = PrimitiveArray::new_empty(arr.dtype().clone()); let arr = arr .as_any_mut() .downcast_mut::>() diff --git a/crates/polars-core/src/chunked_array/mod.rs b/crates/polars-core/src/chunked_array/mod.rs index f060182a1768..8b68e0c9ef85 100644 --- a/crates/polars-core/src/chunked_array/mod.rs +++ b/crates/polars-core/src/chunked_array/mod.rs @@ -512,7 +512,7 @@ impl ChunkedArray { // SAFETY: we keep the correct dtype let mut ca = unsafe { self.copy_with_chunks(vec![new_empty_array( - self.chunks.first().unwrap().data_type().clone(), + self.chunks.first().unwrap().dtype().clone(), )]) }; @@ -599,7 +599,7 @@ impl ChunkedArray { /// Get data type of [`ChunkedArray`]. pub fn dtype(&self) -> &DataType { - self.field.data_type() + self.field.dtype() } pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) { @@ -618,7 +618,7 @@ impl ChunkedArray { /// Rename this [`ChunkedArray`]. pub fn rename(&mut self, name: PlSmallStr) { - self.field = Arc::new(Field::new(name, self.field.data_type().clone())) + self.field = Arc::new(Field::new(name, self.field.dtype().clone())) } /// Return this [`ChunkedArray`] with a new name. 
diff --git a/crates/polars-core/src/chunked_array/object/extension/drop.rs b/crates/polars-core/src/chunked_array/object/extension/drop.rs index 77f18fd3d3a1..3b3e16deff2e 100644 --- a/crates/polars-core/src/chunked_array/object/extension/drop.rs +++ b/crates/polars-core/src/chunked_array/object/extension/drop.rs @@ -18,8 +18,8 @@ pub(crate) unsafe fn drop_list(ca: &ListChunked) { // if empty the memory is leaked somewhere assert!(!ca.chunks.is_empty()); for lst_arr in &ca.chunks { - if let ArrowDataType::LargeList(fld) = lst_arr.data_type() { - let dtype = fld.data_type(); + if let ArrowDataType::LargeList(fld) = lst_arr.dtype() { + let dtype = fld.dtype(); assert!(matches!(dtype, ArrowDataType::Extension(_, _, _))); diff --git a/crates/polars-core/src/chunked_array/object/extension/list.rs b/crates/polars-core/src/chunked_array/object/extension/list.rs index e6ab34ceb4c8..1918039d647e 100644 --- a/crates/polars-core/src/chunked_array/object/extension/list.rs +++ b/crates/polars-core/src/chunked_array/object/extension/list.rs @@ -69,11 +69,11 @@ impl ListBuilderTrait for ExtensionListBuilder { let mut pe = create_extension(obj_arr.into_iter_cloned()); unsafe { pe.set_to_series_fn::() }; let extension_array = Box::new(pe.take_and_forget()) as ArrayRef; - let extension_dtype = extension_array.data_type(); + let extension_dtype = extension_array.dtype(); - let data_type = ListArray::::default_datatype(extension_dtype.clone()); + let dtype = ListArray::::default_datatype(extension_dtype.clone()); let arr = ListArray::::new( - data_type, + dtype, // SAFETY: offsets are monotonically increasing. unsafe { Offsets::new_unchecked(offsets).into() }, extension_array, diff --git a/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs b/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs index 5eca8fafe5bc..19ef81183222 100644 --- a/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs +++ b/crates/polars-core/src/chunked_array/object/extension/polars_extension.rs @@ -43,7 +43,7 @@ impl PolarsExtension { /// be very careful, this dereferences a raw pointer on the heap, unsafe fn get_sentinel(&self) -> Box { if let ArrowDataType::Extension(_, _, Some(metadata)) = - self.array.as_ref().unwrap().data_type() + self.array.as_ref().unwrap().dtype() { let mut iter = metadata.split(';'); diff --git a/crates/polars-core/src/chunked_array/object/mod.rs b/crates/polars-core/src/chunked_array/object/mod.rs index fffe547f6d0c..1b018800dd98 100644 --- a/crates/polars-core/src/chunked_array/object/mod.rs +++ b/crates/polars-core/src/chunked_array/object/mod.rs @@ -169,7 +169,7 @@ where self } - fn data_type(&self) -> &ArrowDataType { + fn dtype(&self) -> &ArrowDataType { &ArrowDataType::FixedSizeBinary(std::mem::size_of::()) } diff --git a/crates/polars-core/src/chunked_array/ops/any_value.rs b/crates/polars-core/src/chunked_array/ops/any_value.rs index d03a94bf3bcb..2a50b24d9bbf 100644 --- a/crates/polars-core/src/chunked_array/ops/any_value.rs +++ b/crates/polars-core/src/chunked_array/ops/any_value.rs @@ -161,7 +161,7 @@ impl<'a> AnyValue<'a> { if arr.is_valid_unchecked(idx) { let v = arr.value_unchecked(idx); - match fld.data_type() { + match fld.dtype() { DataType::Categorical(Some(rev_map), _) => { AnyValue::Categorical( v, @@ -178,13 +178,13 @@ impl<'a> AnyValue<'a> { AnyValue::Null } } else { - arr_to_any_value(&**arr, idx, fld.data_type()) + arr_to_any_value(&**arr, idx, fld.dtype()) } } #[cfg(not(feature = "dtype-categorical"))] { - 
arr_to_any_value(&**arr, idx, fld.data_type()) + arr_to_any_value(&**arr, idx, fld.dtype()) } }) } diff --git a/crates/polars-core/src/chunked_array/ops/mod.rs b/crates/polars-core/src/chunked_array/ops/mod.rs index 7e11f3c6fbb9..0ef2e3c02020 100644 --- a/crates/polars-core/src/chunked_array/ops/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/mod.rs @@ -182,12 +182,12 @@ pub trait ChunkSet<'a, A, B> { /// Cast `ChunkedArray` to `ChunkedArray` pub trait ChunkCast { /// Cast a [`ChunkedArray`] to [`DataType`] - fn cast(&self, data_type: &DataType) -> PolarsResult { - self.cast_with_options(data_type, CastOptions::NonStrict) + fn cast(&self, dtype: &DataType) -> PolarsResult { + self.cast_with_options(dtype, CastOptions::NonStrict) } /// Cast a [`ChunkedArray`] to [`DataType`] - fn cast_with_options(&self, data_type: &DataType, options: CastOptions) + fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult; /// Does not check if the cast is a valid one and may over/underflow @@ -195,7 +195,7 @@ pub trait ChunkCast { /// # Safety /// - This doesn't do utf8 validation checking when casting from binary /// - This doesn't do categorical bound checking when casting from UInt32 - unsafe fn cast_unchecked(&self, data_type: &DataType) -> PolarsResult; + unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult; } /// Fastest way to do elementwise operations on a [`ChunkedArray`] when the operation is cheaper than @@ -543,7 +543,7 @@ impl ChunkExpandAtIndex for StructChunked { let (chunk_idx, idx) = self.index_to_chunked_index(index); let chunk = self.downcast_chunks().get(chunk_idx).unwrap(); let chunk = if chunk.is_null(idx) { - new_null_array(chunk.data_type().clone(), length) + new_null_array(chunk.dtype().clone(), length) } else { let values = chunk .values() @@ -555,7 +555,7 @@ impl ChunkExpandAtIndex for StructChunked { }) .collect::>(); - StructArray::new(chunk.data_type().clone(), values, None).boxed() + StructArray::new(chunk.dtype().clone(), values, None).boxed() }; // SAFETY: chunks are from self. diff --git a/crates/polars-core/src/chunked_array/ops/reverse.rs b/crates/polars-core/src/chunked_array/ops/reverse.rs index 903fa5d81887..0436b2264c5c 100644 --- a/crates/polars-core/src/chunked_array/ops/reverse.rs +++ b/crates/polars-core/src/chunked_array/ops/reverse.rs @@ -51,7 +51,7 @@ impl ChunkReverse for BinaryChunked { unsafe { let arr = BinaryViewArray::new_unchecked( - arr.data_type().clone(), + arr.dtype().clone(), views.into(), arr.data_buffers().clone(), arr.validity().map(|bitmap| bitmap.iter().rev().collect()), diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs index 5baad2a11d7f..d659ebab7e69 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs @@ -179,7 +179,7 @@ pub fn _get_rows_encoded_unordered(by: &[Series]) -> PolarsResult { for by in by { let arr = _get_rows_encoded_compat_array(by)?; let field = EncodingField::new_unsorted(); - match arr.data_type() { + match arr.dtype() { // Flatten the struct fields. ArrowDataType::Struct(_) => { let arr = arr.as_any().downcast_ref::().unwrap(); @@ -215,7 +215,7 @@ pub fn _get_rows_encoded( nulls_last: *null_last, no_order: false, }; - match arr.data_type() { + match arr.dtype() { // Flatten the struct fields. 
ArrowDataType::Struct(_) => { let arr = arr.as_any().downcast_ref::().unwrap(); diff --git a/crates/polars-core/src/chunked_array/ops/unique/mod.rs b/crates/polars-core/src/chunked_array/ops/unique/mod.rs index d0f09e61b98c..4ae58c00b5c7 100644 --- a/crates/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/unique/mod.rs @@ -126,16 +126,16 @@ where if !T::Native::is_float() && MetadataEnv::experimental_enabled() { let md = self.metadata(); if let (Some(min), Some(max)) = (md.get_min_value(), md.get_max_value()) { - let data_type = self + let dtype = self .field .as_ref() - .data_type() + .dtype() .to_arrow(CompatLevel::oldest()); if let Some(mut state) = PrimitiveRangedUniqueState::new( *min, *max, self.null_count() > 0, - data_type, + dtype, ) { use polars_compute::unique::RangedUniqueKernel; @@ -272,13 +272,13 @@ impl ChunkUnique for BooleanChunked { fn unique(&self) -> PolarsResult { use polars_compute::unique::RangedUniqueKernel; - let data_type = self + let dtype = self .field .as_ref() - .data_type() + .dtype() .to_arrow(CompatLevel::oldest()); let has_null = self.null_count() > 0; - let mut state = BooleanUniqueKernelState::new(has_null, data_type); + let mut state = BooleanUniqueKernelState::new(has_null, dtype); for arr in self.downcast_iter() { state.append(arr); diff --git a/crates/polars-core/src/chunked_array/ops/zip.rs b/crates/polars-core/src/chunked_array/ops/zip.rs index c518954c91e7..eb24468d892d 100644 --- a/crates/polars-core/src/chunked_array/ops/zip.rs +++ b/crates/polars-core/src/chunked_array/ops/zip.rs @@ -94,7 +94,7 @@ where combine_validities_and, ), (Some(t), Some(f)) => { - let dtype = if_true.downcast_iter().next().unwrap().data_type(); + let dtype = if_true.downcast_iter().next().unwrap().dtype(); let chunks = mask.downcast_iter().map(|m| { let bm = bool_null_to_false(m); let t = t.clone(); diff --git a/crates/polars-core/src/datatypes/_serde.rs b/crates/polars-core/src/datatypes/_serde.rs index fd79b5bf6566..e9d961ef4be0 100644 --- a/crates/polars-core/src/datatypes/_serde.rs +++ b/crates/polars-core/src/datatypes/_serde.rs @@ -192,7 +192,7 @@ impl From for DataType { #[cfg(feature = "dtype-categorical")] Categorical(_, ordering) => Self::Categorical(None, ordering), #[cfg(feature = "dtype-categorical")] - Enum(Some(categories), _) => create_enum_data_type(categories.0), + Enum(Some(categories), _) => create_enum_dtype(categories.0), #[cfg(feature = "dtype-categorical")] Enum(None, ordering) => Self::Enum(None, ordering), #[cfg(feature = "dtype-decimal")] diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 60e6cebb7c42..d59cc1317901 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -1249,7 +1249,7 @@ pub trait GetAnyValue { impl GetAnyValue for ArrayRef { // Should only be called with physical types unsafe fn get_unchecked(&self, index: usize) -> AnyValue { - match self.data_type() { + match self.dtype() { ArrowDataType::Int8 => { let arr = self .as_any() diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 5b0aed4baa3a..9cdc5620ed07 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -261,7 +261,7 @@ impl DataType { Struct(fields) => { let new_fields = fields .iter() - .map(|s| Field::new(s.name().clone(), s.data_type().to_physical())) + .map(|s| Field::new(s.name().clone(), 
s.dtype().to_physical())) .collect(); Struct(new_fields) }, @@ -791,7 +791,7 @@ pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult } #[cfg(feature = "dtype-categorical")] -pub fn create_enum_data_type(categories: Utf8ViewArray) -> DataType { +pub fn create_enum_dtype(categories: Utf8ViewArray) -> DataType { let rev_map = RevMapping::build_local(categories); DataType::Enum(Some(Arc::new(rev_map)), Default::default()) } diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index 1ed898add812..f3bc3571505c 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -60,10 +60,10 @@ impl Field { /// # use polars_core::prelude::*; /// let f = Field::new("Birthday".into(), DataType::Date); /// - /// assert_eq!(f.data_type(), &DataType::Date); + /// assert_eq!(f.dtype(), &DataType::Date); /// ``` #[inline] - pub fn data_type(&self) -> &DataType { + pub fn dtype(&self) -> &DataType { &self.dtype } @@ -145,8 +145,8 @@ impl DataType { ArrowDataType::Float32 => DataType::Float32, ArrowDataType::Float64 => DataType::Float64, #[cfg(feature = "dtype-array")] - ArrowDataType::FixedSizeList(f, size) => DataType::Array(DataType::from_arrow(f.data_type(), bin_to_view).boxed(), *size), - ArrowDataType::LargeList(f) | ArrowDataType::List(f) => DataType::List(DataType::from_arrow(f.data_type(), bin_to_view).boxed()), + ArrowDataType::FixedSizeList(f, size) => DataType::Array(DataType::from_arrow(f.dtype(), bin_to_view).boxed(), *size), + ArrowDataType::LargeList(f) | ArrowDataType::List(f) => DataType::List(DataType::from_arrow(f.dtype(), bin_to_view).boxed()), ArrowDataType::Date32 => DataType::Date, ArrowDataType::Timestamp(tu, tz) => DataType::Datetime(tu.into(), DataType::canonical_timezone(tz)), ArrowDataType::Duration(tu) => DataType::Duration(tu.into()), @@ -198,6 +198,6 @@ impl From<&ArrowDataType> for DataType { impl From<&ArrowField> for Field { fn from(f: &ArrowField) -> Self { - Field::new(f.name.clone(), f.data_type().into()) + Field::new(f.name.clone(), f.dtype().into()) } } diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index a63f77e2df8e..00455a1a841a 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -446,16 +446,16 @@ fn field_to_str(f: &Field, str_truncate: usize) -> (String, usize) { if env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES) { column_name = "".to_string(); } - let column_data_type = if env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES) { + let column_dtype = if env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES) { "".to_string() } else if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE) | env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES) { - format!("{}", f.data_type()) + format!("{}", f.dtype()) } else { - format!("\n{}", f.data_type()) + format!("\n{}", f.dtype()) }; - let mut dtype_length = column_data_type.trim_start().len(); + let mut dtype_length = column_dtype.trim_start().len(); let mut separator = "\n---"; if env_is_true(FMT_TABLE_HIDE_COLUMN_SEPARATOR) | env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES) @@ -466,11 +466,11 @@ fn field_to_str(f: &Field, str_truncate: usize) -> (String, usize) { let s = if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE) & !env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES) { - let inline_name_dtype = format!("{column_name} ({column_data_type})"); + let inline_name_dtype = format!("{column_name} ({column_dtype})"); dtype_length = inline_name_dtype.len(); inline_name_dtype } else { - 
format!("{column_name}{separator}{column_data_type}") + format!("{column_name}{separator}{column_dtype}") }; let mut s_len = std::cmp::max(name_length, dtype_length); let separator_length = separator.trim().len(); @@ -729,7 +729,7 @@ impl Display for DataFrame { let num_preset = std::env::var(FMT_TABLE_CELL_NUMERIC_ALIGNMENT) .unwrap_or_else(|_| str_preset.to_string()); for (column_index, column) in table.column_iter_mut().enumerate() { - let dtype = fields[column_index].data_type(); + let dtype = fields[column_index].dtype(); let mut preset = str_preset.as_str(); if dtype.is_numeric() || dtype.is_decimal() { preset = num_preset.as_str(); diff --git a/crates/polars-core/src/frame/from.rs b/crates/polars-core/src/frame/from.rs index 2af97c569942..5c3e1a8cb212 100644 --- a/crates/polars-core/src/frame/from.rs +++ b/crates/polars-core/src/frame/from.rs @@ -19,7 +19,7 @@ impl TryFrom for DataFrame { Series::_try_from_arrow_unchecked_with_md( fld.name.clone(), vec![arr], - fld.data_type(), + fld.dtype(), Some(&fld.metadata), ) } diff --git a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs index faa3f72efc9c..3e71953c5753 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs @@ -74,13 +74,13 @@ where list_values.into(), validity, ); - let data_type = ListArray::::default_datatype( + let dtype = ListArray::::default_datatype( T::get_dtype().to_arrow(CompatLevel::newest()), ); // SAFETY: // offsets are monotonically increasing let arr = ListArray::::new( - data_type, + dtype, Offsets::new_unchecked(offsets).into(), Box::new(array), None, @@ -139,11 +139,11 @@ where list_values.into(), validity, ); - let data_type = ListArray::::default_datatype( + let dtype = ListArray::::default_datatype( T::get_dtype().to_arrow(CompatLevel::newest()), ); let arr = ListArray::::new( - data_type, + dtype, Offsets::new_unchecked(offsets).into(), Box::new(array), None, @@ -259,12 +259,12 @@ impl AggList for ObjectChunked { // the pointer does not fail. pe.set_to_series_fn::(); let extension_array = Box::new(pe.take_and_forget()) as ArrayRef; - let extension_dtype = extension_array.data_type(); + let extension_dtype = extension_array.dtype(); - let data_type = ListArray::::default_datatype(extension_dtype.clone()); + let dtype = ListArray::::default_datatype(extension_dtype.clone()); // SAFETY: offsets are monotonically increasing. 
let arr = ListArray::::new( - data_type, + dtype, Offsets::new_unchecked(offsets).into(), extension_array, None, @@ -291,7 +291,7 @@ impl AggList for StructChunked { }; let arr = gathered.chunks()[0].clone(); - let dtype = LargeListArray::default_datatype(arr.data_type().clone()); + let dtype = LargeListArray::default_datatype(arr.dtype().clone()); let mut chunk = ListChunked::with_chunk( self.name().clone(), @@ -322,7 +322,7 @@ where }; let arr = gathered.chunks()[0].clone(); - let dtype = LargeListArray::default_datatype(arr.data_type().clone()); + let dtype = LargeListArray::default_datatype(arr.dtype().clone()); let mut chunk = ListChunked::with_chunk( ca.name().clone(), diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index b2139bdd770d..648141688db8 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -336,7 +336,7 @@ impl DataFrame { pub fn empty_with_arrow_schema(schema: &ArrowSchema) -> Self { let cols = schema .iter_values() - .map(|fld| Series::new_empty(fld.name.clone(), &(fld.data_type().into()))) + .map(|fld| Series::new_empty(fld.name.clone(), &(fld.dtype().into()))) .collect(); unsafe { DataFrame::new_no_checks(cols) } } diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index 9147ea8e7478..608d6ec820af 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -697,7 +697,7 @@ impl From<(&DataType, usize)> for AnyValueBufferTrusted<'_> { let buffers = fields .iter() .map(|field| { - let dtype = field.data_type().to_physical(); + let dtype = field.dtype().to_physical(); let buffer: AnyValueBuffer = (&dtype, len).into(); (buffer, field.name.clone()) }) diff --git a/crates/polars-core/src/frame/row/mod.rs b/crates/polars-core/src/frame/row/mod.rs index 090f22c78dfe..44e445b0874e 100644 --- a/crates/polars-core/src/frame/row/mod.rs +++ b/crates/polars-core/src/frame/row/mod.rs @@ -115,18 +115,18 @@ pub fn infer_schema( Schema::from_iter(resolve_fields(values)) } -fn add_or_insert(values: &mut Tracker, key: PlSmallStr, data_type: DataType) { - if data_type == DataType::Null { +fn add_or_insert(values: &mut Tracker, key: PlSmallStr, dtype: DataType) { + if dtype == DataType::Null { return; } if values.contains_key(&key) { let x = values.get_mut(&key).unwrap(); - x.insert(data_type); + x.insert(dtype); } else { // create hashset and add value type let mut hs = PlHashSet::new(); - hs.insert(data_type); + hs.insert(dtype); values.insert(key, hs); } } @@ -135,13 +135,13 @@ fn resolve_fields(spec: Tracker) -> Vec { spec.iter() .map(|(k, hs)| { let v: Vec<&DataType> = hs.iter().collect(); - Field::new(k.clone(), coerce_data_type(&v)) + Field::new(k.clone(), coerce_dtype(&v)) }) .collect() } /// Coerces a slice of datatypes into a single supertype. 
-pub fn coerce_data_type>(datatypes: &[A]) -> DataType { +pub fn coerce_dtype>(datatypes: &[A]) -> DataType { use DataType::*; let are_all_equal = datatypes.windows(2).all(|w| w[0].borrow() == w[1].borrow()); diff --git a/crates/polars-core/src/schema.rs b/crates/polars-core/src/schema.rs index 6485c513e6ec..b2ec55b528c7 100644 --- a/crates/polars-core/src/schema.rs +++ b/crates/polars-core/src/schema.rs @@ -26,7 +26,7 @@ impl SchemaExt for Schema { fn from_arrow_schema(value: &ArrowSchema) -> Self { value .iter_values() - .map(|x| (x.name.clone(), DataType::from_arrow(&x.data_type, true))) + .map(|x| (x.name.clone(), DataType::from_arrow(&x.dtype, true))) .collect() } @@ -173,7 +173,7 @@ impl SchemaNamesAndDtypes for ArrowSchema { fn get_names_and_dtypes(&'_ self) -> Vec<(&'_ str, Self::DataType)> { self.iter_values() - .map(|x| (x.name.as_str(), x.data_type.clone())) + .map(|x| (x.name.as_str(), x.dtype.clone())) .collect() } } diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 88375656e166..ce473a4d60fb 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -468,9 +468,9 @@ fn map_arrays_to_series(name: PlSmallStr, chunks: Vec) -> PolarsResult let inner = arr.field().clone(); // map has i32 offsets - let data_type = ListArray::::default_datatype(inner.data_type().clone()); + let dtype = ListArray::::default_datatype(inner.dtype().clone()); Box::new(ListArray::::new( - data_type, + dtype, arr.offsets().clone(), inner, arr.validity().cloned(), @@ -490,7 +490,7 @@ unsafe fn to_physical_and_dtype( arrays: Vec, md: Option<&Metadata>, ) -> (Vec, DataType) { - match arrays[0].data_type() { + match arrays[0].dtype() { ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => { let chunks = cast_chunks(&arrays, &DataType::String, CastOptions::NonStrict).unwrap(); (chunks, DataType::String) @@ -538,7 +538,7 @@ unsafe fn to_physical_and_dtype( let arr = arr.as_any().downcast_ref::().unwrap(); let dtype = - FixedSizeListArray::default_datatype(values.data_type().clone(), *size); + FixedSizeListArray::default_datatype(values.dtype().clone(), *size); Box::from(FixedSizeListArray::new( dtype, values, @@ -566,7 +566,7 @@ unsafe fn to_physical_and_dtype( .map(|(arr, values)| { let arr = arr.as_any().downcast_ref::>().unwrap(); - let dtype = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); Box::from(ListArray::::new( dtype, arr.offsets().clone(), @@ -597,7 +597,7 @@ unsafe fn to_physical_and_dtype( .iter() .zip(_fields.iter()) .map(|(arr, field)| { - ArrowField::new(field.name.clone(), arr.data_type().clone(), true) + ArrowField::new(field.name.clone(), arr.dtype().clone(), true) }) .collect(); let arrow_array = Box::new(StructArray::new( @@ -635,20 +635,20 @@ unsafe fn to_physical_and_dtype( fn check_types(chunks: &[ArrayRef]) -> PolarsResult { let mut chunks_iter = chunks.iter(); - let data_type: ArrowDataType = chunks_iter + let dtype: ArrowDataType = chunks_iter .next() .ok_or_else(|| polars_err!(NoData: "expected at least one array-ref"))? 
- .data_type() + .dtype() .clone(); for chunk in chunks_iter { - if chunk.data_type() != &data_type { + if chunk.dtype() != &dtype { polars_bail!( ComputeError: "cannot create series from multiple arrays with different types" ); } } - Ok(data_type) + Ok(dtype) } impl Series { @@ -671,10 +671,10 @@ impl TryFrom<(PlSmallStr, Vec)> for Series { fn try_from(name_arr: (PlSmallStr, Vec)) -> PolarsResult { let (name, chunks) = name_arr; - let data_type = check_types(&chunks)?; + let dtype = check_types(&chunks)?; // SAFETY: // dtype is checked - unsafe { Series::_try_from_arrow_unchecked(name, chunks, &data_type) } + unsafe { Series::_try_from_arrow_unchecked(name, chunks, &dtype) } } } @@ -693,7 +693,7 @@ impl TryFrom<(&ArrowField, Vec)> for Series { fn try_from(field_arr: (&ArrowField, Vec)) -> PolarsResult { let (field, chunks) = field_arr; - let data_type = check_types(&chunks)?; + let dtype = check_types(&chunks)?; // SAFETY: // dtype is checked @@ -701,7 +701,7 @@ impl TryFrom<(&ArrowField, Vec)> for Series { Series::_try_from_arrow_unchecked_with_md( field.name.clone(), chunks, - &data_type, + &dtype, Some(&field.metadata), ) } diff --git a/crates/polars-core/src/series/implementations/array.rs b/crates/polars-core/src/series/implementations/array.rs index 351c7d51b9c9..51bd084cd46d 100644 --- a/crates/polars-core/src/series/implementations/array.rs +++ b/crates/polars-core/src/series/implementations/array.rs @@ -18,7 +18,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { @@ -141,8 +141,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/binary.rs b/crates/polars-core/src/series/implementations/binary.rs index 221ddd25ca8b..8cdf326302d1 100644 --- a/crates/polars-core/src/series/implementations/binary.rs +++ b/crates/polars-core/src/series/implementations/binary.rs @@ -13,7 +13,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() @@ -170,8 +170,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/binary_offset.rs b/crates/polars-core/src/series/implementations/binary_offset.rs index 8f1fa74df7b2..9ff8cd6704d0 100644 --- a/crates/polars-core/src/series/implementations/binary_offset.rs +++ b/crates/polars-core/src/series/implementations/binary_offset.rs @@ -13,7 +13,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + 
self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() @@ -142,8 +142,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index 86ee10da02b1..49da460464b8 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -12,7 +12,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() @@ -198,8 +198,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/categorical.rs b/crates/polars-core/src/series/implementations/categorical.rs index b0f7623e23fd..497ff5267d88 100644 --- a/crates/polars-core/src/series/implementations/categorical.rs +++ b/crates/polars-core/src/series/implementations/categorical.rs @@ -219,8 +219,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/date.rs b/crates/polars-core/src/series/implementations/date.rs index b7d761683333..01c639958d92 100644 --- a/crates/polars-core/src/series/implementations/date.rs +++ b/crates/polars-core/src/series/implementations/date.rs @@ -234,8 +234,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - match data_type { + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + match dtype { DataType::String => Ok(self .0 .clone() @@ -248,11 +248,11 @@ impl SeriesTrait for SeriesWrap { DataType::Datetime(_, _) => { let mut out = self .0 - .cast_with_options(data_type, CastOptions::NonStrict)?; + .cast_with_options(dtype, CastOptions::NonStrict)?; out.set_sorted_flag(self.0.is_sorted_flag()); Ok(out) }, - _ => self.0.cast_with_options(data_type, cast_options), + _ => self.0.cast_with_options(dtype, cast_options), } } diff --git a/crates/polars-core/src/series/implementations/datetime.rs b/crates/polars-core/src/series/implementations/datetime.rs index eed12d8586c5..59c733c8d1e9 100644 --- a/crates/polars-core/src/series/implementations/datetime.rs +++ b/crates/polars-core/src/series/implementations/datetime.rs @@ -249,8 +249,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, 
cast_options: CastOptions) -> PolarsResult { - match (data_type, self.0.time_unit()) { + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + match (dtype, self.0.time_unit()) { (DataType::String, TimeUnit::Milliseconds) => { Ok(self.0.to_string("%F %T%.3f")?.into_series()) }, @@ -260,7 +260,7 @@ impl SeriesTrait for SeriesWrap { (DataType::String, TimeUnit::Nanoseconds) => { Ok(self.0.to_string("%F %T%.9f")?.into_series()) }, - _ => self.0.cast_with_options(data_type, cast_options), + _ => self.0.cast_with_options(dtype, cast_options), } } diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index a2d9d329ffe8..98f579a95e8f 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -61,10 +61,10 @@ impl SeriesWrap { ) }; let new_values = s.array_ref(0).clone(); - let data_type = + let dtype = ListArray::::default_datatype(dtype.to_arrow(CompatLevel::newest())); let new_arr = ListArray::::new( - data_type, + dtype, arr.offsets().clone(), new_values, arr.validity().cloned(), @@ -290,8 +290,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, cast_options) + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, cast_options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/duration.rs b/crates/polars-core/src/series/implementations/duration.rs index 81f5ee7497dd..35751b722485 100644 --- a/crates/polars-core/src/series/implementations/duration.rs +++ b/crates/polars-core/src/series/implementations/duration.rs @@ -372,8 +372,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, cast_options) + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, cast_options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 1a7f57927e47..ab999199cf07 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -14,7 +14,7 @@ macro_rules! impl_dyn_series { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _set_flags(&mut self, flags: MetadataFlags) { @@ -261,10 +261,10 @@ macro_rules! 
impl_dyn_series { fn cast( &self, - data_type: &DataType, + dtype: &DataType, cast_options: CastOptions, ) -> PolarsResult { - self.0.cast_with_options(data_type, cast_options) + self.0.cast_with_options(dtype, cast_options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/list.rs b/crates/polars-core/src/series/implementations/list.rs index 554e3852f8c8..865fadcfcb93 100644 --- a/crates/polars-core/src/series/implementations/list.rs +++ b/crates/polars-core/src/series/implementations/list.rs @@ -12,7 +12,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { self.0.get_flags() @@ -126,8 +126,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, cast_options) + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, cast_options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index 1a9df0216c14..6b6dd08f36cc 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -78,7 +78,7 @@ macro_rules! impl_dyn_series { } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _get_flags(&self) -> MetadataFlags { @@ -362,8 +362,8 @@ macro_rules! impl_dyn_series { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, options) + fn cast(&self, dtype: &DataType, options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/null.rs b/crates/polars-core/src/series/implementations/null.rs index 75e3acb69dda..34655a6ac61e 100644 --- a/crates/polars-core/src/series/implementations/null.rs +++ b/crates/polars-core/src/series/implementations/null.rs @@ -198,8 +198,8 @@ impl SeriesTrait for NullChunked { NullChunked::new(self.name.clone(), 0).into_series() } - fn cast(&self, data_type: &DataType, _cast_options: CastOptions) -> PolarsResult { - Ok(Series::full_null(self.name.clone(), self.len(), data_type)) + fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult { + Ok(Series::full_null(self.name.clone(), self.len(), dtype)) } fn null_count(&self) -> usize { diff --git a/crates/polars-core/src/series/implementations/object.rs b/crates/polars-core/src/series/implementations/object.rs index b4821682693f..5ef70ed8c99c 100644 --- a/crates/polars-core/src/series/implementations/object.rs +++ b/crates/polars-core/src/series/implementations/object.rs @@ -160,8 +160,8 @@ where ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, _cast_options: CastOptions) -> PolarsResult { - if matches!(data_type, DataType::Object(_, None)) { + fn cast(&self, dtype: &DataType, _cast_options: CastOptions) -> PolarsResult { + if matches!(dtype, DataType::Object(_, None)) { Ok(self.0.clone().into_series()) } 
else { Err(PolarsError::ComputeError( diff --git a/crates/polars-core/src/series/implementations/string.rs b/crates/polars-core/src/series/implementations/string.rs index 3cceaca32c48..c8d85825e84b 100644 --- a/crates/polars-core/src/series/implementations/string.rs +++ b/crates/polars-core/src/series/implementations/string.rs @@ -12,7 +12,7 @@ impl private::PrivateSeries for SeriesWrap { Cow::Borrowed(self.0.ref_field()) } fn _dtype(&self) -> &DataType { - self.0.ref_field().data_type() + self.0.ref_field().dtype() } fn _set_flags(&mut self, flags: MetadataFlags) { @@ -175,8 +175,8 @@ impl SeriesTrait for SeriesWrap { ChunkExpandAtIndex::new_from_index(&self.0, index, length).into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - self.0.cast_with_options(data_type, cast_options) + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + self.0.cast_with_options(dtype, cast_options) } fn get(&self, index: usize) -> PolarsResult { diff --git a/crates/polars-core/src/series/implementations/time.rs b/crates/polars-core/src/series/implementations/time.rs index e0f87a4d80f8..137de2f31961 100644 --- a/crates/polars-core/src/series/implementations/time.rs +++ b/crates/polars-core/src/series/implementations/time.rs @@ -210,8 +210,8 @@ impl SeriesTrait for SeriesWrap { .into_series() } - fn cast(&self, data_type: &DataType, cast_options: CastOptions) -> PolarsResult { - match data_type { + fn cast(&self, dtype: &DataType, cast_options: CastOptions) -> PolarsResult { + match dtype { DataType::String => Ok(self .0 .clone() @@ -220,7 +220,7 @@ impl SeriesTrait for SeriesWrap { .unwrap() .to_string("%T") .into_series()), - _ => self.0.cast_with_options(data_type, cast_options), + _ => self.0.cast_with_options(dtype, cast_options), } } diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index 22aa43a70662..1213c3346525 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -70,9 +70,9 @@ impl Series { s.to_arrow(0, compat_level) }; - let data_type = ListArray::::default_datatype(inner.to_arrow(compat_level)); + let dtype = ListArray::::default_datatype(inner.to_arrow(compat_level)); let arr = ListArray::::new( - data_type, + dtype, arr.offsets().clone(), new_values, arr.validity().cloned(), diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index bdd500ab3100..a629a8fd1c5c 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -896,10 +896,10 @@ impl Series { let offsets = (0i64..(s.len() as i64 + 1)).collect::>(); let offsets = unsafe { Offsets::new_unchecked(offsets) }; - let data_type = LargeListArray::default_datatype( + let dtype = LargeListArray::default_datatype( s.dtype().to_physical().to_arrow(CompatLevel::newest()), ); - let new_arr = LargeListArray::new(data_type, offsets.into(), values, None); + let new_arr = LargeListArray::new(dtype, offsets.into(), values, None); let mut out = ListChunked::with_chunk(s.name().clone(), new_arr); out.set_inner_dtype(s.dtype().clone()); out diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index 3e6a32e0a9d7..ee33c309687e 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -53,7 +53,7 @@ impl Series { DataType::Struct(fields) => { let fields = fields .iter() - .map(|fld| Series::full_null(fld.name().clone(), size, 
fld.data_type())) + .map(|fld| Series::full_null(fld.name().clone(), size, fld.dtype())) .collect::>(); let ca = StructChunked::from_series(name, &fields).unwrap(); diff --git a/crates/polars-core/src/series/ops/reshape.rs b/crates/polars-core/src/series/ops/reshape.rs index 76d8d59886c7..85c8e283e166 100644 --- a/crates/polars-core/src/series/ops/reshape.rs +++ b/crates/polars-core/src/series/ops/reshape.rs @@ -71,12 +71,12 @@ impl Series { let offsets = vec![0i64, values.len() as i64]; let inner_type = s.dtype(); - let data_type = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); // SAFETY: offsets are correct. let arr = unsafe { ListArray::new( - data_type, + dtype, Offsets::new_unchecked(offsets).into(), values.clone(), None, diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index 21cc1bf37df0..933a600d4cb2 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -328,7 +328,7 @@ pub trait SeriesTrait: /// ``` fn new_from_index(&self, _index: usize, _length: usize) -> Series; - fn cast(&self, _data_type: &DataType, options: CastOptions) -> PolarsResult; + fn cast(&self, _dtype: &DataType, options: CastOptions) -> PolarsResult; /// Get a single value by index. Don't use this operation for loops as a runtime cast is /// needed for every iteration. diff --git a/crates/polars-core/src/utils/mod.rs b/crates/polars-core/src/utils/mod.rs index d6fe5890ff0b..a516626e1abb 100644 --- a/crates/polars-core/src/utils/mod.rs +++ b/crates/polars-core/src/utils/mod.rs @@ -393,7 +393,7 @@ macro_rules! match_dtype_to_logical_apply_macro { /// Apply a macro on the Downcasted ChunkedArray's #[macro_export] -macro_rules! match_arrow_data_type_apply_macro_ca { +macro_rules! match_arrow_dtype_apply_macro_ca { ($self:expr, $macro:ident, $macro_string:ident, $macro_bool:ident $(, $opt_args:expr)*) => {{ match $self.dtype() { DataType::String => $macro_string!($self.str().unwrap() $(, $opt_args)*), diff --git a/crates/polars-expr/src/expressions/aggregation.rs b/crates/polars-expr/src/expressions/aggregation.rs index 5600581036b7..5c64ef144cc4 100644 --- a/crates/polars-expr/src/expressions/aggregation.rs +++ b/crates/polars-expr/src/expressions/aggregation.rs @@ -632,10 +632,10 @@ impl PartitionedAggregation for AggregationExpr { let vals = values.iter().map(|arr| &**arr).collect::>(); let values = concatenate(&vals).unwrap(); - let data_type = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); // SAFETY: offsets are monotonically increasing. let arr = ListArray::::new( - data_type, + dtype, unsafe { Offsets::new_unchecked(offsets).into() }, values, None, diff --git a/crates/polars-expr/src/expressions/alias.rs b/crates/polars-expr/src/expressions/alias.rs index 8298bbf06ee8..c3e0ebff7bd0 100644 --- a/crates/polars-expr/src/expressions/alias.rs +++ b/crates/polars-expr/src/expressions/alias.rs @@ -57,7 +57,7 @@ impl PhysicalExpr for AliasExpr { self.name.clone(), self.physical_expr .to_field(input_schema)? 
- .data_type() + .dtype() .clone(), )) } diff --git a/crates/polars-expr/src/expressions/apply.rs b/crates/polars-expr/src/expressions/apply.rs index 783d261ffa9b..77102af2d224 100644 --- a/crates/polars-expr/src/expressions/apply.rs +++ b/crates/polars-expr/src/expressions/apply.rs @@ -133,7 +133,7 @@ impl ApplyExpr { Ok(Series::full_null( field.name().clone(), 1, - field.data_type(), + field.dtype(), )) } } diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index d9ebf38070e3..dd7697e2f054 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -392,7 +392,7 @@ mod stats { #[cfg(debug_assertions)] { - match (fld_l.data_type(), fld_r.data_type()) { + match (fld_l.dtype(), fld_r.dtype()) { #[cfg(feature = "dtype-categorical")] (DataType::String, DataType::Categorical(_, _) | DataType::Enum(_, _)) => {}, #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-expr/src/expressions/cast.rs b/crates/polars-expr/src/expressions/cast.rs index f78f1f7c11f7..dcc3d4dddbca 100644 --- a/crates/polars-expr/src/expressions/cast.rs +++ b/crates/polars-expr/src/expressions/cast.rs @@ -6,14 +6,14 @@ use crate::expressions::{AggState, AggregationContext, PartitionedAggregation, P pub struct CastExpr { pub(crate) input: Arc, - pub(crate) data_type: DataType, + pub(crate) dtype: DataType, pub(crate) expr: Expr, pub(crate) options: CastOptions, } impl CastExpr { fn finish(&self, input: &Series) -> PolarsResult { - input.cast_with_options(&self.data_type, self.options) + input.cast_with_options(&self.dtype, self.options) } } @@ -71,7 +71,7 @@ impl PhysicalExpr for CastExpr { fn to_field(&self, input_schema: &Schema) -> PolarsResult { self.input.to_field(input_schema).map(|mut fld| { - fld.coerce(self.data_type.clone()); + fld.coerce(self.dtype.clone()); fld }) } diff --git a/crates/polars-expr/src/expressions/literal.rs b/crates/polars-expr/src/expressions/literal.rs index d46d109f1fa9..425275cebe58 100644 --- a/crates/polars-expr/src/expressions/literal.rs +++ b/crates/polars-expr/src/expressions/literal.rs @@ -46,8 +46,8 @@ impl PhysicalExpr for LiteralExpr { Range { low, high, - data_type, - } => match data_type { + dtype, + } => match dtype { DataType::Int32 => { polars_ensure!( *low >= i32::MIN as i64 && *high <= i32::MAX as i64, diff --git a/crates/polars-expr/src/expressions/ternary.rs b/crates/polars-expr/src/expressions/ternary.rs index 8e689eec2ec0..ef12dcea0204 100644 --- a/crates/polars-expr/src/expressions/ternary.rs +++ b/crates/polars-expr/src/expressions/ternary.rs @@ -280,10 +280,10 @@ impl PhysicalExpr for TernaryExpr { let values = out.array_ref(0); let offsets = ac_target.series().list().unwrap().offsets()?; let inner_type = out.dtype(); - let data_type = LargeListArray::default_datatype(values.data_type().clone()); + let dtype = LargeListArray::default_datatype(values.dtype().clone()); // SAFETY: offsets are correct. 
- let out = LargeListArray::new(data_type, offsets, values.clone(), None); + let out = LargeListArray::new(dtype, offsets, values.clone(), None); let mut out = ListChunked::with_chunk(truthy.name().clone(), out); unsafe { out.to_logical(inner_type.clone()) }; diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index 5ea5d156b320..bc810d1df095 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -409,7 +409,7 @@ impl PhysicalExpr for WindowExpr { return Ok(Series::full_null( field.name().clone(), 0, - field.data_type(), + field.dtype(), )); } diff --git a/crates/polars-expr/src/planner.rs b/crates/polars-expr/src/planner.rs index e09b274afa9f..315fc123d158 100644 --- a/crates/polars-expr/src/planner.rs +++ b/crates/polars-expr/src/planner.rs @@ -428,13 +428,13 @@ fn create_physical_expr_inner( }, Cast { expr, - data_type, + dtype, options, } => { let phys_expr = create_physical_expr_inner(*expr, ctxt, expr_arena, schema, state)?; Ok(Arc::new(CastExpr { input: phys_expr, - data_type: data_type.clone(), + dtype: dtype.clone(), expr: node_to_expr(expression, expr_arena), options: *options, })) diff --git a/crates/polars-ffi/src/lib.rs b/crates/polars-ffi/src/lib.rs index 51635b2c0068..31a47aef3eef 100644 --- a/crates/polars-ffi/src/lib.rs +++ b/crates/polars-ffi/src/lib.rs @@ -29,6 +29,6 @@ unsafe fn import_array( schema: &ffi::ArrowSchema, ) -> PolarsResult { let field = ffi::import_field_from_c(schema)?; - let out = ffi::import_array_from_c(array, field.data_type)?; + let out = ffi::import_array_from_c(array, field.dtype)?; Ok(out) } diff --git a/crates/polars-io/src/csv/read/read_impl.rs b/crates/polars-io/src/csv/read/read_impl.rs index 007bc215171d..eabd1ca6a2f0 100644 --- a/crates/polars-io/src/csv/read/read_impl.rs +++ b/crates/polars-io/src/csv/read/read_impl.rs @@ -35,7 +35,7 @@ pub(crate) fn cast_columns( ignore_errors: bool, ) -> PolarsResult<()> { let cast_fn = |s: &Series, fld: &Field| { - let out = match (s.dtype(), fld.data_type()) { + let out = match (s.dtype(), fld.dtype()) { #[cfg(feature = "temporal")] (DataType::String, DataType::Date) => s .str() diff --git a/crates/polars-io/src/csv/read/reader.rs b/crates/polars-io/src/csv/read/reader.rs index f1a155d84fd2..49fb576fff8a 100644 --- a/crates/polars-io/src/csv/read/reader.rs +++ b/crates/polars-io/src/csv/read/reader.rs @@ -168,7 +168,7 @@ impl CsvReader { .map(|mut fld| { use DataType::*; - match fld.data_type() { + match fld.dtype() { Time => { self.options.fields_to_cast.push(fld.clone()); fld.coerce(String); diff --git a/crates/polars-io/src/json/infer.rs b/crates/polars-io/src/json/infer.rs index 9cd82721d156..0ff83225e97f 100644 --- a/crates/polars-io/src/json/infer.rs +++ b/crates/polars-io/src/json/infer.rs @@ -22,7 +22,7 @@ pub(crate) fn json_values_to_supertype( .unwrap_or_else(|| polars_bail!(ComputeError: "could not infer data-type")) } -pub(crate) fn data_types_to_supertype>( +pub(crate) fn dtypes_to_supertype>( datatypes: I, ) -> PolarsResult { datatypes diff --git a/crates/polars-io/src/ndjson/mod.rs b/crates/polars-io/src/ndjson/mod.rs index 196842d4ad1a..295d5be81f90 100644 --- a/crates/polars-io/src/ndjson/mod.rs +++ b/crates/polars-io/src/ndjson/mod.rs @@ -10,10 +10,10 @@ pub fn infer_schema( reader: &mut R, infer_schema_len: Option, ) -> PolarsResult { - let data_types = polars_json::ndjson::iter_unique_dtypes(reader, infer_schema_len)?; - let data_type = - 
crate::json::infer::data_types_to_supertype(data_types.map(|dt| DataType::from(&dt)))?; - let schema = StructArray::get_fields(&data_type.to_arrow(CompatLevel::newest())) + let dtypes = polars_json::ndjson::iter_unique_dtypes(reader, infer_schema_len)?; + let dtype = + crate::json::infer::dtypes_to_supertype(dtypes.map(|dt| DataType::from(&dt)))?; + let schema = StructArray::get_fields(&dtype.to_arrow(CompatLevel::newest())) .iter() .map(Into::::into) .collect(); diff --git a/crates/polars-io/src/parquet/read/read_impl.rs b/crates/polars-io/src/parquet/read/read_impl.rs index 2ac4ef7c019f..d68c08602de1 100644 --- a/crates/polars-io/src/parquet/read/read_impl.rs +++ b/crates/polars-io/src/parquet/read/read_impl.rs @@ -37,10 +37,10 @@ use crate::RowIndex; #[cfg(debug_assertions)] // Ensure we get the proper polars types from schema inference // This saves unneeded casts. -fn assert_dtypes(data_type: &ArrowDataType) { +fn assert_dtypes(dtype: &ArrowDataType) { use ArrowDataType as D; - match data_type { + match dtype { // These should all be casted to the BinaryView / Utf8View variants D::Utf8 | D::Binary | D::LargeUtf8 | D::LargeBinary => unreachable!(), @@ -51,11 +51,11 @@ fn assert_dtypes(data_type: &ArrowDataType) { D::Map(_, _) => unreachable!(), // Recursive checks - D::Dictionary(_, data_type, _) => assert_dtypes(data_type), - D::Extension(_, data_type, _) => assert_dtypes(data_type), - D::LargeList(inner) => assert_dtypes(&inner.data_type), - D::FixedSizeList(inner, _) => assert_dtypes(&inner.data_type), - D::Struct(fields) => fields.iter().for_each(|f| assert_dtypes(f.data_type())), + D::Dictionary(_, dtype, _) => assert_dtypes(dtype), + D::Extension(_, dtype, _) => assert_dtypes(dtype), + D::LargeList(inner) => assert_dtypes(&inner.dtype), + D::FixedSizeList(inner, _) => assert_dtypes(&inner.dtype), + D::Struct(fields) => fields.iter().for_each(|f| assert_dtypes(f.dtype())), _ => {}, } @@ -73,7 +73,7 @@ fn column_idx_to_series( #[cfg(debug_assertions)] { - assert_dtypes(field.data_type()) + assert_dtypes(field.dtype()) } let columns = mmap_columns(store, field_md); let stats = columns @@ -85,7 +85,7 @@ fn column_idx_to_series( // We cannot really handle nested metadata at the moment. Just skip it. use ArrowDataType as AD; - match field.data_type() { + match field.dtype() { AD::List(_) | AD::LargeList(_) | AD::Struct(_) | AD::FixedSizeList(_, _) => { return Ok(series) }, @@ -477,7 +477,7 @@ fn rg_to_dfs_prefiltered( .get_at_index(col_idx) .unwrap() .1 - .data_type + .dtype .is_nested(); // We empirically selected these numbers. 
diff --git a/crates/polars-io/src/parquet/write/writer.rs b/crates/polars-io/src/parquet/write/writer.rs index 7ca2229ae62f..99cf4c95a45b 100644 --- a/crates/polars-io/src/parquet/write/writer.rs +++ b/crates/polars-io/src/parquet/write/writer.rs @@ -133,13 +133,13 @@ where fn get_encodings(schema: &ArrowSchema) -> Vec> { schema .iter_values() - .map(|f| transverse(&f.data_type, encoding_map)) + .map(|f| transverse(&f.dtype, encoding_map)) .collect() } /// Declare encodings -fn encoding_map(data_type: &ArrowDataType) -> Encoding { - match data_type.to_physical_type() { +fn encoding_map(dtype: &ArrowDataType) -> Encoding { + match dtype.to_physical_type() { PhysicalType::Dictionary(_) | PhysicalType::LargeBinary | PhysicalType::LargeUtf8 diff --git a/crates/polars-io/src/predicates.rs b/crates/polars-io/src/predicates.rs index 8acfc304a1a8..b46600666c44 100644 --- a/crates/polars-io/src/predicates.rs +++ b/crates/polars-io/src/predicates.rs @@ -100,7 +100,7 @@ impl ColumnStats { /// Returns the [`DataType`] of the column. pub fn dtype(&self) -> &DataType { - self.field.data_type() + self.field.dtype() } /// Returns the null count of each row group of the column. diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index 06d135e4ca5f..586dd9722fe3 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -99,7 +99,7 @@ pub(crate) fn finish_reader( let empty_cols = arrow_schema .iter_values() .map(|fld| { - Series::try_from((fld.name.clone(), new_empty_array(fld.data_type.clone()))) + Series::try_from((fld.name.clone(), new_empty_array(fld.dtype.clone()))) }) .collect::>()?; DataFrame::new(empty_cols)? @@ -126,14 +126,14 @@ pub(crate) fn schema_to_arrow_checked( #[cfg(feature = "object")] { polars_ensure!( - !matches!(field.data_type(), DataType::Object(_, _)), + !matches!(field.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to {}", _file_name ); } let field = field - .data_type() + .dtype() .to_arrow_field(field.name().clone(), compat_level); Ok((field.name.clone(), field)) }) diff --git a/crates/polars-json/src/json/deserialize.rs b/crates/polars-json/src/json/deserialize.rs index 2cf49f9ce0ef..f35679a8d5c3 100644 --- a/crates/polars-json/src/json/deserialize.rs +++ b/crates/polars-json/src/json/deserialize.rs @@ -91,9 +91,9 @@ fn deserialize_utf8view_into<'a, A: Borrow>>( fn deserialize_list<'a, A: Borrow>>( rows: &[A], - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> ListArray { - let child = ListArray::::get_child_type(&data_type); + let child = ListArray::::get_child_type(&dtype); let mut validity = MutableBitmap::with_capacity(rows.len()); let mut offsets = Offsets::::with_capacity(rows.len()); @@ -123,18 +123,18 @@ fn deserialize_list<'a, A: Borrow>>( let values = _deserialize(&inner, child.clone()); - ListArray::::new(data_type, offsets.into(), values, validity.into()) + ListArray::::new(dtype, offsets.into(), values, validity.into()) } fn deserialize_struct<'a, A: Borrow>>( rows: &[A], - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> StructArray { - let fields = StructArray::get_fields(&data_type); + let fields = StructArray::get_fields(&dtype); let mut values = fields .iter() - .map(|f| (f.name.as_str(), (f.data_type(), vec![]))) + .map(|f| (f.name.as_str(), (f.dtype(), vec![]))) .collect::>(); let mut validity = MutableBitmap::with_capacity(rows.len()); @@ -160,24 +160,24 @@ fn deserialize_struct<'a, A: Borrow>>( let values = fields .iter() .map(|fld| { - let (data_type, vals) = 
values.get(fld.name.as_str()).unwrap(); - _deserialize(vals, (*data_type).clone()) + let (dtype, vals) = values.get(fld.name.as_str()).unwrap(); + _deserialize(vals, (*dtype).clone()) }) .collect::>(); - StructArray::new(data_type.clone(), values, validity.into()) + StructArray::new(dtype.clone(), values, validity.into()) } fn fill_array_from( f: fn(&mut MutablePrimitiveArray, &[B]), - data_type: ArrowDataType, + dtype: ArrowDataType, rows: &[B], ) -> Box where T: NativeType, A: From> + Array, { - let mut array = MutablePrimitiveArray::::with_capacity(rows.len()).to(data_type); + let mut array = MutablePrimitiveArray::::with_capacity(rows.len()).to(dtype); f(&mut array, rows); Box::new(A::from(array)) } @@ -248,19 +248,19 @@ where pub(crate) fn _deserialize<'a, A: Borrow>>( rows: &[A], - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> Box { - match &data_type { - ArrowDataType::Null => Box::new(NullArray::new(data_type, rows.len())), + match &dtype { + ArrowDataType::Null => Box::new(NullArray::new(dtype, rows.len())), ArrowDataType::Boolean => { fill_generic_array_from::<_, _, BooleanArray>(deserialize_boolean_into, rows) }, ArrowDataType::Int8 => { - fill_array_from::<_, _, PrimitiveArray>(deserialize_primitive_into, data_type, rows) + fill_array_from::<_, _, PrimitiveArray>(deserialize_primitive_into, dtype, rows) }, ArrowDataType::Int16 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::Int32 @@ -269,7 +269,7 @@ pub(crate) fn _deserialize<'a, A: Borrow>>( | ArrowDataType::Interval(IntervalUnit::YearMonth) => { fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ) }, @@ -281,7 +281,7 @@ pub(crate) fn _deserialize<'a, A: Borrow>>( | ArrowDataType::Time64(_) | ArrowDataType::Duration(_) => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::Timestamp(tu, tz) => { @@ -296,35 +296,35 @@ pub(crate) fn _deserialize<'a, A: Borrow>>( }, _ => None, }); - Box::new(Int64Array::from_iter(iter).to(data_type)) + Box::new(Int64Array::from_iter(iter).to(dtype)) }, ArrowDataType::UInt8 => { - fill_array_from::<_, _, PrimitiveArray>(deserialize_primitive_into, data_type, rows) + fill_array_from::<_, _, PrimitiveArray>(deserialize_primitive_into, dtype, rows) }, ArrowDataType::UInt16 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::UInt32 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::UInt64 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::Float16 => unreachable!(), ArrowDataType::Float32 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::Float64 => fill_array_from::<_, _, PrimitiveArray>( deserialize_primitive_into, - data_type, + dtype, rows, ), ArrowDataType::LargeUtf8 => { @@ -333,19 +333,19 @@ pub(crate) fn _deserialize<'a, A: Borrow>>( ArrowDataType::Utf8View => { fill_generic_array_from::<_, _, Utf8ViewArray>(deserialize_utf8view_into, rows) }, - ArrowDataType::LargeList(_) => Box::new(deserialize_list(rows, data_type)), + ArrowDataType::LargeList(_) => Box::new(deserialize_list(rows, dtype)), ArrowDataType::LargeBinary => Box::new(deserialize_binary(rows)), - ArrowDataType::Struct(_) => Box::new(deserialize_struct(rows, data_type)), + 
ArrowDataType::Struct(_) => Box::new(deserialize_struct(rows, dtype)), _ => todo!(), } } -pub fn deserialize(json: &BorrowedValue, data_type: ArrowDataType) -> PolarsResult> { +pub fn deserialize(json: &BorrowedValue, dtype: ArrowDataType) -> PolarsResult> { match json { - BorrowedValue::Array(rows) => match data_type { - ArrowDataType::LargeList(inner) => Ok(_deserialize(rows, inner.data_type)), + BorrowedValue::Array(rows) => match dtype { + ArrowDataType::LargeList(inner) => Ok(_deserialize(rows, inner.dtype)), _ => todo!("read an Array from a non-Array data type"), }, - _ => Ok(_deserialize(&[json], data_type)), + _ => Ok(_deserialize(&[json], dtype)), } } diff --git a/crates/polars-json/src/json/infer_schema.rs b/crates/polars-json/src/json/infer_schema.rs index 19c96f5cf659..4d0eb4d47309 100644 --- a/crates/polars-json/src/json/infer_schema.rs +++ b/crates/polars-json/src/json/infer_schema.rs @@ -46,7 +46,7 @@ fn infer_array(values: &[BorrowedValue]) -> PolarsResult { let dt = if !types.is_empty() { let types = types.into_iter().collect::>(); - coerce_data_type(&types) + coerce_dtype(&types) } else { ArrowDataType::Null }; @@ -64,7 +64,7 @@ fn infer_array(values: &[BorrowedValue]) -> PolarsResult { /// * Lists and scalars are coerced to a list of a compatible scalar /// * Structs contain the union of all fields /// * All other types are coerced to `Utf8` -pub(crate) fn coerce_data_type>(datatypes: &[A]) -> ArrowDataType { +pub(crate) fn coerce_dtype>(datatypes: &[A]) -> ArrowDataType { use ArrowDataType::*; if datatypes.is_empty() { @@ -97,11 +97,11 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr |mut acc, field| { match acc.entry(field.name.as_str()) { Entry::Occupied(mut v) => { - v.get_mut().insert(&field.data_type); + v.get_mut().insert(&field.dtype); }, Entry::Vacant(v) => { let mut a = PlHashSet::default(); - a.insert(&field.data_type); + a.insert(&field.dtype); v.insert(a); }, } @@ -113,7 +113,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr .into_iter() .map(|(name, dts)| { let dts = dts.into_iter().collect::>(); - Field::new(name.into(), coerce_data_type(&dts), true) + Field::new(name.into(), coerce_dtype(&dts), true) }) .collect(); return Struct(fields); @@ -122,7 +122,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr .iter() .map(|dt| { if let LargeList(inner) = dt.borrow() { - inner.data_type() + inner.dtype() } else { unreachable!(); } @@ -130,18 +130,18 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr .collect(); return LargeList(Box::new(Field::new( PlSmallStr::from_static(ITEM_NAME), - coerce_data_type(inner_types.as_slice()), + coerce_dtype(inner_types.as_slice()), true, ))); } else if datatypes.len() > 2 { - return coerce_data_type(datatypes); + return coerce_dtype(datatypes); } let (lhs, rhs) = (datatypes[0].borrow(), datatypes[1].borrow()); return match (lhs, rhs) { (lhs, rhs) if lhs == rhs => lhs.clone(), (LargeList(lhs), LargeList(rhs)) => { - let inner = coerce_data_type(&[lhs.data_type(), rhs.data_type()]); + let inner = coerce_dtype(&[lhs.dtype(), rhs.dtype()]); LargeList(Box::new(Field::new( PlSmallStr::from_static(ITEM_NAME), inner, @@ -149,7 +149,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr ))) }, (scalar, LargeList(list)) => { - let inner = coerce_data_type(&[scalar, list.data_type()]); + let inner = coerce_dtype(&[scalar, list.dtype()]); LargeList(Box::new(Field::new( PlSmallStr::from_static(ITEM_NAME), inner, @@ -157,7 +157,7 @@ pub(crate) fn coerce_data_type>(datatypes: &[A]) -> Arr ))) }, 
(LargeList(list), scalar) => { - let inner = coerce_data_type(&[scalar, list.data_type()]); + let inner = coerce_dtype(&[scalar, list.dtype()]); LargeList(Box::new(Field::new( PlSmallStr::from_static(ITEM_NAME), inner, diff --git a/crates/polars-json/src/json/write/serialize.rs b/crates/polars-json/src/json/write/serialize.rs index 872e13970814..2fd5920bd2f2 100644 --- a/crates/polars-json/src/json/write/serialize.rs +++ b/crates/polars-json/src/json/write/serialize.rs @@ -406,7 +406,7 @@ pub(crate) fn new_serializer<'a>( offset: usize, take: usize, ) -> Box + 'a + Send + Sync> { - match array.data_type().to_logical_type() { + match array.dtype().to_logical_type() { ArrowDataType::Boolean => { boolean_serializer(array.as_any().downcast_ref().unwrap(), offset, take) }, diff --git a/crates/polars-json/src/ndjson/deserialize.rs b/crates/polars-json/src/ndjson/deserialize.rs index 35961e96c9a2..d8bab5af157b 100644 --- a/crates/polars-json/src/ndjson/deserialize.rs +++ b/crates/polars-json/src/ndjson/deserialize.rs @@ -15,7 +15,7 @@ use super::*; /// This function errors iff any of the rows is not a valid JSON (i.e. the format is not valid NDJSON). pub fn deserialize_iter<'a>( rows: impl Iterator, - data_type: ArrowDataType, + dtype: ArrowDataType, buf_size: usize, count: usize, ) -> PolarsResult { @@ -23,12 +23,12 @@ pub fn deserialize_iter<'a>( let mut buf = String::with_capacity(std::cmp::min(buf_size + count + 2, u32::MAX as usize)); buf.push('['); - fn _deserializer(s: &mut str, data_type: ArrowDataType) -> PolarsResult> { + fn _deserializer(s: &mut str, dtype: ArrowDataType) -> PolarsResult> { let slice = unsafe { s.as_bytes_mut() }; let out = simd_json::to_borrowed_value(slice) .map_err(|e| PolarsError::ComputeError(format!("json parsing error: '{e}'").into()))?; Ok(if let BorrowedValue::Array(rows) = out { - super::super::json::deserialize::_deserialize(&rows, data_type.clone()) + super::super::json::deserialize::_deserialize(&rows, dtype.clone()) } else { unreachable!() }) @@ -43,7 +43,7 @@ pub fn deserialize_iter<'a>( if buf.len() + next_row_length >= u32::MAX as usize { let _ = buf.pop(); buf.push(']'); - arr.push(_deserializer(&mut buf, data_type.clone())?); + arr.push(_deserializer(&mut buf, dtype.clone())?); buf.clear(); buf.push('['); } @@ -54,9 +54,9 @@ pub fn deserialize_iter<'a>( buf.push(']'); if arr.is_empty() { - _deserializer(&mut buf, data_type.clone()) + _deserializer(&mut buf, dtype.clone()) } else { - arr.push(_deserializer(&mut buf, data_type.clone())?); + arr.push(_deserializer(&mut buf, dtype.clone())?); concatenate_owned_unchecked(&arr) } } diff --git a/crates/polars-json/src/ndjson/file.rs b/crates/polars-json/src/ndjson/file.rs index 08f059b685d2..e0a166f934e8 100644 --- a/crates/polars-json/src/ndjson/file.rs +++ b/crates/polars-json/src/ndjson/file.rs @@ -113,15 +113,15 @@ pub fn iter_unique_dtypes( let rows = vec!["".to_string(); 1]; // 1 <=> read row by row let mut reader = FileReader::new(reader, rows, number_of_rows.map(|v| v.into())); - let mut data_types = PlIndexSet::default(); + let mut dtypes = PlIndexSet::default(); let mut buf = vec![]; while let Some(rows) = reader.next()? { // 0 because it is row by row let value = parse_value(&mut buf, rows[0].as_bytes())?; - let data_type = crate::json::infer(&value)?; - data_types.insert(data_type); + let dtype = crate::json::infer(&value)?; + dtypes.insert(dtype); } - Ok(data_types.into_iter()) + Ok(dtypes.into_iter()) } /// Infers the [`ArrowDataType`] from an iterator of JSON strings. 
A limited number of @@ -130,17 +130,17 @@ pub fn iter_unique_dtypes( /// # Implementation /// This implementation infers each row by going through the entire iterator. pub fn infer_iter>(rows: impl Iterator) -> PolarsResult { - let mut data_types = IndexSet::<_, PlRandomState>::default(); + let mut dtypes = IndexSet::<_, PlRandomState>::default(); let mut buf = vec![]; for row in rows { let v = parse_value(&mut buf, row.as_ref().as_bytes())?; - let data_type = crate::json::infer(&v)?; - if data_type != ArrowDataType::Null { - data_types.insert(data_type); + let dtype = crate::json::infer(&v)?; + if dtype != ArrowDataType::Null { + dtypes.insert(dtype); } } - let v: Vec<&ArrowDataType> = data_types.iter().collect(); - Ok(crate::json::infer_schema::coerce_data_type(&v)) + let v: Vec<&ArrowDataType> = dtypes.iter().collect(); + Ok(crate::json::infer_schema::coerce_dtype(&v)) } diff --git a/crates/polars-lazy/src/dsl/eval.rs b/crates/polars-lazy/src/dsl/eval.rs index 3469d2a1021c..574c2b336407 100644 --- a/crates/polars-lazy/src/dsl/eval.rs +++ b/crates/polars-lazy/src/dsl/eval.rs @@ -8,10 +8,10 @@ use crate::prelude::*; pub(crate) fn eval_field_to_dtype(f: &Field, expr: &Expr, list: bool) -> Field { // Dummy df to determine output dtype. let dtype = f - .data_type() + .dtype() .inner_dtype() .cloned() - .unwrap_or_else(|| f.data_type().clone()); + .unwrap_or_else(|| f.dtype().clone()); let df = Series::new_empty(PlSmallStr::EMPTY, &dtype).into_frame(); @@ -109,8 +109,8 @@ pub trait ExprEvalExtension: IntoExpr + Sized { }; let s = Series::new(name, avs); - if s.dtype() != output_field.data_type() { - s.cast(output_field.data_type()).map(Some) + if s.dtype() != output_field.dtype() { + s.cast(output_field.dtype()).map(Some) } else { Ok(Some(s)) } diff --git a/crates/polars-lazy/src/dsl/list.rs b/crates/polars-lazy/src/dsl/list.rs index ca7f0a238de7..0417c5ae53cb 100644 --- a/crates/polars-lazy/src/dsl/list.rs +++ b/crates/polars-lazy/src/dsl/list.rs @@ -106,8 +106,8 @@ fn run_per_sublist( ca.rename(s.name().clone()); - if ca.dtype() != output_field.data_type() { - ca.cast(output_field.data_type()).map(Some) + if ca.dtype() != output_field.dtype() { + ca.cast(output_field.dtype()).map(Some) } else { Ok(Some(ca.into_series())) } @@ -156,7 +156,7 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { match e { #[cfg(feature = "dtype-categorical")] Expr::Cast { - data_type: DataType::Categorical(_, _) | DataType::Enum(_, _), + dtype: DataType::Categorical(_, _) | DataType::Enum(_, _), .. 
} => { polars_bail!( @@ -181,11 +181,11 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized { if lst.is_empty() { return Ok(Some(Series::new_empty( s.name().clone(), - output_field.data_type(), + output_field.dtype(), ))); } if lst.null_count() == lst.len() { - return Ok(Some(s.cast(output_field.data_type())?)); + return Ok(Some(s.cast(output_field.dtype())?)); } let fits_idx_size = lst.get_values_size() <= (IdxSize::MAX as usize); diff --git a/crates/polars-lazy/src/physical_plan/streaming/convert_alp.rs b/crates/polars-lazy/src/physical_plan/streaming/convert_alp.rs index d651f019afcf..7100c083bd47 100644 --- a/crates/polars-lazy/src/physical_plan/streaming/convert_alp.rs +++ b/crates/polars-lazy/src/physical_plan/streaming/convert_alp.rs @@ -356,7 +356,7 @@ pub(crate) fn insert_streaming_nodes( #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => fields .iter() - .all(|fld| allowed_dtype(fld.data_type(), string_cache)), + .all(|fld| allowed_dtype(fld.dtype(), string_cache)), // We need to be able to sink to disk or produce the aggregate return dtype. DataType::Unknown(_) => false, #[cfg(feature = "dtype-decimal")] diff --git a/crates/polars-ops/src/chunked_array/array/any_all.rs b/crates/polars-ops/src/chunked_array/array/any_all.rs index 270885082818..8f9bd175c8ca 100644 --- a/crates/polars-ops/src/chunked_array/array/any_all.rs +++ b/crates/polars-ops/src/chunked_array/array/any_all.rs @@ -10,7 +10,7 @@ where { let values = arr.values(); - polars_ensure!(values.data_type() == &ArrowDataType::Boolean, ComputeError: "expected boolean elements in array"); + polars_ensure!(values.dtype() == &ArrowDataType::Boolean, ComputeError: "expected boolean elements in array"); let values = values.as_any().downcast_ref::().unwrap(); let validity = arr.validity().cloned(); diff --git a/crates/polars-ops/src/chunked_array/list/any_all.rs b/crates/polars-ops/src/chunked_array/list/any_all.rs index 431692780a45..a8727bb3082a 100644 --- a/crates/polars-ops/src/chunked_array/list/any_all.rs +++ b/crates/polars-ops/src/chunked_array/list/any_all.rs @@ -10,7 +10,7 @@ where let offsets = arr.offsets().as_slice(); let values = arr.values(); - polars_ensure!(values.data_type() == &ArrowDataType::Boolean, ComputeError: "expected boolean elements in list"); + polars_ensure!(values.dtype() == &ArrowDataType::Boolean, ComputeError: "expected boolean elements in list"); let values = values.as_any().downcast_ref::().unwrap(); let validity = arr.validity().cloned(); diff --git a/crates/polars-ops/src/chunked_array/list/sets.rs b/crates/polars-ops/src/chunked_array/list/sets.rs index 4a6f1f0466b4..e105d96b737a 100644 --- a/crates/polars-ops/src/chunked_array/list/sets.rs +++ b/crates/polars-ops/src/chunked_array/list/sets.rs @@ -251,7 +251,7 @@ where offsets.push(offset as i64); } let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets.into()) }; - let dtype = ListArray::::default_datatype(values_out.data_type().clone()); + let dtype = ListArray::::default_datatype(values_out.dtype().clone()); let values: PrimitiveArray = values_out.into(); Ok(ListArray::new(dtype, offsets, values.boxed(), validity)) @@ -346,10 +346,10 @@ fn binary( if as_utf8 { let values = unsafe { values.to_utf8view_unchecked() }; - let dtype = ListArray::::default_datatype(values.data_type().clone()); + let dtype = ListArray::::default_datatype(values.dtype().clone()); Ok(ListArray::new(dtype, offsets, values.boxed(), validity)) } else { - let dtype = ListArray::::default_datatype(values.data_type().clone()); + let dtype = 
ListArray::::default_datatype(values.dtype().clone()); Ok(ListArray::new(dtype, offsets, values.boxed(), validity)) } } @@ -364,9 +364,9 @@ fn array_set_operation( let values_a = a.values(); let values_b = b.values(); - assert_eq!(values_a.data_type(), values_b.data_type()); + assert_eq!(values_a.dtype(), values_b.dtype()); - let dtype = values_b.data_type(); + let dtype = values_b.dtype(); let validity = combine_validities_and(a.validity(), b.validity()); match dtype { diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index a80820969612..4c80cc8ef1e4 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -13,7 +13,7 @@ fn extract_groups_array( arr: &Utf8ViewArray, reg: &Regex, names: &[&str], - data_type: ArrowDataType, + dtype: ArrowDataType, ) -> PolarsResult { let mut builders = (0..names.len()) .map(|_| MutablePlString::with_capacity(arr.len())) @@ -36,7 +36,7 @@ fn extract_groups_array( } let values = builders.into_iter().map(|a| a.freeze().boxed()).collect(); - Ok(StructArray::new(data_type.clone(), values, arr.validity().cloned()).boxed()) + Ok(StructArray::new(dtype.clone(), values, arr.validity().cloned()).boxed()) } #[cfg(feature = "extract_groups")] @@ -55,7 +55,7 @@ pub(super) fn extract_groups( .map(|ca| ca.into_series()); } - let data_type = dtype.try_to_arrow(CompatLevel::newest())?; + let dtype = dtype.try_to_arrow(CompatLevel::newest())?; let DataType::Struct(fields) = dtype else { unreachable!() // Implementation error if it isn't a struct. }; @@ -66,7 +66,7 @@ pub(super) fn extract_groups( let chunks = ca .downcast_iter() - .map(|array| extract_groups_array(array, ®, &names, data_type.clone())) + .map(|array| extract_groups_array(array, ®, &names, dtype.clone())) .collect::>>()?; Series::try_from((ca.name().clone(), chunks)) diff --git a/crates/polars-ops/src/series/ops/is_in.rs b/crates/polars-ops/src/series/ops/is_in.rs index bb5ea8eb5522..5b6fdc80b35e 100644 --- a/crates/polars-ops/src/series/ops/is_in.rs +++ b/crates/polars-ops/src/series/ops/is_in.rs @@ -523,12 +523,12 @@ fn is_in_struct(ca_in: &StructChunked, other: &Series) -> PolarsResult = ca_in .struct_fields() .iter() - .map(|f| f.data_type()) + .map(|f| f.dtype()) .collect(); let other_dtypes: Vec<_> = other .struct_fields() .iter() - .map(|f| f.data_type()) + .map(|f| f.dtype()) .collect(); if ca_in_dtypes != other_dtypes { let ca_in_names = ca_in.struct_fields().iter().map(|f| f.name().clone()); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs index a1f3054bd852..6777f7e639c9 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binview.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binview.rs @@ -843,7 +843,7 @@ impl utils::Decoder for BinViewDecoder { fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult> { @@ -852,13 +852,13 @@ impl utils::Decoder for BinViewDecoder { let validity = freeze_validity(validity); array = array.with_validity(validity); - match data_type.to_physical_type() { + match dtype.to_physical_type() { PhysicalType::BinaryView => Ok(array.boxed()), PhysicalType::Utf8View => { // SAFETY: we already checked utf8 unsafe { Ok(Utf8ViewArray::new_unchecked( - data_type, + dtype, array.views().clone(), array.data_buffers().clone(), 
array.validity().cloned(), @@ -876,13 +876,13 @@ impl utils::Decoder for BinViewDecoder { impl utils::DictDecodable for BinViewDecoder { fn finalize_dict_array( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, dict: Self::Dict, keys: PrimitiveArray, ) -> ParquetResult> { - let value_data_type = match &data_type { + let value_dtype = match &dtype { ArrowDataType::Dictionary(_, values, _) => values.as_ref().clone(), - _ => data_type.clone(), + _ => dtype.clone(), }; let mut view_dict = MutableBinaryViewArray::with_capacity(dict.0.len()); @@ -893,13 +893,13 @@ impl utils::DictDecodable for BinViewDecoder { unsafe { view_dict.set_total_bytes_len(dict.0.iter().map(|v| v.length as usize).sum()) }; let view_dict = view_dict.freeze(); - let dict = match value_data_type.to_physical_type() { + let dict = match value_dtype.to_physical_type() { PhysicalType::Utf8View => view_dict.to_utf8view().unwrap().boxed(), PhysicalType::BinaryView => view_dict.boxed(), _ => unreachable!(), }; - Ok(DictionaryArray::try_new(data_type, keys, dict).unwrap()) + Ok(DictionaryArray::try_new(dtype, keys, dict).unwrap()) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs index e99e7a5ed56c..af2e504d2646 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean.rs @@ -249,12 +249,12 @@ impl Decoder for BooleanDecoder { fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); - Ok(BooleanArray::new(data_type, values.into(), validity)) + Ok(BooleanArray::new(dtype, values.into(), validity)) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs index db718ed9c330..de2bfe2e47f3 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/dictionary.rs @@ -121,7 +121,7 @@ impl utils::Decoder for DictionaryDec fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult> { @@ -129,7 +129,7 @@ impl utils::Decoder for DictionaryDec let dict = dict.unwrap(); let keys = PrimitiveArray::new(K::PRIMITIVE.into(), values.into(), validity); - self.decoder.finalize_dict_array(data_type, dict, keys) + self.decoder.finalize_dict_array(dtype, dict, keys) } fn decode_plain_encoded<'a>( diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs index b4fcd2c38e7d..3825d528c8f5 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs @@ -308,13 +308,13 @@ impl Decoder for BinaryDecoder { fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); Ok(FixedSizeBinaryArray::new( - data_type, + dtype, values.values.into(), validity, )) @@ -324,13 +324,13 @@ impl Decoder for BinaryDecoder { impl utils::DictDecodable for BinaryDecoder { fn finalize_dict_array( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, dict: Self::Dict, keys: PrimitiveArray, ) -> ParquetResult> { let 
dict = FixedSizeBinaryArray::new(ArrowDataType::FixedSizeBinary(self.size), dict.into(), None); - Ok(DictionaryArray::try_new(data_type, keys, Box::new(dict)).unwrap()) + Ok(DictionaryArray::try_new(dtype, keys, Box::new(dict)).unwrap()) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/mod.rs index e7333a7ca3ea..17e9434e1d3d 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/mod.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/mod.rs @@ -41,13 +41,13 @@ pub fn get_page_iterator( /// Creates a new [`ListArray`] or [`FixedSizeListArray`]. pub fn create_list( - data_type: ArrowDataType, + dtype: ArrowDataType, nested: &mut NestedState, values: Box, ) -> Box { let (mut offsets, validity) = nested.pop().unwrap(); let validity = validity.and_then(freeze_validity); - match data_type.to_logical_type() { + match dtype.to_logical_type() { ArrowDataType::List(_) => { offsets.push(values.len() as i64); @@ -58,7 +58,7 @@ pub fn create_list( .expect("i64 offsets do not fit in i32 offsets"); Box::new(ListArray::::new( - data_type, + dtype, offsets.into(), values, validity, @@ -68,14 +68,14 @@ pub fn create_list( offsets.push(values.len() as i64); Box::new(ListArray::::new( - data_type, + dtype, offsets.try_into().expect("List too large"), values, validity, )) }, ArrowDataType::FixedSizeList(_, _) => { - Box::new(FixedSizeListArray::new(data_type, values, validity)) + Box::new(FixedSizeListArray::new(dtype, values, validity)) }, _ => unreachable!(), } @@ -83,12 +83,12 @@ pub fn create_list( /// Creates a new [`MapArray`]. pub fn create_map( - data_type: ArrowDataType, + dtype: ArrowDataType, nested: &mut NestedState, values: Box, ) -> Box { let (mut offsets, validity) = nested.pop().unwrap(); - match data_type.to_logical_type() { + match dtype.to_logical_type() { ArrowDataType::Map(_, _) => { offsets.push(values.len() as i64); let offsets = offsets.iter().map(|x| *x as i32).collect::>(); @@ -98,7 +98,7 @@ pub fn create_map( .expect("i64 offsets do not fit in i32 offsets"); Box::new(MapArray::new( - data_type, + dtype, offsets.into(), values, validity.and_then(freeze_validity), @@ -108,9 +108,9 @@ pub fn create_map( } } -fn is_primitive(data_type: &ArrowDataType) -> bool { +fn is_primitive(dtype: &ArrowDataType) -> bool { matches!( - data_type.to_physical_type(), + dtype.to_physical_type(), arrow::datatypes::PhysicalType::Primitive(_) | arrow::datatypes::PhysicalType::Null | arrow::datatypes::PhysicalType::Boolean @@ -132,11 +132,11 @@ fn columns_to_iter_recursive( init: Vec, filter: Option, ) -> PolarsResult<(NestedState, Box)> { - if init.is_empty() && is_primitive(&field.data_type) { + if init.is_empty() && is_primitive(&field.dtype) { let array = page_iter_to_array( columns.pop().unwrap(), types.pop().unwrap(), - field.data_type, + field.dtype, filter, )?; @@ -147,34 +147,34 @@ fn columns_to_iter_recursive( } /// Returns the number of (parquet) columns that a [`ArrowDataType`] contains. 
-pub fn n_columns(data_type: &ArrowDataType) -> usize { +pub fn n_columns(dtype: &ArrowDataType) -> usize { use arrow::datatypes::PhysicalType::*; - match data_type.to_physical_type() { + match dtype.to_physical_type() { Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8 | Dictionary(_) | LargeUtf8 | BinaryView | Utf8View => 1, List | FixedSizeList | LargeList => { - let a = data_type.to_logical_type(); + let a = dtype.to_logical_type(); if let ArrowDataType::List(inner) = a { - n_columns(&inner.data_type) + n_columns(&inner.dtype) } else if let ArrowDataType::LargeList(inner) = a { - n_columns(&inner.data_type) + n_columns(&inner.dtype) } else if let ArrowDataType::FixedSizeList(inner, _) = a { - n_columns(&inner.data_type) + n_columns(&inner.dtype) } else { unreachable!() } }, Map => { - let a = data_type.to_logical_type(); + let a = dtype.to_logical_type(); if let ArrowDataType::Map(inner, _) = a { - n_columns(&inner.data_type) + n_columns(&inner.dtype) } else { unreachable!() } }, Struct => { - if let ArrowDataType::Struct(fields) = data_type.to_logical_type() { - fields.iter().map(|inner| n_columns(&inner.data_type)).sum() + if let ArrowDataType::Struct(fields) = dtype.to_logical_type() { + fields.iter().map(|inner| n_columns(&inner.dtype)).sum() } else { unreachable!() } @@ -188,7 +188,7 @@ pub fn n_columns(data_type: &ArrowDataType) -> usize { /// For a non-nested datatypes such as [`ArrowDataType::Int32`], this function requires a single element in `columns` and `types`. /// For nested types, `columns` must be composed by all parquet columns with associated types `types`. /// -/// The arrays are guaranteed to be at most of size `chunk_size` and data type `field.data_type`. +/// The arrays are guaranteed to be at most of size `chunk_size` and data type `field.dtype`. pub fn column_iter_to_arrays( columns: Vec, types: Vec<&PrimitiveType>, diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs index a2076014a966..114eeef67341 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested.rs @@ -18,14 +18,14 @@ pub fn columns_to_iter_recursive( use arrow::datatypes::PhysicalType::*; use arrow::datatypes::PrimitiveType::*; - Ok(match field.data_type().to_physical_type() { + Ok(match field.dtype().to_physical_type() { Null => { // physical type is i32 init.push(InitNested::Primitive(field.is_nullable)); types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), null::NullDecoder, init, )? @@ -49,7 +49,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -61,7 +61,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -73,7 +73,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::unit(), init, )? @@ -85,7 +85,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::unit(), init, )? 
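The `n_columns` helper renamed in the hunk above reports how many parquet leaf columns back a single (possibly nested) Arrow field: 1 for primitive/binary leaves, the child's count for list-like wrappers and maps, and the sum over all fields for structs. A self-contained sketch of the same counting over a simplified stand-in enum (illustrative only, not the real `ArrowDataType`):

    // Simplified stand-in for a nested dtype; illustrative only.
    #[allow(dead_code)]
    enum Dtype {
        Int32,
        Utf8View,
        LargeList(Box<Dtype>),
        Struct(Vec<Dtype>),
    }

    // Number of parquet leaf columns needed to store a value of this dtype.
    fn n_columns(dtype: &Dtype) -> usize {
        match dtype {
            Dtype::Int32 | Dtype::Utf8View => 1,
            Dtype::LargeList(inner) => n_columns(inner),
            Dtype::Struct(fields) => fields.iter().map(n_columns).sum(),
        }
    }

    fn main() {
        let dt = Dtype::Struct(vec![
            Dtype::Int32,
            Dtype::LargeList(Box::new(Dtype::Utf8View)),
        ]);
        // One leaf per struct field; the list wrapper adds no extra leaf.
        assert_eq!(n_columns(&dt), 2);
    }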
@@ -97,7 +97,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -109,7 +109,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -122,7 +122,7 @@ pub fn columns_to_iter_recursive( match type_.physical_type { PhysicalType::Int32 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -131,7 +131,7 @@ pub fn columns_to_iter_recursive( // some implementations of parquet write arrow's u32 into i64. PhysicalType::Int64 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -149,7 +149,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::IntDecoder::::cast_as(), init, )? @@ -161,7 +161,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::FloatDecoder::::unit(), init, )? @@ -173,7 +173,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), primitive::FloatDecoder::::unit(), init, )? @@ -185,7 +185,7 @@ pub fn columns_to_iter_recursive( types.pop(); PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type().clone(), + field.dtype().clone(), binview::BinViewDecoder::default(), init, )? @@ -193,15 +193,15 @@ pub fn columns_to_iter_recursive( }, // These are all converted to View variants before. LargeBinary | LargeUtf8 | Binary | Utf8 => unreachable!(), - _ => match field.data_type().to_logical_type() { + _ => match field.dtype().to_logical_type() { ArrowDataType::Dictionary(key_type, _, _) => { init.push(InitNested::Primitive(field.is_nullable)); let type_ = types.pop().unwrap(); let iter = columns.pop().unwrap(); - let data_type = field.data_type().clone(); + let dtype = field.dtype().clone(); match_integer_type!(key_type, |$K| { - dict_read::<$K>(iter, init, type_, data_type, filter).map(|(s, arr)| (s, Box::new(arr) as Box<_>)) + dict_read::<$K>(iter, init, type_, dtype, filter).map(|(s, arr)| (s, Box::new(arr) as Box<_>)) })? }, ArrowDataType::List(inner) | ArrowDataType::LargeList(inner) => { @@ -213,7 +213,7 @@ pub fn columns_to_iter_recursive( init, filter, )?; - let array = create_list(field.data_type().clone(), &mut nested, array); + let array = create_list(field.dtype().clone(), &mut nested, array); (nested, array) }, ArrowDataType::FixedSizeList(inner, width) => { @@ -225,7 +225,7 @@ pub fn columns_to_iter_recursive( init, filter, )?; - let array = create_list(field.data_type().clone(), &mut nested, array); + let array = create_list(field.dtype().clone(), &mut nested, array); (nested, array) }, ArrowDataType::Decimal(_, _) => { @@ -234,7 +234,7 @@ pub fn columns_to_iter_recursive( match type_.physical_type { PhysicalType::Int32 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type.clone(), + field.dtype.clone(), primitive::IntDecoder::::cast_into(), init, )? 
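The Decimal branch this hunk ends on (and the next hunk continues) reuses the plain integer decoders and conceptually just widens the decoded physical Int32/Int64 values into the i128 mantissas an Arrow decimal array stores; precision and scale stay in the dtype, not in the values. A minimal, hypothetical sketch of that widening step (not the polars-parquet decoder API):

    // Widen physical parquet integers into i128 decimal mantissas.
    // Hypothetical helper, for illustration only.
    fn widen_to_decimal(physical: &[i32]) -> Vec<i128> {
        physical.iter().map(|&v| v as i128).collect()
    }

    fn main() {
        // A decimal(9, 2) column stored as Int32 pages: 12_345 encodes 123.45.
        let mantissas = widen_to_decimal(&[12_345, -6_700]);
        assert_eq!(mantissas, vec![12_345_i128, -6_700_i128]);
    }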
@@ -242,7 +242,7 @@ pub fn columns_to_iter_recursive( .map(|(s, a)| (s, Box::new(a) as Box<_>))?, PhysicalType::Int64 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type.clone(), + field.dtype.clone(), primitive::IntDecoder::::cast_into(), init, )? @@ -271,7 +271,7 @@ pub fn columns_to_iter_recursive( let validity = array.validity().cloned(); let array: Box = Box::new(PrimitiveArray::::try_new( - field.data_type.clone(), + field.dtype.clone(), values.into(), validity, )?); @@ -292,7 +292,7 @@ pub fn columns_to_iter_recursive( match type_.physical_type { PhysicalType::Int32 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type.clone(), + field.dtype.clone(), primitive::IntDecoder::closure(|x: i32| i256(I256::new(x as i128))), init, )? @@ -300,7 +300,7 @@ pub fn columns_to_iter_recursive( .map(|(s, a)| (s, Box::new(a) as Box<_>))?, PhysicalType::Int64 => PageNestedDecoder::new( columns.pop().unwrap(), - field.data_type.clone(), + field.dtype.clone(), primitive::IntDecoder::closure(|x: i64| i256(I256::new(x as i128))), init, )? @@ -324,7 +324,7 @@ pub fn columns_to_iter_recursive( let validity = array.validity().cloned(); let array: Box = Box::new(PrimitiveArray::::try_new( - field.data_type.clone(), + field.dtype.clone(), values.into(), validity, )?); @@ -350,7 +350,7 @@ pub fn columns_to_iter_recursive( let validity = array.validity().cloned(); let array: Box = Box::new(PrimitiveArray::::try_new( - field.data_type.clone(), + field.dtype.clone(), values.into(), validity, )?); @@ -387,7 +387,7 @@ pub fn columns_to_iter_recursive( types: &mut Vec<&PrimitiveType>, struct_field: &Field| { init.push(InitNested::Struct(field.is_nullable)); - let n = n_columns(&struct_field.data_type); + let n = n_columns(&struct_field.dtype); let columns = columns.split_off(columns.len() - n); let types = types.split_off(types.len() - n); @@ -445,7 +445,7 @@ pub fn columns_to_iter_recursive( init, filter, )?; - let array = create_map(field.data_type().clone(), &mut nested, array); + let array = create_map(field.dtype().clone(), &mut nested, array); (nested, array) }, other => { @@ -461,76 +461,76 @@ fn dict_read( iter: BasicDecompressor, init: Vec, _type_: &PrimitiveType, - data_type: ArrowDataType, + dtype: ArrowDataType, filter: Option, ) -> PolarsResult<(NestedState, DictionaryArray)> { use ArrowDataType::*; - let values_data_type = if let Dictionary(_, v, _) = &data_type { + let values_dtype = if let Dictionary(_, v, _) = &dtype { v.as_ref() } else { panic!() }; - Ok(match values_data_type.to_logical_type() { + Ok(match values_dtype.to_logical_type() { UInt8 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? .collect_n(filter)?, UInt16 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? .collect_n(filter)?, UInt32 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? .collect_n(filter)?, Int8 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? .collect_n(filter)?, Int16 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? 
.collect_n(filter)?, Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::unit()), init, )? .collect_n(filter)?, Int64 | Date64 | Time64(_) | Duration(_) => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::IntDecoder::::cast_as()), init, )? .collect_n(filter)?, Float32 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::unit()), init, )? .collect_n(filter)?, Float64 => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::unit()), init, )? @@ -539,7 +539,7 @@ fn dict_read( LargeUtf8 | LargeBinary | Utf8 | Binary => unreachable!(), Utf8View | BinaryView => PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(binview::BinViewDecoder::default()), init, )? @@ -548,7 +548,7 @@ fn dict_read( let size = *size; PageNestedDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(fixed_size_binary::BinaryDecoder { size }), init, )? @@ -562,7 +562,7 @@ fn dict_read( iter, physical_type, logical_type, - data_type, + dtype, chunk_size, time_unit, ); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs index 42e321a2f570..ad542cf05753 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/nested_utils.rs @@ -713,7 +713,7 @@ fn extend_offsets_limited<'a, D: utils::NestedDecoder>( pub struct PageNestedDecoder { pub iter: BasicDecompressor, - pub data_type: ArrowDataType, + pub dtype: ArrowDataType, pub dict: Option, pub decoder: D, pub init: Vec, @@ -737,7 +737,7 @@ fn level_iters(page: &DataPage) -> ParquetResult<(HybridRleDecoder, HybridRleDec impl PageNestedDecoder { pub fn new( mut iter: BasicDecompressor, - data_type: ArrowDataType, + dtype: ArrowDataType, decoder: D, init: Vec, ) -> ParquetResult { @@ -746,7 +746,7 @@ impl PageNestedDecoder { Ok(Self { iter, - data_type, + dtype, dict, decoder, init, @@ -970,7 +970,7 @@ impl PageNestedDecoder { )); _ = nested_state.pop().unwrap(); - let array = self.decoder.finalize(self.data_type, self.dict, target)?; + let array = self.decoder.finalize(self.dtype, self.dict, target)?; Ok((nested_state, array)) } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/null.rs b/crates/polars-parquet/src/arrow/read/deserialize/null.rs index 8066c1d73af3..e12757fe2e20 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/null.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/null.rs @@ -97,11 +97,11 @@ impl utils::Decoder for NullDecoder { fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, decoded: Self::DecodedState, ) -> ParquetResult { - Ok(NullArray::new(data_type, decoded.length)) + Ok(NullArray::new(dtype, decoded.length)) } } @@ -128,7 +128,7 @@ use super::BasicDecompressor; /// Converts [`PagesIter`] to an [`ArrayIter`] pub fn iter_to_arrays( mut iter: BasicDecompressor, - data_type: ArrowDataType, + dtype: ArrowDataType, mut filter: Option, ) -> ParquetResult> { _ = iter.read_dict_page()?; @@ -159,5 +159,5 @@ pub fn iter_to_arrays( len = (len + num_rows).min(num_rows); } - Ok(Box::new(NullArray::new(data_type, len))) + Ok(Box::new(NullArray::new(dtype, len))) } diff --git 
a/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs index 1c09ea3f7e87..0a43141abd06 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs @@ -296,12 +296,12 @@ where fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); - Ok(PrimitiveArray::try_new(data_type, values.into(), validity).unwrap()) + Ok(PrimitiveArray::try_new(dtype, values.into(), validity).unwrap()) } } @@ -313,18 +313,18 @@ where { fn finalize_dict_array( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, dict: Self::Dict, keys: PrimitiveArray, ) -> ParquetResult> { - let value_type = match &data_type { + let value_type = match &dtype { ArrowDataType::Dictionary(_, value, _) => value.as_ref().clone(), _ => T::PRIMITIVE.into(), }; let dict = Box::new(PrimitiveArray::new(value_type, dict.into(), None)); - Ok(DictionaryArray::try_new(data_type, keys, dict).unwrap()) + Ok(DictionaryArray::try_new(dtype, keys, dict).unwrap()) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs index dfe3f2f09cd6..ed8e0a541a68 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs @@ -351,12 +351,12 @@ where fn finalize( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, _dict: Option, (values, validity): Self::DecodedState, ) -> ParquetResult { let validity = freeze_validity(validity); - Ok(PrimitiveArray::try_new(data_type, values.into(), validity).unwrap()) + Ok(PrimitiveArray::try_new(dtype, values.into(), validity).unwrap()) } } @@ -369,18 +369,18 @@ where { fn finalize_dict_array( &self, - data_type: ArrowDataType, + dtype: ArrowDataType, dict: Self::Dict, keys: PrimitiveArray, ) -> ParquetResult> { - let value_type = match &data_type { + let value_type = match &dtype { ArrowDataType::Dictionary(_, value, _) => value.as_ref().clone(), _ => T::PRIMITIVE.into(), }; let dict = Box::new(PrimitiveArray::new(value_type, dict.into(), None)); - Ok(DictionaryArray::try_new(data_type, keys, dict).unwrap()) + Ok(DictionaryArray::try_new(dtype, keys, dict).unwrap()) } } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/simple.rs b/crates/polars-parquet/src/arrow/read/deserialize/simple.rs index 9d512d834ebf..c0ab9ed79488 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/simple.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/simple.rs @@ -18,11 +18,11 @@ use crate::read::deserialize::binview::{self, BinViewDecoder}; use crate::read::deserialize::utils::PageDecoder; /// An iterator adapter that maps an iterator of Pages a boxed [`Array`] of [`ArrowDataType`] -/// `data_type` with a maximum of `num_rows` elements. +/// `dtype` with a maximum of `num_rows` elements. 
pub fn page_iter_to_array( pages: BasicDecompressor, type_: &PrimitiveType, - data_type: ArrowDataType, + dtype: ArrowDataType, filter: Option, ) -> PolarsResult> { use ArrowDataType::*; @@ -30,50 +30,50 @@ pub fn page_iter_to_array( let physical_type = &type_.physical_type; let logical_type = &type_.logical_type; - Ok(match (physical_type, data_type.to_logical_type()) { - (_, Null) => null::iter_to_arrays(pages, data_type, filter)?, + Ok(match (physical_type, dtype.to_logical_type()) { + (_, Null) => null::iter_to_arrays(pages, dtype, filter)?, (PhysicalType::Boolean, Boolean) => { - Box::new(PageDecoder::new(pages, data_type, boolean::BooleanDecoder)?.collect_n(filter)?) + Box::new(PageDecoder::new(pages, dtype, boolean::BooleanDecoder)?.collect_n(filter)?) }, (PhysicalType::Int32, UInt8) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int32, UInt16) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int32, UInt32) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int64, UInt32) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int32, Int8) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int32, Int16) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Int32, Int32 | Date32 | Time32(_)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::unit(), )? .collect_n(filter)?), @@ -83,15 +83,15 @@ pub fn page_iter_to_array( pages, physical_type, logical_type, - data_type, + dtype, filter, time_unit, ); }, (PhysicalType::FixedLenByteArray(_), FixedSizeBinary(_)) => { - let size = FixedSizeBinaryArray::get_size(&data_type); + let size = FixedSizeBinaryArray::get_size(&dtype); - Box::new(PageDecoder::new(pages, data_type, fixed_size_binary::BinaryDecoder { size })? + Box::new(PageDecoder::new(pages, dtype, fixed_size_binary::BinaryDecoder { size })? .collect_n(filter)?) }, (PhysicalType::FixedLenByteArray(12), Interval(IntervalUnit::YearMonth)) => { @@ -113,7 +113,7 @@ pub fn page_iter_to_array( let validity = array.validity().cloned(); Box::new(PrimitiveArray::::try_new( - data_type.clone(), + dtype.clone(), values.into(), validity, )?) @@ -137,20 +137,20 @@ pub fn page_iter_to_array( let validity = array.validity().cloned(); Box::new(PrimitiveArray::::try_new( - data_type.clone(), + dtype.clone(), values.into(), validity, )?) }, (PhysicalType::Int32, Decimal(_, _)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_into(), )? .collect_n(filter)?), (PhysicalType::Int64, Decimal(_, _)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_into(), )? .collect_n(filter)?), @@ -179,20 +179,20 @@ pub fn page_iter_to_array( let validity = array.validity().cloned(); Box::new(PrimitiveArray::::try_new( - data_type.clone(), + dtype.clone(), values.into(), validity, )?) }, (PhysicalType::Int32, Decimal256(_, _)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::closure(|x: i32| i256(I256::new(x as i128))), )? 
.collect_n(filter)?), (PhysicalType::Int64, Decimal256(_, _)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::closure(|x: i64| i256(I256::new(x as i128))), )? .collect_n(filter)?), @@ -216,7 +216,7 @@ pub fn page_iter_to_array( let validity = array.validity().cloned(); Box::new(PrimitiveArray::::try_new( - data_type.clone(), + dtype.clone(), values.into(), validity, )?) @@ -241,7 +241,7 @@ pub fn page_iter_to_array( let validity = array.validity().cloned(); Box::new(PrimitiveArray::::try_new( - data_type.clone(), + dtype.clone(), values.into(), validity, )?) @@ -253,53 +253,53 @@ pub fn page_iter_to_array( }, (PhysicalType::Int32, Date64) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::closure(|x: i32| i64::from(x) * 86400000), )? .collect_n(filter)?), (PhysicalType::Int64, Date64) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::unit(), )? .collect_n(filter)?), (PhysicalType::Int64, Int64 | Time64(_) | Duration(_)) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::unit(), )? .collect_n(filter)?), (PhysicalType::Int64, UInt64) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::::cast_as(), )? .collect_n(filter)?), (PhysicalType::Float, Float32) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::::unit(), )? .collect_n(filter)?), (PhysicalType::Double, Float64) => Box::new(PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::::unit(), )? .collect_n(filter)?), // Don't compile this code with `i32` as we don't use this in polars (PhysicalType::ByteArray, LargeBinary | LargeUtf8) => { - PageDecoder::new(pages, data_type, binview::BinViewDecoder::default())? + PageDecoder::new(pages, dtype, binview::BinViewDecoder::default())? .collect_n(filter)? }, (_, Binary | Utf8) => unreachable!(), (PhysicalType::ByteArray, BinaryView | Utf8View) => { - PageDecoder::new(pages, data_type, binview::BinViewDecoder::default())? + PageDecoder::new(pages, dtype, binview::BinViewDecoder::default())? .collect_n(filter)? }, (_, Dictionary(key_type, _, _)) => { return match_integer_type!(key_type, |$K| { - dict_read::<$K>(pages, physical_type, logical_type, data_type, filter).map(|v| Box::new(v) as Box<_>) + dict_read::<$K>(pages, physical_type, logical_type, dtype, filter).map(|v| Box::new(v) as Box<_>) }).map_err(Into::into) }, (from, to) => { @@ -383,7 +383,7 @@ fn timestamp( pages: BasicDecompressor, physical_type: &PhysicalType, logical_type: &Option, - data_type: ArrowDataType, + dtype: ArrowDataType, filter: Option, time_unit: TimeUnit, ) -> PolarsResult> { @@ -392,7 +392,7 @@ fn timestamp( TimeUnit::Nanosecond => Ok(Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::closure(|x: [u32; 3]| int96_to_i64_ns(x)), )? .collect_n(filter)?, @@ -400,7 +400,7 @@ fn timestamp( TimeUnit::Microsecond => Ok(Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::closure(|x: [u32; 3]| int96_to_i64_us(x)), )? .collect_n(filter)?, @@ -408,7 +408,7 @@ fn timestamp( TimeUnit::Millisecond => Ok(Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::closure(|x: [u32; 3]| int96_to_i64_ms(x)), )? .collect_n(filter)?, @@ -416,7 +416,7 @@ fn timestamp( TimeUnit::Second => Ok(Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::FloatDecoder::closure(|x: [u32; 3]| int96_to_i64_s(x)), )? 
.collect_n(filter)?, @@ -433,13 +433,13 @@ fn timestamp( let (factor, is_multiplier) = unify_timestamp_unit(logical_type, time_unit); Ok(match (factor, is_multiplier) { (1, _) => Box::new( - PageDecoder::new(pages, data_type, primitive::IntDecoder::::unit())? + PageDecoder::new(pages, dtype, primitive::IntDecoder::::unit())? .collect_n(filter)?, ), (a, true) => Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::closure(|x: i64| x * a), )? .collect_n(filter)?, @@ -447,7 +447,7 @@ fn timestamp( (a, false) => Box::new( PageDecoder::new( pages, - data_type, + dtype, primitive::IntDecoder::closure(|x: i64| x / a), )? .collect_n(filter)?, @@ -459,7 +459,7 @@ fn timestamp_dict( pages: BasicDecompressor, physical_type: &PhysicalType, logical_type: &Option, - data_type: ArrowDataType, + dtype: ArrowDataType, filter: Option, time_unit: TimeUnit, ) -> ParquetResult> { @@ -493,13 +493,13 @@ fn timestamp_dict( match (factor, is_multiplier) { (a, true) => PageDecoder::new( pages, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::closure(|x: i64| x * a)), )? .collect_n(filter), (a, false) => PageDecoder::new( pages, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::closure(|x: i64| x / a)), )? .collect_n(filter), @@ -510,57 +510,57 @@ fn dict_read( iter: BasicDecompressor, physical_type: &PhysicalType, logical_type: &Option, - data_type: ArrowDataType, + dtype: ArrowDataType, filter: Option, ) -> ParquetResult> { use ArrowDataType::*; - let values_data_type = if let Dictionary(_, v, _) = &data_type { + let values_dtype = if let Dictionary(_, v, _) = &dtype { v.as_ref() } else { panic!() }; - Ok(match (physical_type, values_data_type.to_logical_type()) { + Ok(match (physical_type, values_dtype.to_logical_type()) { (PhysicalType::Int32, UInt8) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int32, UInt16) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int32, UInt32) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int64, UInt64) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int32, Int8) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int32, Int16) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::cast_as()), )? .collect_n(filter)?, (PhysicalType::Int32, Int32 | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth)) => { PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::unit()), )? .collect_n(filter)? @@ -572,7 +572,7 @@ fn dict_read( iter, physical_type, logical_type, - data_type, + dtype, filter, time_unit, ); @@ -580,32 +580,32 @@ fn dict_read( (PhysicalType::Int64, Int64 | Date64 | Time64(_) | Duration(_)) => PageDecoder::new( iter, - data_type, + dtype, dictionary::DictionaryDecoder::new(primitive::FloatDecoder::::unit()), )? 
         .collect_n(filter)?,
         (PhysicalType::Float, Float32) => PageDecoder::new(
             iter,
-            data_type,
+            dtype,
             dictionary::DictionaryDecoder::new(primitive::FloatDecoder::<f32>::unit()),
         )?
         .collect_n(filter)?,
         (PhysicalType::Double, Float64) => PageDecoder::new(
             iter,
-            data_type,
+            dtype,
             dictionary::DictionaryDecoder::new(primitive::FloatDecoder::<f64>::unit()),
         )?
         .collect_n(filter)?,
         (_, LargeUtf8 | LargeBinary | Utf8 | Binary) => unreachable!(),
         (PhysicalType::ByteArray, Utf8View | BinaryView) => PageDecoder::new(
             iter,
-            data_type,
+            dtype,
             dictionary::DictionaryDecoder::new(BinViewDecoder::default()),
         )?
         .collect_n(filter)?,
         (PhysicalType::FixedLenByteArray(size), FixedSizeBinary(_)) => PageDecoder::new(
             iter,
-            data_type,
+            dtype,
             dictionary::DictionaryDecoder::new(fixed_size_binary::BinaryDecoder { size: *size }),
         )?
         .collect_n(filter)?,
diff --git a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs
index c1dc1324bb27..dba00fc97930 100644
--- a/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs
+++ b/crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs
@@ -635,7 +635,7 @@ pub(super) trait Decoder: Sized {
 
     fn finalize(
         &self,
-        data_type: ArrowDataType,
+        dtype: ArrowDataType,
         dict: Option<Self::Dict>,
         decoded: Self::DecodedState,
     ) -> ParquetResult<Self::Output>;
@@ -676,7 +676,7 @@ pub(crate) trait NestedDecoder: Decoder {
 pub trait DictDecodable: Decoder {
     fn finalize_dict_array<K: DictionaryKey>(
         &self,
-        data_type: ArrowDataType,
+        dtype: ArrowDataType,
         dict: Self::Dict,
         keys: PrimitiveArray<K>,
     ) -> ParquetResult<DictionaryArray<K>>;
@@ -684,7 +684,7 @@ pub trait DictDecodable: Decoder {
 
 pub struct PageDecoder<D: Decoder> {
     pub iter: BasicDecompressor,
-    pub data_type: ArrowDataType,
+    pub dtype: ArrowDataType,
     pub dict: Option<D::Dict>,
     pub decoder: D,
 }
@@ -692,7 +692,7 @@ pub struct PageDecoder<D: Decoder> {
 impl<D: Decoder> PageDecoder<D> {
     pub fn new(
         mut iter: BasicDecompressor,
-        data_type: ArrowDataType,
+        dtype: ArrowDataType,
         decoder: D,
     ) -> ParquetResult<Self> {
         let dict_page = iter.read_dict_page()?;
@@ -700,7 +700,7 @@ impl<D: Decoder> PageDecoder<D> {
 
         Ok(Self {
             iter,
-            data_type,
+            dtype,
             dict,
             decoder,
         })
@@ -745,7 +745,7 @@ impl<D: Decoder> PageDecoder<D> {
             self.iter.reuse_page_buffer(page);
         }
 
-        self.decoder.finalize(self.data_type, self.dict, target)
+        self.decoder.finalize(self.dtype, self.dict, target)
     }
 }
diff --git a/crates/polars-parquet/src/arrow/read/schema/convert.rs b/crates/polars-parquet/src/arrow/read/schema/convert.rs
index aae71e871b00..e79139109845 100644
--- a/crates/polars-parquet/src/arrow/read/schema/convert.rs
+++ b/crates/polars-parquet/src/arrow/read/schema/convert.rs
@@ -311,7 +311,7 @@ pub(crate) fn is_nullable(field_info: &FieldInfo) -> bool {
 fn to_field(type_: &ParquetType, options: &SchemaInferenceOptions) -> Option<Field> {
     Some(Field::new(
         type_.get_field_info().name.clone(),
-        to_data_type(type_, options)?,
+        to_dtype(type_, options)?,
         is_nullable(type_.get_field_info()),
     ))
 }
@@ -340,7 +340,7 @@ fn to_list(
     } {
         // extract the repetition field
         let nested_item = fields.first().unwrap();
-        to_data_type(nested_item, options)
+        to_dtype(nested_item, options)
     } else {
         to_struct(fields, options)
     }
@@ -383,7 +383,7 @@ fn to_list(
 ///
 /// If this schema is a group type and none of its children is reserved in the
 /// conversion, the result is Ok(None).
-pub(crate) fn to_data_type(
+pub(crate) fn to_dtype(
     type_: &ParquetType,
     options: &SchemaInferenceOptions,
 ) -> Option<ArrowDataType> {
diff --git a/crates/polars-parquet/src/arrow/read/schema/metadata.rs b/crates/polars-parquet/src/arrow/read/schema/metadata.rs
index 0339032f5d84..915936c81296 100644
--- a/crates/polars-parquet/src/arrow/read/schema/metadata.rs
+++ b/crates/polars-parquet/src/arrow/read/schema/metadata.rs
@@ -19,15 +19,15 @@ pub fn read_schema_from_metadata(metadata: &mut Metadata) -> PolarsResult