From 6baf6a7be66fb0b7d36173b2ff127f3d5b67195b Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Wed, 12 Jul 2023 15:19:22 -0400 Subject: [PATCH 01/35] GH-36363: [MATLAB] Create proxy classes for the DataType class hierarchy (#36419) ### Rationale for this change In the original pull request in which we added the MATLAB `arrow.type.Type` classes (e.g. `arrow.type.Float32Type`), we did not implement these classes as proxies. At the time, we weren't sure if it would be advantageous to implement the type classes as proxies, but we now realize it will be for composite data structures, i.e. `Schema`, `StructArray`, `ListArray`. ### What changes are included in this PR? 1. All classes within the `arrow.type.Type` class hierarchy are implemented as proxies. ### Are these changes tested? Yes, we had existing tests for these classes. ### Are there any user-facing changes? No. ### Future Directions 1. In a follow-up pull request, we plan on integrating the proxy type classes and the array classes so that they share the same underlying C++ `arrow::DataType` object. We thought doing so in this change would be too much code churn. ### Notes Thank you @ kevingurney for the help! 
* Closes: #36363 Lead-authored-by: Sarah Gilmore Co-authored-by: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Co-authored-by: Sutou Kouhei Signed-off-by: Kevin Gurney --- .../matlab/array/proxy/timestamp_array.cc | 9 ++- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 17 ++++ .../matlab/type/proxy/fixed_width_type.cc | 34 ++++++++ .../matlab/type/proxy/fixed_width_type.h | 34 ++++++++ .../arrow/matlab/type/proxy/primitive_ctype.h | 55 +++++++++++++ .../arrow/matlab/type/proxy/string_type.cc | 28 +++++++ .../cpp/arrow/matlab/type/proxy/string_type.h | 35 ++++++++ .../arrow/matlab/type/proxy/timestamp_type.cc | 80 +++++++++++++++++++ .../arrow/matlab/type/proxy/timestamp_type.h | 42 ++++++++++ .../src/cpp/arrow/matlab/type/proxy/type.cc | 48 +++++++++++ matlab/src/cpp/arrow/matlab/type/proxy/type.h | 43 ++++++++++ matlab/src/cpp/arrow/matlab/type/time_unit.cc | 2 +- matlab/src/cpp/arrow/matlab/type/time_unit.h | 4 +- .../src/matlab/+arrow/+array/TimestampArray.m | 4 +- matlab/src/matlab/+arrow/+type/BooleanType.m | 8 +- .../{PrimitiveType.m => FixedWidthType.m} | 19 +++-- matlab/src/matlab/+arrow/+type/Float32Type.m | 8 +- matlab/src/matlab/+arrow/+type/Float64Type.m | 8 +- matlab/src/matlab/+arrow/+type/ID.m | 20 ----- matlab/src/matlab/+arrow/+type/Int16Type.m | 8 +- matlab/src/matlab/+arrow/+type/Int32Type.m | 8 +- matlab/src/matlab/+arrow/+type/Int64Type.m | 8 +- matlab/src/matlab/+arrow/+type/Int8Type.m | 8 +- matlab/src/matlab/+arrow/+type/StringType.m | 14 ++-- matlab/src/matlab/+arrow/+type/TimeUnit.m | 27 +++---- .../src/matlab/+arrow/+type/TimestampType.m | 26 +++--- matlab/src/matlab/+arrow/+type/Type.m | 33 +++++++- matlab/src/matlab/+arrow/+type/UInt16Type.m | 8 +- matlab/src/matlab/+arrow/+type/UInt32Type.m | 8 +- matlab/src/matlab/+arrow/+type/UInt64Type.m | 8 +- matlab/src/matlab/+arrow/+type/UInt8Type.m | 8 +- matlab/test/arrow/array/hNumericArray.m | 2 +- matlab/test/arrow/array/tBooleanArray.m | 2 +- 
matlab/test/arrow/array/tStringArray.m | 2 +- .../{hPrimitiveType.m => hFixedWidthType.m} | 12 +-- matlab/test/arrow/type/tBooleanType.m | 4 +- matlab/test/arrow/type/tFloat32Type.m | 4 +- matlab/test/arrow/type/tFloat64Type.m | 4 +- matlab/test/arrow/type/tID.m | 16 ---- matlab/test/arrow/type/tInt16Type.m | 4 +- matlab/test/arrow/type/tInt32Type.m | 4 +- matlab/test/arrow/type/tInt64Type.m | 4 +- matlab/test/arrow/type/tInt8Type.m | 4 +- matlab/test/arrow/type/tStringType.m | 7 +- matlab/test/arrow/type/tTimeUnit.m | 4 +- matlab/test/arrow/type/tTimestampType.m | 4 +- matlab/test/arrow/type/tUInt16Type.m | 4 +- matlab/test/arrow/type/tUInt32Type.m | 4 +- matlab/test/arrow/type/tUInt64Type.m | 4 +- matlab/test/arrow/type/tUInt8Type.m | 4 +- .../cmake/BuildMatlabArrowInterface.cmake | 9 ++- 51 files changed, 590 insertions(+), 174 deletions(-) create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/string_type.h create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/type.cc create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/type.h rename matlab/src/matlab/+arrow/+type/{PrimitiveType.m => FixedWidthType.m} (71%) rename matlab/test/arrow/type/{hPrimitiveType.m => hFixedWidthType.m} (81%) diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc index aa79a4f99240e..3b19daec58576 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc @@ -49,13 +49,14 @@ 
namespace arrow::matlab::array::proxy { const mda::TypedArray units_mda = opts[0]["TimeUnit"]; // extract the time zone string - const std::u16string& u16_timezone = timezone_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(u16_timezone), + const std::u16string& utf16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(utf16_timezone), error::UNICODE_CONVERSION_ERROR_ID); // extract the time unit - MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(units_mda[0]), - error::UKNOWN_TIME_UNIT_ERROR_ID) + const std::u16string& utf16_unit = units_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(utf16_unit), + error::UKNOWN_TIME_UNIT_ERROR_ID); // create the timestamp_type auto data_type = arrow::timestamp(time_unit, timezone); diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 41f1357bcedc5..0f7751035a052 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -21,6 +21,9 @@ #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" +#include "arrow/matlab/type/proxy/primitive_ctype.h" +#include "arrow/matlab/type/proxy/string_type.h" +#include "arrow/matlab/type/proxy/timestamp_type.h" #include "factory.h" @@ -41,6 +44,20 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray); REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); + REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType); + 
REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); + REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); + return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; }; diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc new file mode 100644 index 0000000000000..9ede57f2ee1dd --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include "arrow/matlab/type/proxy/fixed_width_type.h" + +namespace arrow::matlab::type::proxy { + + FixedWidthType::FixedWidthType(std::shared_ptr type) : Type(std::move(type)) { + REGISTER_METHOD(FixedWidthType, bitWidth); + } + + void FixedWidthType::bitWidth(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto bit_width_mda = factory.createScalar(data_type->bit_width()); + context.outputs[0] = bit_width_mda; + } +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h new file mode 100644 index 0000000000000..e245acd55640e --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + +class FixedWidthType : public arrow::matlab::type::proxy::Type { + public: + FixedWidthType(std::shared_ptr type); + + virtual ~FixedWidthType() {} + + protected: + void bitWidth(libmexclass::proxy::method::Context& context); + +}; + +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h b/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h new file mode 100644 index 0000000000000..0415972b44c5b --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/type/proxy/fixed_width_type.h" +#include "arrow/type_traits.h" + +#include + + +namespace arrow::matlab::type::proxy { + +template +using arrow_type_t = typename arrow::CTypeTraits::ArrowType; + +template +using is_primitive = arrow::is_primitive_ctype>; + +template +using enable_if_primitive = std::enable_if_t::value, bool>; + +template = true> +class PrimitiveCType : public arrow::matlab::type::proxy::FixedWidthType { + + using ArrowDataType = arrow_type_t; + + public: + PrimitiveCType(std::shared_ptr primitive_type) : arrow::matlab::type::proxy::FixedWidthType(std::move(primitive_type)) { + } + + ~PrimitiveCType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + auto data_type = arrow::CTypeTraits::type_singleton(); + return std::make_shared(std::static_pointer_cast(std::move(data_type))); + } +}; + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc new file mode 100644 index 0000000000000..362dfba7344ea --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/type/proxy/string_type.h" + +namespace arrow::matlab::type::proxy { + + StringType::StringType(std::shared_ptr string_type) : Type(std::move(string_type)) {} + + libmexclass::proxy::MakeResult StringType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + auto string_type = std::static_pointer_cast(arrow::utf8()); + return std::make_shared(std::move(string_type)); + } +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h new file mode 100644 index 0000000000000..fd1808d9b8058 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + +class StringType : public arrow::matlab::type::proxy::Type { + + public: + StringType(std::shared_ptr string_type); + + ~StringType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); +}; + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc new file mode 100644 index 0000000000000..b1d35ee4874db --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/type/proxy/timestamp_type.h" +#include "arrow/matlab/type/time_unit.h" +#include "arrow/matlab/error/error.h" +#include "arrow/util/utf8.h" + +namespace arrow::matlab::type::proxy { + + TimestampType::TimestampType(std::shared_ptr timestamp_type) : FixedWidthType(std::move(timestamp_type)) { + REGISTER_METHOD(TimestampType, timeUnit); + REGISTER_METHOD(TimestampType, timeZone); + } + + libmexclass::proxy::MakeResult TimestampType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + + using TimestampTypeProxy = arrow::matlab::type::proxy::TimestampType; + + mda::StructArray opts = constructor_arguments[0]; + + // Get the mxArray from constructor arguments + const mda::StringArray timezone_mda = opts[0]["TimeZone"]; + const mda::StringArray timeunit_mda = opts[0]["TimeUnit"]; + + // extract the time zone + const std::u16string& utf16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, + arrow::util::UTF16StringToUTF8(utf16_timezone), + error::UNICODE_CONVERSION_ERROR_ID); + + // extract the time unit + const std::u16string& utf16_timeunit = timeunit_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timeunit, + arrow::matlab::type::timeUnitFromString(utf16_timeunit), + error::UKNOWN_TIME_UNIT_ERROR_ID); + + auto type = arrow::timestamp(timeunit, timezone); + auto time_type = std::static_pointer_cast(type); + return std::make_shared(std::move(time_type)); + } + + void TimestampType::timeZone(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto timestamp_type = std::static_pointer_cast(data_type); + const auto timezone_utf8 = timestamp_type->timezone(); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto timezone_utf16, + arrow::util::UTF8StringToUTF16(timezone_utf8), + context, error::UNICODE_CONVERSION_ERROR_ID); + auto timezone_mda = factory.createScalar(timezone_utf16); + context.outputs[0] = 
timezone_mda; + } + + void TimestampType::timeUnit(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto timestamp_type = std::static_pointer_cast(data_type); + const auto timeunit = timestamp_type->unit(); + auto timeunit_mda = factory.createScalar(static_cast(timeunit)); + context.outputs[0] = timeunit_mda; + } +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h new file mode 100644 index 0000000000000..71005dc3a980d --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/type/proxy/fixed_width_type.h" +#include "arrow/type_traits.h" + +namespace arrow::matlab::type::proxy { + +class TimestampType : public arrow::matlab::type::proxy::FixedWidthType { + + public: + TimestampType(std::shared_ptr timestamp_type); + + ~TimestampType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + + void timeZone(libmexclass::proxy::method::Context& context); + + void timeUnit(libmexclass::proxy::method::Context& context); +}; + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc new file mode 100644 index 0000000000000..f6a307ff3f62f --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + + Type::Type(std::shared_ptr type) : data_type{std::move(type)} { + REGISTER_METHOD(Type, typeID); + REGISTER_METHOD(Type, numFields); + } + + std::shared_ptr Type::unwrap() { + return data_type; + } + + void Type::typeID(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto type_number_mda = factory.createScalar(static_cast(data_type->id())); + context.outputs[0] = type_number_mda; + } + + void Type::numFields(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto num_fields_mda = factory.createScalar(data_type->num_fields()); + context.outputs[0] = num_fields_mda; + } + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.h b/matlab/src/cpp/arrow/matlab/type/proxy/type.h new file mode 100644 index 0000000000000..e94097aa73cb4 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/type.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::type::proxy { + +class Type : public libmexclass::proxy::Proxy { + public: + Type(std::shared_ptr type); + + virtual ~Type() {} + + std::shared_ptr unwrap(); + + protected: + + void typeID(libmexclass::proxy::method::Context& context); + + void numFields(libmexclass::proxy::method::Context& context); + + std::shared_ptr data_type; +}; + +} diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.cc b/matlab/src/cpp/arrow/matlab/type/time_unit.cc index 15ebfcfc0c06b..eb839b0e78096 100644 --- a/matlab/src/cpp/arrow/matlab/type/time_unit.cc +++ b/matlab/src/cpp/arrow/matlab/type/time_unit.cc @@ -20,7 +20,7 @@ namespace arrow::matlab::type { - arrow::Result timeUnitFromString(const std::u16string& unit_str) { + arrow::Result timeUnitFromString(std::u16string_view unit_str) { if (unit_str == u"Second") { return arrow::TimeUnit::type::SECOND; } else if (unit_str == u"Millisecond") { diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.h b/matlab/src/cpp/arrow/matlab/type/time_unit.h index cf3248d77b967..9534b1f902db7 100644 --- a/matlab/src/cpp/arrow/matlab/type/time_unit.h +++ b/matlab/src/cpp/arrow/matlab/type/time_unit.h @@ -18,10 +18,10 @@ #include "arrow/type_fwd.h" #include "arrow/result.h" -#include +#include namespace arrow::matlab::type { - arrow::Result timeUnitFromString(const std::u16string& unit_str); + arrow::Result timeUnitFromString(std::u16string_view unit_str); } diff --git a/matlab/src/matlab/+arrow/+array/TimestampArray.m b/matlab/src/matlab/+arrow/+array/TimestampArray.m index 0aa76beb99c7a..fb4b2fa1bfade 100644 --- a/matlab/src/matlab/+arrow/+array/TimestampArray.m +++ b/matlab/src/matlab/+arrow/+array/TimestampArray.m @@ -48,7 +48,7 @@ epoch = datetime(1970, 1, 1, TimeZone="UTC"); tz = obj.Type.TimeZone; - ticsPerSecond = obj.Type.TimeUnit.TicksPerSecond; + ticsPerSecond = ticksPerSecond(obj.Type.TimeUnit); dates = datetime(time, 
ConvertFrom="epochtime", Epoch=epoch, ... TimeZone=tz, TicksPerSecond=ticsPerSecond); @@ -72,7 +72,7 @@ % % TODO: convertTo may error if the datetime is 2^63-1 before or % after the epoch. We should throw a custom error in this case. - time(indices) = convertTo(dates(indices), "epochtime", TicksPerSecond=units.TicksPerSecond); + time(indices) = convertTo(dates(indices), "epochtime", TicksPerSecond=ticksPerSecond(units)); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/BooleanType.m b/matlab/src/matlab/+arrow/+type/BooleanType.m index 050beae3f5120..202d177dee03f 100644 --- a/matlab/src/matlab/+arrow/+type/BooleanType.m +++ b/matlab/src/matlab/+arrow/+type/BooleanType.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef BooleanType < arrow.type.PrimitiveType +classdef BooleanType < arrow.type.FixedWidthType %BOOLEANTYPE Type class for boolean data. - properties(SetAccess = protected) - ID = arrow.type.ID.Boolean + methods + function obj = BooleanType() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.BooleanType", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/PrimitiveType.m b/matlab/src/matlab/+arrow/+type/FixedWidthType.m similarity index 71% rename from matlab/src/matlab/+arrow/+type/PrimitiveType.m rename to matlab/src/matlab/+arrow/+type/FixedWidthType.m index 6297b98d8b01b..dcbb3e69e756a 100644 --- a/matlab/src/matlab/+arrow/+type/PrimitiveType.m +++ b/matlab/src/matlab/+arrow/+type/FixedWidthType.m @@ -13,21 +13,20 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef PrimitiveType < arrow.type.Type -%PRIMITIVETYPE Abstract type class representing primtive data types. 
+classdef (Abstract) FixedWidthType < arrow.type.Type +%FIXEDWIDTHTYPE Abstract type class representing fixed width data types. - properties(Dependent, SetAccess=protected, GetAccess=public) + properties(Dependent, SetAccess=private, GetAccess=public) BitWidth end - properties(Constant) - NumFields = 0 - NumBuffers = 2 - end - methods + function obj = FixedWidthType(varargin) + obj@arrow.type.Type(varargin{:}); + end + function width = get.BitWidth(obj) - width = bitWidth(obj.ID); + width = obj.Proxy.bitWidth(); end - end + end end diff --git a/matlab/src/matlab/+arrow/+type/Float32Type.m b/matlab/src/matlab/+arrow/+type/Float32Type.m index b0430bda7eab4..aec21fe1ce5e8 100644 --- a/matlab/src/matlab/+arrow/+type/Float32Type.m +++ b/matlab/src/matlab/+arrow/+type/Float32Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Float32Type < arrow.type.PrimitiveType +classdef Float32Type < arrow.type.FixedWidthType %FLOAT32TYPE Type class for float32 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Float32 + methods + function obj = Float32Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Float32Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Float64Type.m b/matlab/src/matlab/+arrow/+type/Float64Type.m index a2ffe02b786af..25c9ff41b61ab 100644 --- a/matlab/src/matlab/+arrow/+type/Float64Type.m +++ b/matlab/src/matlab/+arrow/+type/Float64Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Float64Type < arrow.type.PrimitiveType +classdef Float64Type < arrow.type.FixedWidthType %FLOAT64Type Type class for float64 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.Float64 + methods + function obj = Float64Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Float64Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/ID.m b/matlab/src/matlab/+arrow/+type/ID.m index 2e320603d039c..076d79d196a30 100644 --- a/matlab/src/matlab/+arrow/+type/ID.m +++ b/matlab/src/matlab/+arrow/+type/ID.m @@ -35,24 +35,4 @@ % Date64 (17) Timestamp (18) end - - methods - function bitWidth = bitWidth(obj) - import arrow.type.ID - switch obj - case ID.Boolean - bitWidth = 1; - case {ID.UInt8, ID.Int8} - bitWidth = 8; - case {ID.UInt16, ID.Int16} - bitWidth = 16; - case {ID.UInt32, ID.Int32, ID.Float32} - bitWidth = 32; - case {ID.UInt64, ID.Int64, ID.Float64, ID.Timestamp} - bitWidth = 64; - otherwise - bitWidth = NaN; - end - end - end end diff --git a/matlab/src/matlab/+arrow/+type/Int16Type.m b/matlab/src/matlab/+arrow/+type/Int16Type.m index 3d060f7e58671..ce9c61d447407 100644 --- a/matlab/src/matlab/+arrow/+type/Int16Type.m +++ b/matlab/src/matlab/+arrow/+type/Int16Type.m @@ -13,11 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int16Type < arrow.type.PrimitiveType +classdef Int16Type < arrow.type.FixedWidthType %INT16TYPE Type class for int8 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Int16 + methods + function obj = Int16Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int16Type", "ConstructorArguments", {}) + end end end diff --git a/matlab/src/matlab/+arrow/+type/Int32Type.m b/matlab/src/matlab/+arrow/+type/Int32Type.m index 98c81c08647dd..260a9d7a37cee 100644 --- a/matlab/src/matlab/+arrow/+type/Int32Type.m +++ b/matlab/src/matlab/+arrow/+type/Int32Type.m @@ -13,11 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef Int32Type < arrow.type.PrimitiveType +classdef Int32Type < arrow.type.FixedWidthType %INT32TYPE Type class for int32 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Int32 + methods + function obj = Int32Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int32Type", "ConstructorArguments", {}) + end end end diff --git a/matlab/src/matlab/+arrow/+type/Int64Type.m b/matlab/src/matlab/+arrow/+type/Int64Type.m index 23147817e36e1..857a84e74cc34 100644 --- a/matlab/src/matlab/+arrow/+type/Int64Type.m +++ b/matlab/src/matlab/+arrow/+type/Int64Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int64Type < arrow.type.PrimitiveType +classdef Int64Type < arrow.type.FixedWidthType %INT64TYPE Type class for int64 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Int64 + methods + function obj = Int64Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int64Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Int8Type.m b/matlab/src/matlab/+arrow/+type/Int8Type.m index 9d364bb32be82..1d066b4b8b84a 100644 --- a/matlab/src/matlab/+arrow/+type/Int8Type.m +++ b/matlab/src/matlab/+arrow/+type/Int8Type.m @@ -13,11 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int8Type < arrow.type.PrimitiveType +classdef Int8Type < arrow.type.FixedWidthType %INT8TYPE Type class for int8 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.Int8 + methods + function obj = Int8Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int8Type", "ConstructorArguments", {}) + end end end diff --git a/matlab/src/matlab/+arrow/+type/StringType.m b/matlab/src/matlab/+arrow/+type/StringType.m index 66a15dd0ea3e2..337c5a9bd6863 100644 --- a/matlab/src/matlab/+arrow/+type/StringType.m +++ b/matlab/src/matlab/+arrow/+type/StringType.m @@ -16,14 +16,10 @@ classdef StringType < arrow.type.Type %STRINGTYPE Type class for string data. - properties(SetAccess = protected) - ID = arrow.type.ID.String - end - - properties(Constant) - NumFields = 0 - NumBuffers = 3 - end - + methods + function obj = StringType() + obj@arrow.type.Type("Name", "arrow.type.proxy.StringType", "ConstructorArguments", {}); + end + end end diff --git a/matlab/src/matlab/+arrow/+type/TimeUnit.m b/matlab/src/matlab/+arrow/+type/TimeUnit.m index 3ec8bf44d104f..358818be985c9 100644 --- a/matlab/src/matlab/+arrow/+type/TimeUnit.m +++ b/matlab/src/matlab/+arrow/+type/TimeUnit.m @@ -12,33 +12,28 @@ % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef TimeUnit +classdef TimeUnit < int16 % Enumeration class representing Time Units. 
enumeration - Second - Millisecond - Microsecond - Nanosecond + Second (0) + Millisecond (1) + Microsecond (2) + Nanosecond (3) end - properties (Dependent) - TicksPerSecond - end - - - methods - function ticksPerSecond = get.TicksPerSecond(obj) + methods (Hidden) + function ticks = ticksPerSecond(obj) import arrow.type.TimeUnit switch obj case TimeUnit.Second - ticksPerSecond = 1; + ticks = 1; case TimeUnit.Millisecond - ticksPerSecond = 1e3; + ticks = 1e3; case TimeUnit.Microsecond - ticksPerSecond = 1e6; + ticks = 1e6; case TimeUnit.Nanosecond - ticksPerSecond = 1e9; + ticks = 1e9; end end end diff --git a/matlab/src/matlab/+arrow/+type/TimestampType.m b/matlab/src/matlab/+arrow/+type/TimestampType.m index 99ac4a7b769f7..c7a576968edec 100644 --- a/matlab/src/matlab/+arrow/+type/TimestampType.m +++ b/matlab/src/matlab/+arrow/+type/TimestampType.m @@ -13,17 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef TimestampType < arrow.type.PrimitiveType +classdef TimestampType < arrow.type.FixedWidthType %TIMESTAMPTYPE Type class for timestamp data. 
- - properties(SetAccess=private) - TimeZone(1, 1) string - TimeUnit(1, 1) arrow.type.TimeUnit - end - - properties(SetAccess = protected) - ID = arrow.type.ID.Timestamp + properties(Dependent, SetAccess=private, GetAccess=public) + TimeZone + TimeUnit end methods @@ -33,8 +28,17 @@ opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond opts.TimeZone(1, 1) string {mustBeNonmissing} = "" end - obj.TimeUnit = opts.TimeUnit; - obj.TimeZone = opts.TimeZone; + args = struct(TimeUnit=string(opts.TimeUnit), TimeZone=opts.TimeZone); + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.TimestampType", "ConstructorArguments", {args}); + end + + function unit = get.TimeUnit(obj) + val = obj.Proxy.timeUnit(); + unit = arrow.type.TimeUnit(val); + end + + function tz = get.TimeZone(obj) + tz = obj.Proxy.timeZone(); end end end diff --git a/matlab/src/matlab/+arrow/+type/Type.m b/matlab/src/matlab/+arrow/+type/Type.m index a05eb2253bf87..d6efc32be3b47 100644 --- a/matlab/src/matlab/+arrow/+type/Type.m +++ b/matlab/src/matlab/+arrow/+type/Type.m @@ -13,11 +13,36 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Type +classdef (Abstract) Type < matlab.mixin.CustomDisplay %TYPE Abstract type class. 
- properties (Abstract, SetAccess=protected) - ID(1, 1) arrow.type.ID + properties (Dependent, GetAccess=public, SetAccess=private) + ID + NumFields end -end + properties (GetAccess=public, SetAccess=private, Hidden) + Proxy + end + + methods + function obj = Type(varargin) + obj.Proxy = libmexclass.proxy.Proxy(varargin{:}); + end + + function numFields = get.NumFields(obj) + numFields = obj.Proxy.numFields(); + end + + function typeID = get.ID(obj) + typeID = arrow.type.ID(obj.Proxy.typeID()); + end + end + + methods (Access=protected) + function propgrp = getPropertyGroups(~) + proplist = {'ID'}; + propgrp = matlab.mixin.util.PropertyGroup(proplist); + end + end +end diff --git a/matlab/src/matlab/+arrow/+type/UInt16Type.m b/matlab/src/matlab/+arrow/+type/UInt16Type.m index 8d53ea68556d8..40def5f927227 100644 --- a/matlab/src/matlab/+arrow/+type/UInt16Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt16Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt16Type < arrow.type.PrimitiveType +classdef UInt16Type < arrow.type.FixedWidthType %UINT16TYPE Type class for uint16 data. - properties(SetAccess = protected) - ID = arrow.type.ID.UInt16 + methods + function obj = UInt16Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt16Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt32Type.m b/matlab/src/matlab/+arrow/+type/UInt32Type.m index 693bd897d66dc..5b030884fe004 100644 --- a/matlab/src/matlab/+arrow/+type/UInt32Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt32Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt32Type < arrow.type.PrimitiveType +classdef UInt32Type < arrow.type.FixedWidthType %UINT32TYPE Type class for uint32 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.UInt32 + methods + function obj = UInt32Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt32Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt64Type.m b/matlab/src/matlab/+arrow/+type/UInt64Type.m index fbd06646cedd7..60f7173bfe59a 100644 --- a/matlab/src/matlab/+arrow/+type/UInt64Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt64Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt64Type < arrow.type.PrimitiveType +classdef UInt64Type < arrow.type.FixedWidthType %UINT64TYPE Type class for uint64 data. - properties(SetAccess = protected) - ID = arrow.type.ID.UInt64 + methods + function obj = UInt64Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt64Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt8Type.m b/matlab/src/matlab/+arrow/+type/UInt8Type.m index 9abd001b43c67..e09c7ed71116a 100644 --- a/matlab/src/matlab/+arrow/+type/UInt8Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt8Type.m @@ -13,10 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt8Type < arrow.type.PrimitiveType +classdef UInt8Type < arrow.type.FixedWidthType %UINT8TYPE Type class for uint8 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.UInt8 + methods + function obj = UInt8Type() + obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt8Type", "ConstructorArguments", {}) + end end end \ No newline at end of file diff --git a/matlab/test/arrow/array/hNumericArray.m b/matlab/test/arrow/array/hNumericArray.m index 7938811e4213a..69d77c5394697 100644 --- a/matlab/test/arrow/array/hNumericArray.m +++ b/matlab/test/arrow/array/hNumericArray.m @@ -154,7 +154,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn([1 2 3 4]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end end end diff --git a/matlab/test/arrow/array/tBooleanArray.m b/matlab/test/arrow/array/tBooleanArray.m index 3a565202a2775..00eef2c91064c 100644 --- a/matlab/test/arrow/array/tBooleanArray.m +++ b/matlab/test/arrow/array/tBooleanArray.m @@ -155,7 +155,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn([true false]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end end end diff --git a/matlab/test/arrow/array/tStringArray.m b/matlab/test/arrow/array/tStringArray.m index 000a57b27bcc2..b076c636b13e5 100644 --- a/matlab/test/arrow/array/tStringArray.m +++ b/matlab/test/arrow/array/tStringArray.m @@ -149,7 +149,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn(["A", "B"]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end function Unicode(tc) diff --git a/matlab/test/arrow/type/hPrimitiveType.m b/matlab/test/arrow/type/hFixedWidthType.m similarity index 81% rename from 
matlab/test/arrow/type/hPrimitiveType.m rename to matlab/test/arrow/type/hFixedWidthType.m index b757ad4b409c9..1f2a5e413dd70 100644 --- a/matlab/test/arrow/type/hPrimitiveType.m +++ b/matlab/test/arrow/type/hFixedWidthType.m @@ -13,9 +13,9 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef hPrimitiveType < matlab.unittest.TestCase +classdef hFixedWidthType < matlab.unittest.TestCase % Test class that defines shared unit tests for classes that inherit from -% arrow.type.PrimitiveType +% arrow.type.FixedWidthType properties(Abstract) ArrowType @@ -39,13 +39,7 @@ function TestBitWidth(testCase) function TestNumFields(testCase) % Verify NumFields is set to 0 for primitive types. arrowType = testCase.ArrowType; - testCase.verifyEqual(arrowType.NumFields, 0); - end - - function TestNumBuffers(testCase) - % Verify NumBuffers is set to 2 for primitive types. - arrowType = testCase.ArrowType; - testCase.verifyEqual(arrowType.NumBuffers, 2); + testCase.verifyEqual(arrowType.NumFields, int32(0)); end end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tBooleanType.m b/matlab/test/arrow/type/tBooleanType.m index 23884991f2065..900ff3d9b3390 100644 --- a/matlab/test/arrow/type/tBooleanType.m +++ b/matlab/test/arrow/type/tBooleanType.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tBooleanType < hPrimitiveType +classdef tBooleanType < hFixedWidthType % Test class for arrow.type.BooleanType properties ArrowType = arrow.type.BooleanType TypeID = arrow.type.ID.Boolean - BitWidth = 1; + BitWidth = int32(1); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat32Type.m b/matlab/test/arrow/type/tFloat32Type.m index 8c4fa5f402942..af407559ee24f 100644 --- a/matlab/test/arrow/type/tFloat32Type.m +++ b/matlab/test/arrow/type/tFloat32Type.m @@ -13,12 +13,12 @@ % implied. 
See the License for the specific language governing % permissions and limitations under the License. -classdef tFloat32Type < hPrimitiveType +classdef tFloat32Type < hFixedWidthType % Test class for arrow.type.Float32Type properties ArrowType = arrow.type.Float32Type TypeID = arrow.type.ID.Float32 - BitWidth = 32; + BitWidth = int32(32); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat64Type.m b/matlab/test/arrow/type/tFloat64Type.m index c4489c4080341..d5ccd8ef259a8 100644 --- a/matlab/test/arrow/type/tFloat64Type.m +++ b/matlab/test/arrow/type/tFloat64Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tFloat64Type < hPrimitiveType +classdef tFloat64Type < hFixedWidthType % Test class for arrow.type.Float64Type properties ArrowType = arrow.type.Float64Type TypeID = arrow.type.ID.Float64 - BitWidth = 64; + BitWidth = int32(64); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tID.m b/matlab/test/arrow/type/tID.m index 10c99dfab8775..344d2dd0f5a96 100644 --- a/matlab/test/arrow/type/tID.m +++ b/matlab/test/arrow/type/tID.m @@ -26,22 +26,6 @@ function verifyOnMatlabPath(tc) end methods (Test) - function bitWidth(testCase) - import arrow.type.ID - - typeIDs = [ID.Boolean, ID.UInt8, ID.Int8, ID.UInt16, ... - ID.Int16, ID.UInt32, ID.Int32, ID.UInt64, ... - ID.Int64, ID.Float32, ID.Float64]; - - expectedWidths = [1, 8, 8, 16, 16, 32, 32, 64, 64, 32, 64]; - - for ii = 1:numel(typeIDs) - actualWidth = bitWidth(typeIDs(ii)); - expectedWidth = expectedWidths(ii); - testCase.verifyEqual(actualWidth, expectedWidth); - end - end - function CastToUInt64(testCase) import arrow.type.ID diff --git a/matlab/test/arrow/type/tInt16Type.m b/matlab/test/arrow/type/tInt16Type.m index b5b5e803dfd06..32e9ff5cb2fdf 100644 --- a/matlab/test/arrow/type/tInt16Type.m +++ b/matlab/test/arrow/type/tInt16Type.m @@ -13,12 +13,12 @@ % implied. 
See the License for the specific language governing % permissions and limitations under the License. -classdef tInt16Type < hPrimitiveType +classdef tInt16Type < hFixedWidthType % Test class for arrow.type.Int16Type properties ArrowType = arrow.type.Int16Type TypeID = arrow.type.ID.Int16 - BitWidth = 16; + BitWidth = int32(16); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt32Type.m b/matlab/test/arrow/type/tInt32Type.m index ab9c1bf4a7afa..1076ef802654e 100644 --- a/matlab/test/arrow/type/tInt32Type.m +++ b/matlab/test/arrow/type/tInt32Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tInt32Type < hPrimitiveType +classdef tInt32Type < hFixedWidthType % Test class for arrow.type.Int32Type properties ArrowType = arrow.type.Int32Type TypeID = arrow.type.ID.Int32 - BitWidth = 32; + BitWidth = int32(32); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt64Type.m b/matlab/test/arrow/type/tInt64Type.m index b5a273f0f36a0..24b94c4c04b6c 100644 --- a/matlab/test/arrow/type/tInt64Type.m +++ b/matlab/test/arrow/type/tInt64Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tInt64Type < hPrimitiveType +classdef tInt64Type < hFixedWidthType % Test class for arrow.type.Int64Type properties ArrowType = arrow.type.Int64Type TypeID = arrow.type.ID.Int64 - BitWidth = 64; + BitWidth = int32(64); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt8Type.m b/matlab/test/arrow/type/tInt8Type.m index 7e8e06790d460..57adf7bd2f118 100644 --- a/matlab/test/arrow/type/tInt8Type.m +++ b/matlab/test/arrow/type/tInt8Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef tInt8Type < hPrimitiveType +classdef tInt8Type < hFixedWidthType % Test class for arrow.type.Int8Type properties ArrowType = arrow.type.Int8Type TypeID = arrow.type.ID.Int8 - BitWidth = 8; + BitWidth = int32(8); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tStringType.m b/matlab/test/arrow/type/tStringType.m index f3cf101ac6185..a4a0309218316 100644 --- a/matlab/test/arrow/type/tStringType.m +++ b/matlab/test/arrow/type/tStringType.m @@ -25,14 +25,9 @@ function Basic(tc) tc.verifyEqual(type.ID, arrow.type.ID.String); end - function NumBuffers(tc) - type = arrow.type.StringType; - tc.verifyEqual(type.NumBuffers, 3); - end - function NumFields(tc) type = arrow.type.StringType; - tc.verifyEqual(type.NumFields, 0); + tc.verifyEqual(type.NumFields, int32(0)); end end diff --git a/matlab/test/arrow/type/tTimeUnit.m b/matlab/test/arrow/type/tTimeUnit.m index b01de443443c5..0c2432193a3af 100644 --- a/matlab/test/arrow/type/tTimeUnit.m +++ b/matlab/test/arrow/type/tTimeUnit.m @@ -31,9 +31,9 @@ function TicksPerSecond(testCase) import arrow.type.TimeUnit units = [TimeUnit.Second, TimeUnit.Millisecond, ... TimeUnit.Microsecond, TimeUnit.Nanosecond]'; - ticksPerSecond = [1 1e3 1e6 1e9]; + ticks = [1 1e3 1e6 1e9]; for ii = 1:numel(units) - testCase.verifyEqual(units(ii).TicksPerSecond, ticksPerSecond(ii)); + testCase.verifyEqual(ticksPerSecond(units(ii)), ticks(ii)); end end end diff --git a/matlab/test/arrow/type/tTimestampType.m b/matlab/test/arrow/type/tTimestampType.m index f8a9a37f32a63..95b06d7b56faf 100644 --- a/matlab/test/arrow/type/tTimestampType.m +++ b/matlab/test/arrow/type/tTimestampType.m @@ -13,13 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef tTimestampType < hPrimitiveType +classdef tTimestampType < hFixedWidthType % Test class for arrow.type.TimestampType properties ArrowType = arrow.type.TimestampType TypeID = arrow.type.ID.Timestamp - BitWidth = 64; + BitWidth = int32(64); end methods(Test) diff --git a/matlab/test/arrow/type/tUInt16Type.m b/matlab/test/arrow/type/tUInt16Type.m index b5102ace34d84..c0823b4f6962b 100644 --- a/matlab/test/arrow/type/tUInt16Type.m +++ b/matlab/test/arrow/type/tUInt16Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt16Type < hPrimitiveType +classdef tUInt16Type < hFixedWidthType % Test class for arrow.type.UInt16Type properties ArrowType = arrow.type.UInt16Type TypeID = arrow.type.ID.UInt16 - BitWidth = 16; + BitWidth = int32(16); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt32Type.m b/matlab/test/arrow/type/tUInt32Type.m index 8f86eec7f53c3..15fe93106e3da 100644 --- a/matlab/test/arrow/type/tUInt32Type.m +++ b/matlab/test/arrow/type/tUInt32Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt32Type < hPrimitiveType +classdef tUInt32Type < hFixedWidthType % Test class for arrow.type.UInt32Type properties ArrowType = arrow.type.UInt32Type TypeID = arrow.type.ID.UInt32 - BitWidth = 32; + BitWidth = int32(32); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt64Type.m b/matlab/test/arrow/type/tUInt64Type.m index 7f3084616d35f..4646c91455e8a 100644 --- a/matlab/test/arrow/type/tUInt64Type.m +++ b/matlab/test/arrow/type/tUInt64Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef tUInt64Type < hPrimitiveType +classdef tUInt64Type < hFixedWidthType % Test class for arrow.type.UInt64Type properties ArrowType = arrow.type.UInt64Type TypeID = arrow.type.ID.UInt64 - BitWidth = 64; + BitWidth = int32(64); end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt8Type.m b/matlab/test/arrow/type/tUInt8Type.m index 6dfc8a4694359..ebd6b04b0eade 100644 --- a/matlab/test/arrow/type/tUInt8Type.m +++ b/matlab/test/arrow/type/tUInt8Type.m @@ -13,12 +13,12 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt8Type < hPrimitiveType +classdef tUInt8Type < hFixedWidthType % Test class for arrow.type.UInt64Type properties ArrowType = arrow.type.UInt8Type TypeID = arrow.type.ID.UInt8 - BitWidth = 8; + BitWidth = int32(8); end end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 419f2ae459b81..1a049f47d1a8f 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -37,7 +37,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOUR set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" @@ -47,7 +48,11 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/pack.cc" 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/type.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/string_type.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc") From 6fb98a6d21a8c7720e8fb71f77487c8d8878e91e Mon Sep 17 00:00:00 2001 From: Vitalii Tverdokhlib Date: Wed, 12 Jul 2023 22:23:46 +0300 Subject: [PATCH 02/35] MINOR: [Docs] error in example (#36534) ### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? --- js/src/builder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/builder.ts b/js/src/builder.ts index 6f84154935f7b..90fe3ddcc9477 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -72,7 +72,7 @@ export interface BuilderOptions { * * @example * ```ts - * import { Builder, Utf8 } from 'apache-arrow'; + * import { makeBuilder, Utf8 } from 'apache-arrow'; * * const utf8Builder = makeBuilder({ * type: new Utf8(), From 0339078ddc4c6d8f9ba527706676fa55ff457d94 Mon Sep 17 00:00:00 2001 From: rtpsw Date: Wed, 12 Jul 2023 22:58:30 +0300 Subject: [PATCH 03/35] GH-36482: [C++][CI] Fix sporadic test failures in AsofJoinBasicTest (#36499) ### What changes are included in this PR? The key hasher is invalidated before the first invocation of `GetKey` (via `GetLatestKey`) after a new batch arrives. 
In the pre-PR code, this invalidation happens within `Advance`, which is called from `AdvanceAndMemoize` only after `GetLatestKey` is called. The change adds synchronization between the input-receiving- and processing- threads, because avoiding that would require a more complicated and brittle change, e.g., one that involves detecting in the processing thread when a new batch was added to the queue in order to invalidate the key hasher at that time. ### Are these changes tested? Yes, by existing tests. ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** * Closes: #36482 Authored-by: Yaron Gvili Signed-off-by: Weston Pace --- cpp/src/arrow/acero/asof_join_node.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 98e5918ebbf55..b7f5d878e5881 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -524,7 +524,7 @@ class KeyHasher { size_t index_; std::vector indices_; std::vector metadata_; - const RecordBatch* batch_; + std::atomic batch_; std::vector hashes_; LightContext ctx_; std::vector column_arrays_; @@ -819,7 +819,6 @@ class InputState { have_active_batch &= !queue_.TryPop(); if (have_active_batch) { DCHECK_GT(queue_.UnsyncFront()->num_rows(), 0); // empty batches disallowed - key_hasher_->Invalidate(); // batch changed - invalidate key hasher's cache memo_.UpdateTime(GetTime(queue_.UnsyncFront().get(), 0)); // time changed } } @@ -897,7 +896,8 @@ class InputState { Status Push(const std::shared_ptr& rb) { if (rb->num_rows() > 0) { - queue_.Push(rb); // only after above updates - push batch for processing + key_hasher_->Invalidate(); // batch changed - invalidate key hasher's cache + queue_.Push(rb); // only now push batch for processing } else { ++batches_processed_; // don't enqueue empty batches, just record as processed } From d8a336068540389196102b4e5366b82bad92987b Mon Sep 17 
00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Wed, 12 Jul 2023 16:18:25 -0400 Subject: [PATCH 04/35] GH-36614: [MATLAB] Subclass arrow::Buffer to keep MATLAB data backing arrow::Arrays alive (#36615) ### Rationale for this change When building `arrow.array.Arrays` from native MATLAB arrays, we avoid copying by 1. Wrapping the MATLAB data inside a non-owning `arrow::Buffer` 2. Constructing an `arrow::ArrayData` object from the `arrow::Buffer` 3. Constructing the `arrow::Array` from the `arrow::ArrayData` object. Because the `Array`'s underlying `Buffer` does not have ownership of its backing data, we have been storing the original MATLAB array as a property called `MatlabArray` on the MATLAB `arrow.array.NumericArray` class. This solution is not ideal because the backing MATLAB array is kept separate from the actual `std::shared_ptr<arrow::Array>`, which is stored within the C++ proxy objects (e.g. `arrow::matlab::array::proxy::NumericArray`). A better solution would be to create a new subclass of `arrow::Buffer` called `MatlabBuffer`, which will keep the original MATLAB array alive by taking it in as an input parameter and storing it as a member variable. ### What changes are included in this PR? 1. Removed the `MatlabArray` property from the MATLAB class `arrow.array.NumericArray` 2. Added a private member variable called `mda_array` to `arrow::matlab::array::proxy::NumericArray` and `arrow::matlab::array::proxy::TimestampArray`. `mda_array` is a `matlab::data::Array` object. This member variable stores the MATLAB array that owns the memory the `arrow::Array` objects are backed by. ### Are these changes tested? Existing tests used. ### Are there any user-facing changes? No.
* Closes: #36614 Authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../src/cpp/arrow/matlab/array/proxy/array.cc | 2 +- .../src/cpp/arrow/matlab/array/proxy/array.h | 2 +- .../arrow/matlab/array/proxy/boolean_array.cc | 6 ++- .../arrow/matlab/array/proxy/boolean_array.h | 6 +-- .../arrow/matlab/array/proxy/numeric_array.h | 26 +++++----- .../arrow/matlab/array/proxy/string_array.cc | 7 ++- .../arrow/matlab/array/proxy/string_array.h | 9 ++-- .../matlab/array/proxy/timestamp_array.cc | 45 ++++++++--------- .../matlab/array/proxy/timestamp_array.h | 8 ++-- .../cpp/arrow/matlab/buffer/matlab_buffer.h | 48 +++++++++++++++++++ .../src/matlab/+arrow/+array/NumericArray.m | 8 ---- matlab/test/arrow/array/hNumericArray.m | 8 ---- .../cmake/BuildMatlabArrowInterface.cmake | 4 +- 13 files changed, 104 insertions(+), 75 deletions(-) create mode 100644 matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index 35dc496bddb00..7f4d789c105e2 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -23,7 +23,7 @@ namespace arrow::matlab::array::proxy { - Array::Array() { + Array::Array(std::shared_ptr array) : array{std::move(array)} { // Register Proxy methods. 
REGISTER_METHOD(Array, toString); diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index 94fad759759ca..c36f1900712e1 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -25,7 +25,7 @@ namespace arrow::matlab::array::proxy { class Array : public libmexclass::proxy::Proxy { public: - Array(); + Array(std::shared_ptr array); virtual ~Array() {} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc index 9a3b7ed4e22e9..bcbe49f04bb6d 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc @@ -23,6 +23,9 @@ namespace arrow::matlab::array::proxy { + BooleanArray::BooleanArray(std::shared_ptr array) + : arrow::matlab::array::proxy::Array{std::move(array)} {} + libmexclass::proxy::MakeResult BooleanArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { ::matlab::data::StructArray opts = constructor_arguments[0]; @@ -40,7 +43,8 @@ namespace arrow::matlab::array::proxy { const auto array_length = logical_mda.getNumberOfElements(); auto array_data = arrow::ArrayData::Make(data_type, array_length, {validity_bitmap_buffer, data_buffer}); - return std::make_shared(arrow::MakeArray(array_data)); + auto arrow_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(arrow_array)); } void BooleanArray::toMATLAB(libmexclass::proxy::method::Context& context) { diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h index 6966d1090ee56..b3117d852a7d8 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h @@ -20,15 +20,13 @@ #include "arrow/matlab/array/proxy/array.h" #include 
"libmexclass/proxy/Proxy.h" +#include "arrow/type_fwd.h" namespace arrow::matlab::array::proxy { class BooleanArray : public arrow::matlab::array::proxy::Array { public: - BooleanArray(const std::shared_ptr logical_array) - : arrow::matlab::array::proxy::Array() { - array = logical_array; - } + BooleanArray(std::shared_ptr array); static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index 24d2565f306c9..d3930c77ca036 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -27,6 +27,7 @@ #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" #include "arrow/matlab/bit/unpack.h" +#include "arrow/matlab/buffer/matlab_buffer.h" #include "libmexclass/proxy/Proxy.h" @@ -35,14 +36,15 @@ namespace arrow::matlab::array::proxy { template class NumericArray : public arrow::matlab::array::proxy::Array { public: - NumericArray(const std::shared_ptr numeric_array) - : arrow::matlab::array::proxy::Array() { - array = numeric_array; - } + using ArrowType = typename arrow::CTypeTraits::ArrowType; - static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { - using ArrowType = typename arrow::CTypeTraits::ArrowType; - using BuilderType = typename arrow::CTypeTraits::BuilderType; + NumericArray(const std::shared_ptr> numeric_array) + : arrow::matlab::array::proxy::Array{std::move(numeric_array)} {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using NumericArray = arrow::NumericArray; + using NumericArrayProxy = typename arrow::matlab::array::proxy::NumericArray; ::matlab::data::StructArray opts = constructor_arguments[0]; 
@@ -50,20 +52,16 @@ class NumericArray : public arrow::matlab::array::proxy::Array { const ::matlab::data::TypedArray numeric_mda = opts[0]["MatlabArray"]; const ::matlab::data::TypedArray valid_mda = opts[0]["Valid"]; - // Get raw pointer of mxArray - auto it(numeric_mda.cbegin()); - auto dt = it.operator->(); + auto data_buffer = std::make_shared(numeric_mda); const auto data_type = arrow::CTypeTraits::type_singleton(); const auto length = static_cast(numeric_mda.getNumberOfElements()); // cast size_t to int64_t - // Do not make a copy when creating arrow::Buffer - auto data_buffer = std::make_shared(reinterpret_cast(dt), - sizeof(CType) * numeric_mda.getNumberOfElements()); // Pack the validity bitmap values. MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, bit::packValid(valid_mda), error::BITPACK_VALIDITY_BITMAP_ERROR_ID); auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer}); - return std::make_shared>(arrow::MakeArray(array_data)); + auto numeric_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(numeric_array)); } protected: diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc index 51f39d72fca6c..2a11323a212bb 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc @@ -26,6 +26,9 @@ namespace arrow::matlab::array::proxy { + StringArray::StringArray(const std::shared_ptr string_array) + : arrow::matlab::array::proxy::Array(std::move(string_array)) {} + libmexclass::proxy::MakeResult StringArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; @@ -53,8 +56,8 @@ namespace arrow::matlab::array::proxy { arrow::StringBuilder builder; MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(strings, unpacked_validity_bitmap_ptr), error::STRING_BUILDER_APPEND_FAILED); 
MATLAB_ASSIGN_OR_ERROR(auto array, builder.Finish(), error::STRING_BUILDER_FINISH_FAILED); - - return std::make_shared(array); + auto typed_array = std::static_pointer_cast(array); + return std::make_shared(std::move(typed_array)); } void StringArray::toMATLAB(libmexclass::proxy::method::Context& context) { diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h index de0c4625928e4..bdcfedd7cdda3 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h @@ -21,15 +21,14 @@ #include "libmexclass/proxy/Proxy.h" +#include "arrow/type_fwd.h" + namespace arrow::matlab::array::proxy { class StringArray : public arrow::matlab::array::proxy::Array { public: - StringArray(const std::shared_ptr string_array) - : arrow::matlab::array::proxy::Array() { - array = string_array; - } - + StringArray(const std::shared_ptr string_array); + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc index 3b19daec58576..17a86e848a868 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc @@ -20,24 +20,22 @@ #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" #include "arrow/matlab/bit/unpack.h" +#include "arrow/matlab/buffer/matlab_buffer.h" #include "arrow/matlab/type/time_unit.h" #include "arrow/util/utf8.h" #include "arrow/type.h" -#include "arrow/builder.h" - namespace arrow::matlab::array::proxy { - namespace { - const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray& valid_elements) { - const auto valid_elements_iterator(valid_elements.cbegin()); - return reinterpret_cast(valid_elements_iterator.operator->()); - } - } // 
anonymous namespace + TimestampArray::TimestampArray(std::shared_ptr array) + : arrow::matlab::array::proxy::Array{std::move(array)} {} libmexclass::proxy::MakeResult TimestampArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; + using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using TimestampArray = arrow::TimestampArray; + using TimestampArrayProxy = arrow::matlab::array::proxy::TimestampArray; mda::StructArray opts = constructor_arguments[0]; @@ -49,32 +47,31 @@ namespace arrow::matlab::array::proxy { const mda::TypedArray units_mda = opts[0]["TimeUnit"]; // extract the time zone string - const std::u16string& utf16_timezone = timezone_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(utf16_timezone), + const std::u16string& u16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, + arrow::util::UTF16StringToUTF8(u16_timezone), error::UNICODE_CONVERSION_ERROR_ID); // extract the time unit - const std::u16string& utf16_unit = units_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(utf16_unit), - error::UKNOWN_TIME_UNIT_ERROR_ID); + const std::u16string& u16_timeunit = units_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto time_unit, + arrow::matlab::type::timeUnitFromString(u16_timeunit), + error::UKNOWN_TIME_UNIT_ERROR_ID) // create the timestamp_type auto data_type = arrow::timestamp(time_unit, timezone); - arrow::TimestampBuilder builder(data_type, arrow::default_memory_pool()); + auto array_length = static_cast(timestamp_mda.getNumberOfElements()); // cast size_t to int64_t - // Get raw pointer of mxArray - auto it(timestamp_mda.cbegin()); - auto dt = it.operator->(); + auto data_buffer = std::make_shared(timestamp_mda); // Pack the validity bitmap values. 
- const uint8_t* valid_mask = getUnpackedValidityBitmap(validity_bitmap_mda); - const auto num_elements = timestamp_mda.getNumberOfElements(); - - // Append values - MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(dt, num_elements, valid_mask), error::APPEND_VALUES_ERROR_ID); - MATLAB_ASSIGN_OR_ERROR(auto timestamp_array, builder.Finish(), error::BUILD_ARRAY_ERROR_ID); + MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, + bit::packValid(validity_bitmap_mda), + error::BITPACK_VALIDITY_BITMAP_ERROR_ID); - return std::make_shared(timestamp_array); + auto array_data = arrow::ArrayData::Make(data_type, array_length, {packed_validity_bitmap, data_buffer}); + auto timestamp_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(timestamp_array)); } void TimestampArray::toMATLAB(libmexclass::proxy::method::Context& context) { diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h index ec67245564beb..8f28d6165ed2f 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h @@ -23,19 +23,17 @@ #include "libmexclass/proxy/Proxy.h" +#include "arrow/type_fwd.h" + namespace arrow::matlab::array::proxy { class TimestampArray : public arrow::matlab::array::proxy::Array { public: - TimestampArray(const std::shared_ptr timestamp_array) - : arrow::matlab::array::proxy::Array() { - array = timestamp_array; - } + TimestampArray(std::shared_ptr array); static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override; }; diff --git a/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h b/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h new file mode 100644 index 0000000000000..80b237544ded8 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h @@ -0,0 
+1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/buffer.h" + +#include "MatlabDataArray.hpp" + +namespace arrow::matlab::buffer { + + namespace mda = ::matlab::data; + + class MatlabBuffer : public arrow::Buffer { + public: + + template + MatlabBuffer(const mda::TypedArray typed_array) + : arrow::Buffer{nullptr, 0} + , array{typed_array} { + + // Get raw pointer of mxArray + auto it(typed_array.cbegin()); + auto dt = it.operator->(); + + data_ = reinterpret_cast(dt); + size_ = sizeof(CType) * static_cast(typed_array.getNumberOfElements()); + capacity_ = size_; + is_mutable_ = false; + } + private: + const mda::Array array; + }; +} \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+array/NumericArray.m b/matlab/src/matlab/+arrow/+array/NumericArray.m index fa692724c6458..fb2fc1d333939 100644 --- a/matlab/src/matlab/+arrow/+array/NumericArray.m +++ b/matlab/src/matlab/+arrow/+array/NumericArray.m @@ -15,11 +15,6 @@ classdef NumericArray < arrow.array.Array % arrow.array.NumericArray - - - properties (Hidden, SetAccess=protected) - MatlabArray = [] - end properties(Abstract, Access=protected) NullSubstitutionValue; @@ -38,9 +33,6 @@ validElements = 
arrow.args.parseValidElements(data, opts); opts = struct(MatlabArray=data, Valid=validElements); obj@arrow.array.Array("Name", proxyName, "ConstructorArguments", {opts}); - obj.MatlabArray = cast(obj.MatlabArray, type); - % Store a reference to the array - obj.MatlabArray = data; end function matlabArray = toMATLAB(obj) diff --git a/matlab/test/arrow/array/hNumericArray.m b/matlab/test/arrow/array/hNumericArray.m index 69d77c5394697..f9f5f1d9e4ee3 100644 --- a/matlab/test/arrow/array/hNumericArray.m +++ b/matlab/test/arrow/array/hNumericArray.m @@ -43,14 +43,6 @@ function BasicTest(tc) tc.verifyEqual(className, tc.ArrowArrayClassName); end - function ShallowCopyTest(tc) - % NumericArrays stores a shallow copy of the array keep the - % memory alive. - A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1, 2, 3])); - tc.verifyEqual(A.MatlabArray, tc.MatlabArrayFcn([1, 2, 3])); - tc.verifyEqual(toMATLAB(A), tc.MatlabArrayFcn([1 2 3]')); - end - function ToMATLAB(tc) % Create array from a scalar A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100)); diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 1a049f47d1a8f..41d2ee4a705d7 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -38,8 +38,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy") - + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc" From 3bdbd0d34cdfe6f34e1c2bf80df472faccdff3c0 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 13 Jul 2023 11:15:43 +0200 Subject: [PATCH 05/35] GH-36629: [CI][Python] Skip dask tests due to our non-nanosecond changes in arrow->pandas conversion (#36630) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Due to the changes on #33321 a dask test started failing. ### What changes are included in this PR? Skip the test in the meantime ### Are these changes tested? Yes, with crossbow ### Are there any user-facing changes? No * Closes: #36629 Authored-by: Raúl Cumplido Signed-off-by: Joris Van den Bossche --- ci/scripts/integration_dask.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/scripts/integration_dask.sh b/ci/scripts/integration_dask.sh index eeaba715b6ae7..d1e2ecdc847f2 100755 --- a/ci/scripts/integration_dask.sh +++ b/ci/scripts/integration_dask.sh @@ -33,6 +33,9 @@ python -c "import dask.dataframe" pytest -v --pyargs dask.dataframe.tests.test_dataframe pytest -v --pyargs dask.dataframe.io.tests.test_orc -pytest -v --pyargs dask.dataframe.io.tests.test_parquet +# skip failing parquet tests +# test_pandas_timestamp_overflow_pyarrow is skipped because of GH-33321. +pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ + -k "not test_pandas_timestamp_overflow_pyarrow" # this file contains parquet tests that use S3 filesystem pytest -v --pyargs dask.bytes.tests.test_s3 From 83b1bcd2515f1033c44beb0d87c13ecddb8a5108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 13 Jul 2023 13:16:26 +0200 Subject: [PATCH 06/35] GH-36655: [Dev] Fix fury command to upload nightly wheels (#36657) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The fury command was missing the push command. ### What changes are included in this PR? Fix the fury command ### Are these changes tested? Will test with CI. 
### Are there any user-facing changes? No * Closes: #36655 Authored-by: Raúl Cumplido Signed-off-by: Joris Van den Bossche --- dev/tasks/macros.jinja | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index 1fdfc08be03e6..4c65a64c81052 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -142,7 +142,7 @@ on: run: | PATH=$(echo $(ruby -r rubygems -e 'puts Gem.user_dir') | sed "s/C:\//\/c\//")/bin:$PATH gem install --user-install gemfury - fury \ + fury push \ --api-token=${CROSSBOW_GEMFURY_TOKEN} \ --as=${CROSSBOW_GEMFURY_ORG} \ {{ pattern }} From c9fbc88eccd56fda27d6bc655732ed17388317ac Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 13 Jul 2023 20:40:28 +0800 Subject: [PATCH 07/35] GH-36643: [C++][Parquet] Use nested namespace in parquet (#36647) ### Rationale for this change Just for code style, using nested namespace rather than before ### What changes are included in this PR? Using nested namespace ### Are these changes tested? no ### Are there any user-facing changes? 
no * Closes: #36643 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/arrow/arrow_schema_test.cc | 7 ++----- cpp/src/parquet/arrow/arrow_statistics_test.cc | 6 ++---- cpp/src/parquet/arrow/path_internal.cc | 6 ++---- cpp/src/parquet/arrow/path_internal_test.cc | 6 ++---- cpp/src/parquet/arrow/reader.cc | 6 ++---- cpp/src/parquet/arrow/reader_internal.cc | 6 ++---- cpp/src/parquet/arrow/reconstruct_internal_test.cc | 6 ++---- cpp/src/parquet/arrow/schema.cc | 7 ++----- cpp/src/parquet/arrow/schema_internal.cc | 7 ++----- cpp/src/parquet/arrow/schema_internal.h | 6 ++---- cpp/src/parquet/arrow/writer.cc | 6 ++---- cpp/src/parquet/bloom_filter_reader_test.cc | 6 ++---- cpp/src/parquet/encoding_test.cc | 7 ++----- cpp/src/parquet/encryption/crypto_factory.cc | 6 ++---- cpp/src/parquet/encryption/crypto_factory.h | 6 ++---- cpp/src/parquet/encryption/encryption_internal.cc | 6 ++---- cpp/src/parquet/encryption/encryption_internal.h | 6 ++---- cpp/src/parquet/encryption/encryption_internal_nossl.cc | 6 ++---- cpp/src/parquet/encryption/file_key_material_store.h | 6 ++---- cpp/src/parquet/encryption/file_key_unwrapper.cc | 6 ++---- cpp/src/parquet/encryption/file_key_unwrapper.h | 6 ++---- cpp/src/parquet/encryption/file_key_wrapper.cc | 6 ++---- cpp/src/parquet/encryption/file_key_wrapper.h | 6 ++---- .../parquet/encryption/file_system_key_material_store.cc | 6 ++---- .../parquet/encryption/file_system_key_material_store.h | 6 ++---- cpp/src/parquet/encryption/key_encryption_key.h | 6 ++---- cpp/src/parquet/encryption/key_management_test.cc | 8 ++------ cpp/src/parquet/encryption/key_material.cc | 6 ++---- cpp/src/parquet/encryption/key_material.h | 6 ++---- cpp/src/parquet/encryption/key_metadata.cc | 6 ++---- cpp/src/parquet/encryption/key_metadata.h | 6 ++---- cpp/src/parquet/encryption/key_metadata_test.cc | 8 ++------ cpp/src/parquet/encryption/key_toolkit.cc | 6 ++---- cpp/src/parquet/encryption/key_toolkit.h | 6 ++---- 
cpp/src/parquet/encryption/key_toolkit_internal.cc | 8 ++------ cpp/src/parquet/encryption/key_toolkit_internal.h | 8 ++------ cpp/src/parquet/encryption/key_wrapping_test.cc | 8 ++------ cpp/src/parquet/encryption/kms_client.cc | 6 ++---- cpp/src/parquet/encryption/kms_client.h | 6 ++---- cpp/src/parquet/encryption/kms_client_factory.h | 6 ++---- cpp/src/parquet/encryption/local_wrap_kms_client.cc | 6 ++---- cpp/src/parquet/encryption/local_wrap_kms_client.h | 6 ++---- cpp/src/parquet/encryption/properties_test.cc | 8 ++------ cpp/src/parquet/encryption/read_configurations_test.cc | 8 ++------ cpp/src/parquet/encryption/test_encryption_util.cc | 8 ++------ cpp/src/parquet/encryption/test_encryption_util.h | 6 ++---- cpp/src/parquet/encryption/test_in_memory_kms.cc | 6 ++---- cpp/src/parquet/encryption/test_in_memory_kms.h | 6 ++---- .../parquet/encryption/two_level_cache_with_expiration.h | 6 ++---- .../encryption/two_level_cache_with_expiration_test.cc | 8 ++------ cpp/src/parquet/encryption/write_configurations_test.cc | 8 ++------ cpp/src/parquet/level_comparison.cc | 6 ++---- cpp/src/parquet/level_comparison.h | 6 ++---- cpp/src/parquet/level_comparison_inc.h | 8 ++------ cpp/src/parquet/level_conversion.cc | 6 ++---- cpp/src/parquet/level_conversion.h | 6 ++---- cpp/src/parquet/level_conversion_bmi2.cc | 6 ++---- cpp/src/parquet/level_conversion_inc.h | 9 ++------- cpp/src/parquet/level_conversion_test.cc | 6 ++---- cpp/src/parquet/types.h | 6 ++---- 60 files changed, 120 insertions(+), 269 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 7c608e4424753..f11101eb24298 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -50,9 +50,7 @@ using parquet::schema::PrimitiveNode; using ::testing::ElementsAre; -namespace parquet { - -namespace arrow { +namespace parquet::arrow { const auto BOOL = ::arrow::boolean(); const auto UINT8 = 
::arrow::uint8(); @@ -1776,5 +1774,4 @@ TEST_F(TestLevels, ListErrors) { } } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/arrow_statistics_test.cc b/cpp/src/parquet/arrow/arrow_statistics_test.cc index 604f163a66f84..ad4496933ef4c 100644 --- a/cpp/src/parquet/arrow/arrow_statistics_test.cc +++ b/cpp/src/parquet/arrow/arrow_statistics_test.cc @@ -36,8 +36,7 @@ using arrow::Table; using arrow::io::BufferReader; -namespace parquet { -namespace arrow { +namespace parquet::arrow { struct StatisticsTestParam { std::shared_ptr<::arrow::Table> table; @@ -157,5 +156,4 @@ INSTANTIATE_TEST_SUITE_P( /*expected_min=*/"z", /*expected_max=*/"z"})); -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc index 2aeee6e500f5d..919c97f4323b6 100644 --- a/cpp/src/parquet/arrow/path_internal.cc +++ b/cpp/src/parquet/arrow/path_internal.cc @@ -108,8 +108,7 @@ #include "parquet/properties.h" -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { @@ -901,5 +900,4 @@ Status MultipathLevelBuilder::Write(const Array& array, bool array_field_nullabl return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/path_internal_test.cc b/cpp/src/parquet/arrow/path_internal_test.cc index 4645807007478..fb9c404247f3b 100644 --- a/cpp/src/parquet/arrow/path_internal_test.cc +++ b/cpp/src/parquet/arrow/path_internal_test.cc @@ -29,8 +29,7 @@ #include "parquet/properties.h" -namespace parquet { -namespace arrow { +namespace parquet::arrow { using ::arrow::default_memory_pool; using ::arrow::field; @@ -644,5 +643,4 @@ TEST_F(MultipathLevelBuilderTest, TestPrimitiveNonNullable) { EXPECT_THAT(results_[0].post_list_elements[0].end, Eq(4)); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git 
a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 40fbdcbb562b1..855fb5a5a4882 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -76,8 +76,7 @@ using parquet::internal::RecordReader; namespace bit_util = arrow::bit_util; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { ::arrow::Result> ChunksToSingle(const ChunkedArray& chunked) { @@ -1414,5 +1413,4 @@ Status FuzzReader(const uint8_t* data, int64_t size) { } // namespace internal -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index a294b712a7ce3..7034f5839e1ea 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -94,8 +94,7 @@ using ParquetType = parquet::Type; namespace bit_util = arrow::bit_util; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { template @@ -856,5 +855,4 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr& va return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/reconstruct_internal_test.cc b/cpp/src/parquet/arrow/reconstruct_internal_test.cc index 8a69f8266f1e6..4e1f421498e85 100644 --- a/cpp/src/parquet/arrow/reconstruct_internal_test.cc +++ b/cpp/src/parquet/arrow/reconstruct_internal_test.cc @@ -65,8 +65,7 @@ using testing::Eq; using testing::NotNull; using testing::SizeIs; -namespace parquet { -namespace arrow { +namespace parquet::arrow { using parquet::schema::GroupNode; using parquet::schema::NodePtr; @@ -1637,5 +1636,4 @@ TEST_F(TestReconstructColumn, ListList6) { // TODO legacy-list-in-struct etc.? 
-} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index c5d5e0743a7f1..f713548d05a70 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -63,9 +63,7 @@ using parquet::LogicalType; using parquet::internal::LevelInfo; -namespace parquet { - -namespace arrow { +namespace parquet::arrow { // ---------------------------------------------------------------------- // Parquet to Arrow schema conversion @@ -1106,5 +1104,4 @@ Status SchemaManifest::Make(const SchemaDescriptor* schema, return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc index 064bf4f55cc7e..da0427cb31000 100644 --- a/cpp/src/parquet/arrow/schema_internal.cc +++ b/cpp/src/parquet/arrow/schema_internal.cc @@ -23,9 +23,7 @@ using ArrowType = ::arrow::DataType; using ArrowTypeId = ::arrow::Type; using ParquetType = parquet::Type; -namespace parquet { - -namespace arrow { +namespace parquet::arrow { using ::arrow::Result; using ::arrow::Status; @@ -218,5 +216,4 @@ Result> GetArrowType( primitive.type_length(), int96_arrow_time_unit); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h index fb837c3ee6cab..55292ac35ab9c 100644 --- a/cpp/src/parquet/arrow/schema_internal.h +++ b/cpp/src/parquet/arrow/schema_internal.h @@ -24,8 +24,7 @@ namespace arrow { class DataType; } -namespace parquet { -namespace arrow { +namespace parquet::arrow { using ::arrow::Result; @@ -47,5 +46,4 @@ Result> GetArrowType( const schema::PrimitiveNode& primitive, ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO); -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git 
a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 6d22f318f6b97..0c67e8d6bb3d4 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -73,8 +73,7 @@ using parquet::ParquetFileWriter; using parquet::ParquetVersion; using parquet::schema::GroupNode; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { @@ -600,5 +599,4 @@ Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool, return writer->Close(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/bloom_filter_reader_test.cc b/cpp/src/parquet/bloom_filter_reader_test.cc index 64dd0d9b9d190..e297ab7045120 100644 --- a/cpp/src/parquet/bloom_filter_reader_test.cc +++ b/cpp/src/parquet/bloom_filter_reader_test.cc @@ -22,8 +22,7 @@ #include "parquet/file_reader.h" #include "parquet/test_util.h" -namespace parquet { -namespace test { +namespace parquet::test { TEST(BloomFilterReader, ReadBloomFilter) { std::string dir_string(parquet::test::get_data_dir()); @@ -70,5 +69,4 @@ TEST(BloomFilterReader, FileNotHaveBloomFilter) { ASSERT_EQ(nullptr, bloom_filter); } -} // namespace test -} // namespace parquet +} // namespace parquet::test diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index 6285c4c12539d..7a910e4220831 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -48,9 +48,7 @@ using arrow::internal::checked_cast; namespace bit_util = arrow::bit_util; -namespace parquet { - -namespace test { +namespace parquet::test { TEST(VectorBooleanTest, TestEncodeBoolDecode) { // PARQUET-454 @@ -1910,5 +1908,4 @@ TEST(DeltaLengthByteArrayEncodingAdHoc, ArrowDirectPut) { CheckDecode(encoded, ::arrow::ArrayFromJSON(::arrow::large_binary(), values)); } -} // namespace test -} // namespace parquet +} // namespace parquet::test diff --git a/cpp/src/parquet/encryption/crypto_factory.cc 
b/cpp/src/parquet/encryption/crypto_factory.cc index 67e3d8c5f297e..ebb7c3c7b37fb 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -26,8 +26,7 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { void CryptoFactory::RegisterKmsClientFactory( std::shared_ptr kms_client_factory) { @@ -192,5 +191,4 @@ void CryptoFactory::RotateMasterKeys( double_wrapping, cache_lifetime_seconds); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/crypto_factory.h b/cpp/src/parquet/encryption/crypto_factory.h index 14015a95c85e5..291cccf30f8e3 100644 --- a/cpp/src/parquet/encryption/crypto_factory.h +++ b/cpp/src/parquet/encryption/crypto_factory.h @@ -25,8 +25,7 @@ #include "parquet/encryption/kms_client_factory.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm = ParquetCipher::AES_GCM_V1; @@ -152,5 +151,4 @@ class PARQUET_EXPORT CryptoFactory { KeyToolkit key_toolkit_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 1c4d3d8dc4897..6e66efeff6326 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -31,8 +31,7 @@ using parquet::ParquetException; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr int kGcmMode = 0; constexpr int kCtrMode = 1; @@ -649,5 +648,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); } -} // namespace 
encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 24093c68be531..4ed5b5cf61243 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -26,8 +26,7 @@ using parquet::ParquetCipher; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr int kGcmTagLength = 16; constexpr int kNonceLength = 12; @@ -129,5 +128,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD); // Wraps OpenSSL RAND_bytes function void RandBytes(unsigned char* buf, int num); -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index bb203f0fd877d..0241923474de9 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -18,8 +18,7 @@ #include "parquet/encryption/encryption_internal.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { void ThrowOpenSSLRequiredException() { throw ParquetException( @@ -115,5 +114,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_material_store.h b/cpp/src/parquet/encryption/file_key_material_store.h index 862e8d9761b0d..83f028a4bc1e9 100644 --- a/cpp/src/parquet/encryption/file_key_material_store.h +++ b/cpp/src/parquet/encryption/file_key_material_store.h @@ -24,8 +24,7 @@ #include "arrow/filesystem/filesystem.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { 
+namespace parquet::encryption { /// Stores encryption key material outside the Parquet file, for example in a separate /// small file in the same folder. This is important for “key rotation”, when MEKs have to @@ -55,5 +54,4 @@ class PARQUET_EXPORT FileKeyMaterialStore { virtual ~FileKeyMaterialStore() {} }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index a2e2d2df5b284..50cc6eee539f7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -22,8 +22,7 @@ #include "parquet/encryption/file_key_unwrapper.h" #include "parquet/encryption/key_metadata.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { FileKeyUnwrapper::FileKeyUnwrapper( KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config, @@ -136,5 +135,4 @@ std::shared_ptr FileKeyUnwrapper::GetKmsClientFromConfigOrKeyMaterial cache_entry_lifetime_seconds_); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index 3400641ed91c5..71b245788a713 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -27,8 +27,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This class will retrieve the key from "key metadata", following these steps: // 1. Parse "key metadata" (see structure in KeyMetadata class). 
@@ -78,5 +77,4 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr<::arrow::fs::FileSystem> file_system_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index 4f0f1d219acba..704651ebaa8b3 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -22,8 +22,7 @@ #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config, @@ -124,5 +123,4 @@ KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey( return KeyEncryptionKey(kek_bytes, kek_id, encoded_wrapped_kek); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_wrapper.h b/cpp/src/parquet/encryption/file_key_wrapper.h index 95ad6ec4829bf..26b9719de64db 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.h +++ b/cpp/src/parquet/encryption/file_key_wrapper.h @@ -29,8 +29,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This class will generate "key metadata" from "data encryption key" and "master key", // following these steps: @@ -82,5 +81,4 @@ class PARQUET_EXPORT FileKeyWrapper { uint16_t key_counter_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_system_key_material_store.cc b/cpp/src/parquet/encryption/file_system_key_material_store.cc index 494a75e4cc307..2d898c1d3970f 100644 --- a/cpp/src/parquet/encryption/file_system_key_material_store.cc +++ 
b/cpp/src/parquet/encryption/file_system_key_material_store.cc @@ -27,8 +27,7 @@ #include "parquet/encryption/key_material.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char FileSystemKeyMaterialStore::kKeyMaterialFilePrefix[]; constexpr const char FileSystemKeyMaterialStore::kTempFilePrefix[]; @@ -139,5 +138,4 @@ void FileSystemKeyMaterialStore::MoveMaterialTo( } } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_system_key_material_store.h b/cpp/src/parquet/encryption/file_system_key_material_store.h index 6fbdd55e9413c..896a53202f589 100644 --- a/cpp/src/parquet/encryption/file_system_key_material_store.h +++ b/cpp/src/parquet/encryption/file_system_key_material_store.h @@ -25,8 +25,7 @@ #include "parquet/encryption/file_key_material_store.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// A FileKeyMaterialStore that stores key material in a file system file in the same /// folder as the Parquet file. @@ -87,5 +86,4 @@ class PARQUET_EXPORT FileSystemKeyMaterialStore : public FileKeyMaterialStore { std::unordered_map key_material_map_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_encryption_key.h b/cpp/src/parquet/encryption/key_encryption_key.h index 153bb4b5e2885..62263ee3cd506 100644 --- a/cpp/src/parquet/encryption/key_encryption_key.h +++ b/cpp/src/parquet/encryption/key_encryption_key.h @@ -22,8 +22,7 @@ #include "arrow/util/base64.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // In the double wrapping mode, each "data encryption key" (DEK) is encrypted with a “key // encryption key” (KEK), that in turn is encrypted with a "master encryption key" (MEK). 
@@ -55,5 +54,4 @@ class KeyEncryptionKey { std::string encoded_wrapped_kek_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_management_test.cc b/cpp/src/parquet/encryption/key_management_test.cc index 5eebde0c29584..f733c43ee1e79 100644 --- a/cpp/src/parquet/encryption/key_management_test.cc +++ b/cpp/src/parquet/encryption/key_management_test.cc @@ -37,9 +37,7 @@ #include "parquet/file_reader.h" #include "parquet/test_util.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { class TestEncryptionKeyManagement : public ::testing::Test { protected: @@ -387,6 +385,4 @@ TEST_F(TestEncryptionKeyManagement, ReadParquetMRExternalKeyMaterialFile) { } } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/key_material.cc b/cpp/src/parquet/encryption/key_material.cc index 372279c33a5bd..1cebf5900f316 100644 --- a/cpp/src/parquet/encryption/key_material.cc +++ b/cpp/src/parquet/encryption/key_material.cc @@ -25,8 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KeyMaterial::kKeyMaterialTypeField[]; constexpr const char KeyMaterial::kKeyMaterialType1[]; @@ -155,5 +154,4 @@ std::string KeyMaterial::SerializeToJson( return json_writer.Serialize(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_material.h b/cpp/src/parquet/encryption/key_material.h index f20d23ea35d3b..3e7e862c996d3 100644 --- a/cpp/src/parquet/encryption/key_material.h +++ b/cpp/src/parquet/encryption/key_material.h @@ -29,8 +29,7 @@ class ObjectParser; } // namespace json } // namespace arrow -namespace parquet { -namespace encryption { 
+namespace parquet::encryption { // KeyMaterial class represents the "key material", keeping the information that allows // readers to recover an encryption key (see description of the KeyMetadata class). The @@ -127,5 +126,4 @@ class PARQUET_EXPORT KeyMaterial { std::string encoded_wrapped_dek_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata.cc b/cpp/src/parquet/encryption/key_metadata.cc index 624626c890cc5..e23a67b6b86ee 100644 --- a/cpp/src/parquet/encryption/key_metadata.cc +++ b/cpp/src/parquet/encryption/key_metadata.cc @@ -24,8 +24,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KeyMetadata::kKeyMaterialInternalStorageField[]; constexpr const char KeyMetadata::kKeyReferenceField[]; @@ -85,5 +84,4 @@ std::string KeyMetadata::CreateSerializedForExternalMaterial( return json_writer.Serialize(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata.h b/cpp/src/parquet/encryption/key_metadata.h index b6dc349f19bdf..6fe8ac7ccb9db 100644 --- a/cpp/src/parquet/encryption/key_metadata.h +++ b/cpp/src/parquet/encryption/key_metadata.h @@ -24,8 +24,7 @@ #include "parquet/exception.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // Parquet encryption specification defines "key metadata" as an arbitrary byte array, // generated by file writers for each encryption key, and passed to the low level API for @@ -89,5 +88,4 @@ class PARQUET_EXPORT KeyMetadata { ::std::variant key_material_or_reference_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata_test.cc 
b/cpp/src/parquet/encryption/key_metadata_test.cc index 3f891ef26db83..f9409edf2a8d9 100644 --- a/cpp/src/parquet/encryption/key_metadata_test.cc +++ b/cpp/src/parquet/encryption/key_metadata_test.cc @@ -22,9 +22,7 @@ #include "parquet/encryption/key_material.h" #include "parquet/encryption/key_metadata.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { TEST(KeyMetadataTest, InternalMaterialStorage) { bool is_footer_key = true; @@ -72,6 +70,4 @@ TEST(KeyMetadataTest, ExternalMaterialStorage) { ASSERT_EQ(key_metadata.key_reference(), key_reference); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/key_toolkit.cc b/cpp/src/parquet/encryption/key_toolkit.cc index 0b8543b458289..cb488d3fa23a0 100644 --- a/cpp/src/parquet/encryption/key_toolkit.cc +++ b/cpp/src/parquet/encryption/key_toolkit.cc @@ -27,8 +27,7 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { std::shared_ptr KeyToolkit::GetKmsClient( const KmsConnectionConfig& kms_connection_config, double cache_entry_lifetime_ms) { @@ -119,5 +118,4 @@ void KeyToolkit::RemoveCacheEntriesForAllTokens() { kek_read_cache_per_token().Clear(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index d65f5d8a2d0f9..f63ade4c8c93f 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -26,8 +26,7 @@ #include "parquet/encryption/two_level_cache_with_expiration.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { static constexpr uint64_t kCacheCleanPeriodForKeyRotation = 60 * 60; // 1 hour 
@@ -104,5 +103,4 @@ class PARQUET_EXPORT KeyWithMasterId { const std::string master_id_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 6e0e4e6c65e1e..bdd65d8de3919 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -20,9 +20,7 @@ #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { -namespace internal { +namespace parquet::encryption::internal { // Acceptable key lengths in number of bits, used to validate the data key lengths // configured by users and the master key lengths fetched from KMS server. @@ -77,6 +75,4 @@ bool ValidateKeyLength(int32_t key_length_bits) { return found_key_length != std::end(kAcceptableDataKeyLengths); } -} // namespace internal -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::internal diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.h b/cpp/src/parquet/encryption/key_toolkit_internal.h index bcc60bdad68e2..8474a91fc1aba 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.h +++ b/cpp/src/parquet/encryption/key_toolkit_internal.h @@ -21,9 +21,7 @@ #include "parquet/platform.h" -namespace parquet { -namespace encryption { -namespace internal { +namespace parquet::encryption::internal { /// Encrypts "key" with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT @@ -38,6 +36,4 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, PARQUET_EXPORT bool ValidateKeyLength(int32_t key_length_bits); -} // namespace internal -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::internal diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 
dba9d67dfe13d..198ceb9bf4b11 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -26,9 +26,7 @@ #include "parquet/encryption/test_encryption_util.h" #include "parquet/encryption/test_in_memory_kms.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { class KeyWrappingTest : public ::testing::Test { protected: @@ -113,6 +111,4 @@ TEST_F(KeyWrappingTest, ExternalMaterialStorage) { this->WrapThenUnwrap(false, false, false); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/kms_client.cc b/cpp/src/parquet/encryption/kms_client.cc index b9c720272c479..fee03dd3db656 100644 --- a/cpp/src/parquet/encryption/kms_client.cc +++ b/cpp/src/parquet/encryption/kms_client.cc @@ -17,8 +17,7 @@ #include "parquet/encryption/kms_client.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KmsClient::kKmsInstanceIdDefault[]; constexpr const char KmsClient::kKmsInstanceUrlDefault[]; @@ -40,5 +39,4 @@ void KmsConnectionConfig::SetDefaultIfEmpty() { } } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 5ffa604ffd198..a55fd552eed5f 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -26,8 +26,7 @@ #include "parquet/exception.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// This class wraps the key access token of a KMS server. 
If your token changes over /// time, you should keep the reference to the KeyAccessToken object and call Refresh() @@ -91,5 +90,4 @@ class PARQUET_EXPORT KmsClient { virtual ~KmsClient() {} }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/kms_client_factory.h b/cpp/src/parquet/encryption/kms_client_factory.h index eac8dfc5d06e2..7a7c77c7eebbf 100644 --- a/cpp/src/parquet/encryption/kms_client_factory.h +++ b/cpp/src/parquet/encryption/kms_client_factory.h @@ -20,8 +20,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { class PARQUET_EXPORT KmsClientFactory { public: @@ -36,5 +35,4 @@ class PARQUET_EXPORT KmsClientFactory { bool wrap_locally_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 1b89dc57d0e52..23e28bb8e61be 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -25,8 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char LocalWrapKmsClient::kLocalWrapNoKeyVersion[]; @@ -112,5 +111,4 @@ std::string LocalWrapKmsClient::GetKeyFromServer(const std::string& key_identifi return master_key; } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 65cf8f42c7964..3c90d82960525 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -25,8 +25,7 @@ #include "parquet/encryption/kms_client.h" 
#include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// This class supports local wrapping mode, master keys will be fetched from the KMS /// server and used to encrypt other keys (data encryption keys or key encryption keys). @@ -92,5 +91,4 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { ::arrow::util::ConcurrentMap master_key_cache_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 0eb5cba201a24..895cf6c63431e 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -22,9 +22,7 @@ #include "parquet/encryption/encryption.h" #include "parquet/encryption/test_encryption_util.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { TEST(TestColumnEncryptionProperties, ColumnEncryptedWithOwnKey) { std::string column_path_1 = "column_1"; @@ -271,6 +269,4 @@ TEST(TestDecryptionProperties, UsingExplicitFooterAndColumnKeys) { ASSERT_EQ(kColumnEncryptionKey2, props->column_key(column_path_2)); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 0bdb67ee9eadc..10de7198ac5ff 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -81,9 +81,7 @@ */ -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using parquet::test::ParquetTestException; @@ -272,6 +270,4 @@ INSTANTIATE_TEST_SUITE_P( 5, "encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"), std::make_tuple(6, "encrypt_columns_and_footer_ctr.parquet.encrypted"))); -} // 
namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc index 26ed15ae031e1..694ed3cf42d9e 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.cc +++ b/cpp/src/parquet/encryption/test_encryption_util.cc @@ -37,9 +37,7 @@ using parquet::Type; using parquet::schema::GroupNode; using parquet::schema::PrimitiveNode; -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { std::string data_file(const char* file) { std::string dir_string(parquet::test::get_data_dir()); @@ -511,6 +509,4 @@ void FileDecryptor::CheckFile(parquet::ParquetFileReader* file_reader, } } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index c2190709aff96..19c230ee5ff99 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -37,8 +37,7 @@ namespace parquet { class ParquetFileReader; -namespace encryption { -namespace test { +namespace encryption::test { using ::arrow::internal::TemporaryDir; @@ -122,6 +121,5 @@ class FileDecryptor { FileDecryptionProperties* file_decryption_properties); }; -} // namespace test -} // namespace encryption +} // namespace encryption::test } // namespace parquet diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 5389196b6fa39..e1339ab48b5d6 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -21,8 +21,7 @@ #include "parquet/encryption/test_in_memory_kms.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { 
std::unordered_map TestOnlyLocalWrapInMemoryKms::master_key_map_; @@ -95,5 +94,4 @@ std::string TestOnlyInServerWrapKms::GetMasterKeyFromServer( return wrapping_master_key_map_.at(master_key_identifier); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index bf887191d1efc..c5fdc797b8ca7 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -25,8 +25,7 @@ #include "parquet/encryption/local_wrap_kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This is a mock class, built for testing only. Don't use it as an example of // LocalWrapKmsClient implementation. @@ -92,5 +91,4 @@ class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory { } }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/two_level_cache_with_expiration.h b/cpp/src/parquet/encryption/two_level_cache_with_expiration.h index fbd06dc7d20e0..76c2b82770000 100644 --- a/cpp/src/parquet/encryption/two_level_cache_with_expiration.h +++ b/cpp/src/parquet/encryption/two_level_cache_with_expiration.h @@ -23,8 +23,7 @@ #include "arrow/util/concurrent_map.h" #include "arrow/util/mutex.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { using ::arrow::util::ConcurrentMap; @@ -155,5 +154,4 @@ class TwoLevelCacheWithExpiration { ::arrow::util::Mutex mutex_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc b/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc index f375a5c5b315c..d8f2c6255145f 100644 --- a/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc +++ 
b/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc @@ -25,9 +25,7 @@ #include "parquet/encryption/two_level_cache_with_expiration.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using ::arrow::SleepFor; @@ -172,6 +170,4 @@ TEST_F(TwoLevelCacheWithExpirationTest, MultiThread) { clean_thread.join(); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc index 580c95fdfd2aa..e262003db3e6a 100644 --- a/cpp/src/parquet/encryption/write_configurations_test.cc +++ b/cpp/src/parquet/encryption/write_configurations_test.cc @@ -60,9 +60,7 @@ * keys. Use the alternative (AES_GCM_CTR_V1) algorithm. */ -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using FileClass = ::arrow::io::FileOutputStream; @@ -231,6 +229,4 @@ void TestEncryptionConfiguration::SetUpTestCase() { temp_dir = temp_data_dir().ValueOrDie(); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/level_comparison.cc b/cpp/src/parquet/level_comparison.cc index c9ad6b76c7280..f3188e987d081 100644 --- a/cpp/src/parquet/level_comparison.cc +++ b/cpp/src/parquet/level_comparison.cc @@ -25,8 +25,7 @@ #include "arrow/util/dispatch.h" -namespace parquet { -namespace internal { +namespace parquet::internal { #if defined(ARROW_HAVE_RUNTIME_AVX2) MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels); @@ -78,5 +77,4 @@ MinMax FindMinMax(const int16_t* levels, int64_t num_levels) { return dispatch.func(levels, num_levels); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_comparison.h b/cpp/src/parquet/level_comparison.h index 
38e7ef8e2ec3f..3ae442dd46e57 100644 --- a/cpp/src/parquet/level_comparison.h +++ b/cpp/src/parquet/level_comparison.h @@ -21,8 +21,7 @@ #include "parquet/platform.h" -namespace parquet { -namespace internal { +namespace parquet::internal { /// Builds a bitmap where each set bit indicates the corresponding level is greater /// than rhs. @@ -36,5 +35,4 @@ struct MinMax { MinMax FindMinMax(const int16_t* levels, int64_t num_levels); -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_comparison_inc.h b/cpp/src/parquet/level_comparison_inc.h index 055f81ffae898..cfee506654331 100644 --- a/cpp/src/parquet/level_comparison_inc.h +++ b/cpp/src/parquet/level_comparison_inc.h @@ -24,9 +24,7 @@ #ifndef PARQUET_IMPL_NAMESPACE #error "PARQUET_IMPL_NAMESPACE must be defined" #endif -namespace parquet { -namespace internal { -namespace PARQUET_IMPL_NAMESPACE { +namespace parquet::internal::PARQUET_IMPL_NAMESPACE { /// Builds a bitmap by applying predicate to the level vector provided. /// /// \param[in] levels Rep or def level array. 
@@ -60,6 +58,4 @@ inline uint64_t GreaterThanBitmapImpl(const int16_t* levels, int64_t num_levels, return LevelsToBitmap(levels, num_levels, [rhs](int16_t value) { return value > rhs; }); } -} // namespace PARQUET_IMPL_NAMESPACE -} // namespace internal -} // namespace parquet +} // namespace parquet::internal::PARQUET_IMPL_NAMESPACE diff --git a/cpp/src/parquet/level_conversion.cc b/cpp/src/parquet/level_conversion.cc index 2e5bcacea55d6..1271afd866d14 100644 --- a/cpp/src/parquet/level_conversion.cc +++ b/cpp/src/parquet/level_conversion.cc @@ -31,8 +31,7 @@ #include "parquet/level_conversion_inc.h" #undef PARQUET_IMPL_NAMESPACE -namespace parquet { -namespace internal { +namespace parquet::internal { namespace { using ::arrow::internal::CpuInfo; @@ -179,5 +178,4 @@ void DefRepLevelsToBitmap(const int16_t* def_levels, const int16_t* rep_levels, output, /*offsets=*/nullptr); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion.h b/cpp/src/parquet/level_conversion.h index 480d82ed0d81a..3f56b2de36a78 100644 --- a/cpp/src/parquet/level_conversion.h +++ b/cpp/src/parquet/level_conversion.h @@ -23,8 +23,7 @@ #include "parquet/platform.h" #include "parquet/schema.h" -namespace parquet { -namespace internal { +namespace parquet::internal { struct PARQUET_EXPORT LevelInfo { LevelInfo() @@ -196,5 +195,4 @@ void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t* def_levels, // (i.e. it isn't hidden by runtime dispatch). 
uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection); -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion_bmi2.cc b/cpp/src/parquet/level_conversion_bmi2.cc index 274d54e503c81..a39d1fd1eb461 100644 --- a/cpp/src/parquet/level_conversion_bmi2.cc +++ b/cpp/src/parquet/level_conversion_bmi2.cc @@ -20,8 +20,7 @@ #include "parquet/level_conversion_inc.h" #undef PARQUET_IMPL_NAMESPACE -namespace parquet { -namespace internal { +namespace parquet::internal { void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels, int64_t num_def_levels, LevelInfo level_info, ValidityBitmapInputOutput* output) { @@ -29,5 +28,4 @@ void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels, level_info, output); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h index 710d2f6237913..0bcdbccb34a73 100644 --- a/cpp/src/parquet/level_conversion_inc.h +++ b/cpp/src/parquet/level_conversion_inc.h @@ -29,13 +29,10 @@ #include "arrow/util/simd.h" #include "parquet/exception.h" #include "parquet/level_comparison.h" - -namespace parquet { -namespace internal { #ifndef PARQUET_IMPL_NAMESPACE #error "PARQUET_IMPL_NAMESPACE must be defined" #endif -namespace PARQUET_IMPL_NAMESPACE { +namespace parquet::internal::PARQUET_IMPL_NAMESPACE { // clang-format off /* Python code to generate lookup table: @@ -352,6 +349,4 @@ void DefLevelsToBitmapSimd(const int16_t* def_levels, int64_t num_def_levels, writer.Finish(); } -} // namespace PARQUET_IMPL_NAMESPACE -} // namespace internal -} // namespace parquet +} // namespace parquet::internal::PARQUET_IMPL_NAMESPACE diff --git a/cpp/src/parquet/level_conversion_test.cc b/cpp/src/parquet/level_conversion_test.cc index bfce74ae3a868..b12680089b839 100644 --- a/cpp/src/parquet/level_conversion_test.cc +++ 
b/cpp/src/parquet/level_conversion_test.cc @@ -31,8 +31,7 @@ #include "arrow/util/bitmap.h" #include "arrow/util/ubsan.h" -namespace parquet { -namespace internal { +namespace parquet::internal { using ::arrow::internal::Bitmap; using ::testing::ElementsAreArray; @@ -357,5 +356,4 @@ TEST(TestOnlyExtractBitsSoftware, BasicTest) { check(0xFECBDA9876543210ULL, 0xF00FF00FF00FF00FULL, 0xFBD87430ULL); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index f35384b8df1ef..e81e9de0a1efa 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -30,13 +30,11 @@ #include "parquet/type_fwd.h" #include "parquet/windows_fixup.h" // for OPTIONAL -namespace arrow { -namespace util { +namespace arrow::util { class Codec; -} // namespace util -} // namespace arrow +} // namespace arrow::util namespace parquet { From fbf8a970c3989cc4b196a724764879d9343817f6 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Thu, 13 Jul 2023 06:01:59 -0700 Subject: [PATCH 08/35] GH-36641: [C++] Remove reference to acero from non-acero file (#36650) ### Rationale for this change Files in modules which do not depend on the acero module should not reference files inside the acero module. ### What changes are included in this PR? There were no changes to the body of any functions. I simply moved functions around so that the acero include was no longer needed. There were some conflicts that arose between the class `bit_util` and the namespace `bit_util` and so I got rid of the class in favor of the namespace as that is more similar to how we handle `bit_util` elsewhere. ### Are these changes tested? Sort of. I would like to add an AVX2 CI system as well. I'm not confident any of the CI builds are building with AVX2 enabled. 
Also, even if we have an AVX2 CI system it would not have caught this issue since the code only needed definitions from the acero header and was not relying on any actual compiled symbols. However, I think setting up tests to catch this sort of invalid include is beyond the scope of this PR. ### Are there any user-facing changes? No. * Closes: #36641 Lead-authored-by: Weston Pace Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/compute/util.cc | 70 ++++++++++++------------ cpp/src/arrow/compute/util.h | 86 +++++++++++++----------------- cpp/src/arrow/compute/util_avx2.cc | 62 ++++++++++----------- 3 files changed, 99 insertions(+), 119 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 78f90ea37f7af..f69f60a5af434 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -56,7 +56,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) { --num_vectors_; } -inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { +namespace bit_util { + +inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { // This will not be correct on big-endian architectures. #if !ARROW_LITTLE_ENDIAN ARROW_DCHECK(false); @@ -73,7 +75,7 @@ inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes } } -inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { +inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { // This will not be correct on big-endian architectures. 
#if !ARROW_LITTLE_ENDIAN ARROW_DCHECK(false); @@ -88,8 +90,8 @@ inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_ } } -inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index, - int* num_indexes, uint16_t* indexes) { +inline void bits_to_indexes_helper(uint64_t word, uint16_t base_index, int* num_indexes, + uint16_t* indexes) { int n = *num_indexes; while (word) { indexes[n++] = base_index + static_cast(CountTrailingZeros(word)); @@ -98,9 +100,8 @@ inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index, *num_indexes = n; } -inline void bit_util::bits_filter_indexes_helper(uint64_t word, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes) { +inline void bits_filter_indexes_helper(uint64_t word, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes) { int n = *num_indexes; while (word) { indexes[n++] = input_indexes[CountTrailingZeros(word)]; @@ -110,21 +111,21 @@ inline void bit_util::bits_filter_indexes_helper(uint64_t word, } template -void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, uint16_t base_index) { +void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, + uint16_t base_index = 0) { // 64 bits at a time constexpr int unroll = 64; int tail = num_bits % unroll; #if defined(ARROW_HAVE_AVX2) if (hardware_flags & arrow::internal::CpuInfo::AVX2) { if (filter_input_indexes) { - bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes, - num_indexes, indexes); + avx2::bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes, + num_indexes, indexes); } else { - bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes, - base_index); + 
avx2::bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, + indexes, base_index); } } else { #endif @@ -160,9 +161,9 @@ void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bi } } -void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits, - const uint8_t* bits, int* num_indexes, uint16_t* indexes, - int bit_offset) { +void bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits, + const uint8_t* bits, int* num_indexes, uint16_t* indexes, + int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; *num_indexes = 0; @@ -193,10 +194,9 @@ void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int nu *num_indexes += num_indexes_new; } -void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset) { +void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -226,10 +226,9 @@ void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags, } } -void bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset) { +void bits_split_indexes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + int* num_indexes_bit0, uint16_t* indexes_bit0, + uint16_t* indexes_bit1, int bit_offset) { bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0, bit_offset); int num_indexes_bit1; @@ -237,8 +236,8 @@ void bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits, bit_offset); } -void bit_util::bits_to_bytes(int64_t hardware_flags, const 
int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset) { +void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + uint8_t* bytes, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -258,7 +257,7 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits, if (hardware_flags & arrow::internal::CpuInfo::AVX2) { // The function call below processes whole 32 bit chunks together. num_processed = num_bits - (num_bits % 32); - bits_to_bytes_avx2(num_processed, bits, bytes); + avx2::bits_to_bytes_avx2(num_processed, bits, bytes); } #endif // Processing 8 bits at a time @@ -290,8 +289,8 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits, } } -void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset) { +void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* bytes, + uint8_t* bits, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -314,7 +313,7 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, if (hardware_flags & arrow::internal::CpuInfo::AVX2) { // The function call below processes whole 32 bit chunks together. 
num_processed = num_bits - (num_bits % 32); - bytes_to_bits_avx2(num_processed, bytes, bits); + avx2::bytes_to_bits_avx2(num_processed, bytes, bits); } #endif // Process 8 bits at a time @@ -338,11 +337,11 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, } } -bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes) { +bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes) { #if defined(ARROW_HAVE_AVX2) if (hardware_flags & arrow::internal::CpuInfo::AVX2) { - return are_all_bytes_zero_avx2(bytes, num_bytes); + return avx2::are_all_bytes_zero_avx2(bytes, num_bytes); } #endif uint64_t result_or = 0; @@ -358,6 +357,7 @@ bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, return result_or == 0; } +} // namespace bit_util } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 6e1bb79674cba..489139eab87f2 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -139,69 +139,55 @@ class TempVectorHolder { uint32_t num_elements_; }; -class ARROW_EXPORT bit_util { - public: - static void bits_to_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); +namespace bit_util { - static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, +ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); - // Input and output indexes may be pointing to the same data (in-place filtering). 
- static void bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset = 0); +ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes, int bit_offset = 0); - // Bit 1 is replaced with byte 0xFF. - static void bits_to_bytes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); +// Input and output indexes may be pointing to the same data (in-place filtering). +ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, uint16_t* indexes_bit1, + int bit_offset = 0); - // Return highest bit of each byte. - static void bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); +// Bit 1 is replaced with byte 0xFF. +ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); - static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes); +// Return highest bit of each byte. 
+ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); - private: - inline static uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes); - inline static void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value); - inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index, - int* num_indexes, uint16_t* indexes); - inline static void bits_filter_indexes_helper(uint64_t word, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); - template - static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes, - uint16_t base_index = 0); +ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes); #if defined(ARROW_HAVE_AVX2) - static void bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index = 0); - static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); - template - static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - uint16_t base_index = 0); - template - static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + +namespace avx2 { +ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, int* num_indexes, uint16_t* indexes); - static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes); - static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits); - static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +ARROW_EXPORT void 
bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes, uint16_t base_index = 0); +ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, + uint8_t* bytes); +ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, + uint8_t* bits); +ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +} // namespace avx2 + #endif -}; +} // namespace bit_util } // namespace util namespace compute { diff --git a/cpp/src/arrow/compute/util_avx2.cc b/cpp/src/arrow/compute/util_avx2.cc index 7c2a378254562..89ec6aa97a608 100644 --- a/cpp/src/arrow/compute/util_avx2.cc +++ b/cpp/src/arrow/compute/util_avx2.cc @@ -16,30 +16,18 @@ // under the License. #include +#include -#include "arrow/acero/util.h" #include "arrow/util/bit_util.h" - -namespace arrow { -namespace util { +#include "arrow/util/logging.h" #if defined(ARROW_HAVE_AVX2) -void bit_util::bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index) { - if (bit_to_search == 0) { - bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index); - } else { - ARROW_DCHECK(bit_to_search == 1); - bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index); - } -} +namespace arrow::util::avx2 { template -void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - uint16_t base_index) { +void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, int* num_indexes, + uint16_t* indexes, uint16_t base_index = 0) { // 64 bits at a time constexpr int unroll = 64; @@ -82,21 +70,20 @@ void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, } } -void bit_util::bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, 
- uint16_t* indexes) { +void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes, uint16_t base_index) { if (bit_to_search == 0) { - bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes); + bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index); } else { - bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes); + ARROW_DCHECK(bit_to_search == 1); + bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index); } } template -void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, - int* out_num_indexes, uint16_t* indexes) { +void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* out_num_indexes, + uint16_t* indexes) { // 64 bits at a time constexpr int unroll = 64; @@ -167,8 +154,17 @@ void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* b *out_num_indexes = num_indexes; } -void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, - uint8_t* bytes) { +void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes) { + if (bit_to_search == 0) { + bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes); + } else { + bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes); + } +} + +void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes) { constexpr int unroll = 32; constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL; @@ -188,8 +184,7 @@ void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, } } -void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, - uint8_t* bits) { +void bytes_to_bits_avx2(const int num_bits, const 
uint8_t* bytes, uint8_t* bits) { constexpr int unroll = 32; // Processing 32 bits at a time for (int i = 0; i < num_bits / unroll; ++i) { @@ -198,7 +193,7 @@ void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, } } -bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) { +bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) { __m256i result_or = _mm256_setzero_si256(); uint32_t i; for (i = 0; i < num_bytes / 32; ++i) { @@ -216,7 +211,6 @@ bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) return result_or32 == 0; } -#endif // ARROW_HAVE_AVX2 +} // namespace arrow::util::avx2 -} // namespace util -} // namespace arrow +#endif // ARROW_HAVE_AVX2 From 994c73b019536b31248f1438278c622d2f0a0f94 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Jul 2023 15:28:58 +0200 Subject: [PATCH 09/35] GH-36659: [Python] Fix pyarrow.dataset.Partitioning.__eq__ when comparing with other type (#36661) ### Rationale for this change Ensure that `part == other` doesn't crash when `other` is not a Partitioning instance Small follow-up on https://github.com/apache/arrow/pull/36462 * Closes: #36659 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/_dataset.pyx | 5 ++--- python/pyarrow/tests/test_dataset.py | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx index c5f0a663a814c..925565804f63e 100644 --- a/python/pyarrow/_dataset.pyx +++ b/python/pyarrow/_dataset.pyx @@ -2348,10 +2348,9 @@ cdef class Partitioning(_Weakrefable): return self.wrapped def __eq__(self, other): - try: + if isinstance(other, Partitioning): return self.partitioning.Equals(deref((other).unwrap())) - except TypeError: - return False + return False def parse(self, path): cdef CResult[CExpression] result diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index 2f9b6a0922351..a70cf2fbc72af 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -589,6 +589,7 @@ def test_partitioning(): partitioning = klass(schema) assert isinstance(partitioning, ds.Partitioning) assert partitioning == klass(schema) + assert partitioning != "other object" schema = pa.schema([ pa.field('group', pa.int64()), From 7690409568e8a4b51946f292b109075629ed1ee7 Mon Sep 17 00:00:00 2001 From: Tommy Setiawan Date: Thu, 13 Jul 2023 10:14:35 -0400 Subject: [PATCH 10/35] GH-36645: [Go] returns writer.Close error to caller when writing parquet (#36646) ### Rationale for this change ### What changes are included in this PR? adding handler for `sink.Close()` error, chaining it with other reported error (if any) via `error.Join` ### Are these changes tested? Unit test included in the change ### Are there any user-facing changes? No * Closes: #36645 Lead-authored-by: Tommy Setiawan Co-authored-by: Tommy Setiawan Signed-off-by: Matt Topol --- go/parquet/file/file_writer.go | 16 +++++++++++++--- go/parquet/file/file_writer_test.go | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index c931377323e2b..cd0445f4180f1 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -18,6 +18,7 @@ package file import ( "encoding/binary" + "fmt" "io" "github.com/apache/arrow/go/v13/parquet" @@ -155,7 +156,7 @@ func (fw *Writer) startFile() { // Close closes any open row group writer and writes the file footer. Subsequent // calls to close will have no effect. 
-func (fw *Writer) Close() error { +func (fw *Writer) Close() (err error) { if fw.open { // if any functions here panic, we set open to be false so // that this doesn't get called again @@ -165,11 +166,20 @@ func (fw *Writer) Close() error { fw.rowGroupWriter.Close() } fw.rowGroupWriter = nil - defer fw.sink.Close() + defer func() { + ierr := fw.sink.Close() + if err != nil { + if ierr != nil { + err = fmt.Errorf("error on close:%w, %s", err, ierr) + } + return + } + + err = ierr + }() fileEncryptProps := fw.props.FileEncryptionProperties() if fileEncryptProps == nil { // non encrypted file - var err error if fw.FileMetadata, err = fw.metadata.Finish(); err != nil { return err } diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index bba0d2be28d98..2cbdb910724ad 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -18,6 +18,7 @@ package file_test import ( "bytes" + "fmt" "reflect" "testing" @@ -395,3 +396,25 @@ func TestSerialize(t *testing.T) { }) } } + +type errCloseWriter struct { + sink *encoding.BufferWriter +} + +func (c *errCloseWriter) Write(p []byte) (n int, err error) { + return c.sink.Write(p) +} +func (c *errCloseWriter) Close() error { + return fmt.Errorf("error during close") +} +func (c *errCloseWriter) Bytes() []byte { + return c.sink.Bytes() +} + +func TestCloseError(t *testing.T) { + fields := schema.FieldList{schema.NewInt32Node("col", parquet.Repetitions.Required, 1)} + sc, _ := schema.NewGroupNode("schema", parquet.Repetitions.Required, fields, 0) + sink := &errCloseWriter{sink: encoding.NewBufferWriter(0, memory.DefaultAllocator)} + writer := file.NewParquetWriter(sink, sc) + assert.Error(t, writer.Close()) +} From de23a7e54ebe3cb2d0cf68f38044d42595957849 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Thu, 13 Jul 2023 13:34:21 -0400 Subject: [PATCH 11/35] GH-36601: [MATLAB] Add a MATLAB "type traits" class hierarchy (#36653) ### Rationale for this change To make 
it easier to write generic code for `arrow.array.Array` and associated objects in the MATLAB interface, it would be helpful to have some kind of ["type traits"-like](https://github.com/apache/arrow/blob/d676078c13a02ad920eeea2acd5fa517f14526e2/cpp/src/arrow/type_traits.h#L105) objects (e.g. `arrow.type.traits.StringTraits`, `arrow.type.traits.UInt8Traits`, etc.). These "type traits" objects would help centralize various type-specific information in one place (e.g. MATLAB `double` <-> `arrow.type.Float64Type` <-> `arrow.array.Float64Array`), simplifying generic client code. This would help reduce the number of "switch-like" statements across the MATLAB code base that branch based on type. ### What changes are included in this PR? 1. Added new `arrow.type.traits.TypeTraits` classes (e.g. `arrow.type.traits.UInt8Traits` and `arrow.type.traits.StringTraits`). 2. Added new `arrow.type.traits.traits` "gateway" function for creating "type traits" objects from MATLAB class strings (e.g. `"double"` or `"datetime"`) and `arrow.type.ID` enumeration values (e.g. `arrow.type.ID.Timestamp` or `arrow.type.ID.UInt8`). ### Are these changes tested? Yes. 1. Added MATLAB tests for the new `arrow.type.traits.TypeTraits` classes. 2. Added MATLAB tests (`ttraits.m`) for the new `arrow.type.traits.traits` "gateway" function. ### Are there any user-facing changes? Yes. There are now MATLAB "type traits" classes that can be used to simplify writing generic client code that switches based on type. **Note**: It may make sense to eventually mark these "type traits" APIs as "internal" so that they aren't encouraged to be used by client code outside of the MATLAB interface. The "type traits" classes expose some implementation details such as the `Proxy` classes that are used under the hood to represent a given `Array` object in C++. 
**Example** ```matlab % Construct a "type traits" object from an arrow.type.ID enumeration value >> logicalTraits = arrow.type.traits.traits(arrow.type.ID.Boolean) logicalTraits = BooleanTraits with properties: ArrayConstructor: @ arrow.array.BooleanArray ArrayClassName: "arrow.array.BooleanArray" ArrayProxyClassName: "arrow.array.proxy.BooleanArray" TypeConstructor: @ arrow.type.BooleanType TypeClassName: "arrow.type.BooleanType" TypeProxyClassName: "arrow.type.proxy.BooleanType" MatlabConstructor: @ logical MatlabClassName: "logical" % Construct a "type traits" object from a MATLAB class string >> stringTraits = arrow.type.traits.traits("string") stringTraits = StringTraits with properties: ArrayConstructor: @ arrow.array.StringArray ArrayClassName: "arrow.array.StringArray" ArrayProxyClassName: "arrow.array.proxy.StringArray" TypeConstructor: @ arrow.type.StringType TypeClassName: "arrow.type.StringType" TypeProxyClassName: "arrow.type.proxy.StringType" MatlabConstructor: @ string MatlabClassName: "string" ``` ### Future Directions 1. Re-implement `switch` statement in `RecordBatch` code to use "type traits" classes instead. 2. Look for more opportunities to leverage "type traits" to simplify code in the MATLAB interface. ### Notes 1. Thanks @ sgilmore10 for your help with this pull request!
* Closes: #36601 Lead-authored-by: Kevin Gurney Co-authored-by: Sarah Gilmore Co-authored-by: Fiona La Signed-off-by: Kevin Gurney --- .../+arrow/+type/+traits/BooleanTraits.m | 29 ++ .../+arrow/+type/+traits/Float32Traits.m | 29 ++ .../+arrow/+type/+traits/Float64Traits.m | 29 ++ .../matlab/+arrow/+type/+traits/Int16Traits.m | 29 ++ .../matlab/+arrow/+type/+traits/Int32Traits.m | 29 ++ .../matlab/+arrow/+type/+traits/Int64Traits.m | 29 ++ .../matlab/+arrow/+type/+traits/Int8Traits.m | 29 ++ .../+arrow/+type/+traits/StringTraits.m | 29 ++ .../+arrow/+type/+traits/TimestampTraits.m | 29 ++ .../matlab/+arrow/+type/+traits/TypeTraits.m | 29 ++ .../+arrow/+type/+traits/UInt16Traits.m | 29 ++ .../+arrow/+type/+traits/UInt32Traits.m | 29 ++ .../+arrow/+type/+traits/UInt64Traits.m | 29 ++ .../matlab/+arrow/+type/+traits/UInt8Traits.m | 29 ++ .../src/matlab/+arrow/+type/+traits/traits.m | 89 +++++ matlab/src/matlab/+arrow/+type/Type.m | 1 + matlab/test/arrow/type/traits/hTypeTraits.m | 78 +++++ .../test/arrow/type/traits/tBooleanTraits.m | 30 ++ matlab/test/arrow/type/traits/tInt16Traits.m | 30 ++ matlab/test/arrow/type/traits/tInt32Traits.m | 30 ++ matlab/test/arrow/type/traits/tInt64Traits.m | 30 ++ matlab/test/arrow/type/traits/tInt8Traits.m | 30 ++ matlab/test/arrow/type/traits/tStringTraits.m | 30 ++ .../test/arrow/type/traits/tTimestampTraits.m | 30 ++ matlab/test/arrow/type/traits/tUInt16Traits.m | 30 ++ matlab/test/arrow/type/traits/tUInt32Traits.m | 30 ++ matlab/test/arrow/type/traits/tUInt64Traits.m | 30 ++ matlab/test/arrow/type/traits/tUInt8Traits.m | 30 ++ matlab/test/arrow/type/traits/ttraits.m | 320 ++++++++++++++++++ 29 files changed, 1224 insertions(+) create mode 100644 matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m create mode 
100644 matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/StringTraits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m create mode 100644 matlab/src/matlab/+arrow/+type/+traits/traits.m create mode 100644 matlab/test/arrow/type/traits/hTypeTraits.m create mode 100644 matlab/test/arrow/type/traits/tBooleanTraits.m create mode 100644 matlab/test/arrow/type/traits/tInt16Traits.m create mode 100644 matlab/test/arrow/type/traits/tInt32Traits.m create mode 100644 matlab/test/arrow/type/traits/tInt64Traits.m create mode 100644 matlab/test/arrow/type/traits/tInt8Traits.m create mode 100644 matlab/test/arrow/type/traits/tStringTraits.m create mode 100644 matlab/test/arrow/type/traits/tTimestampTraits.m create mode 100644 matlab/test/arrow/type/traits/tUInt16Traits.m create mode 100644 matlab/test/arrow/type/traits/tUInt32Traits.m create mode 100644 matlab/test/arrow/type/traits/tUInt64Traits.m create mode 100644 matlab/test/arrow/type/traits/tUInt8Traits.m create mode 100644 matlab/test/arrow/type/traits/ttraits.m diff --git a/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m b/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m new file mode 100644 index 0000000000000..82a8b6b1e28ba --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. 
See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef BooleanTraits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.BooleanArray + ArrayClassName = "arrow.array.BooleanArray" + ArrayProxyClassName = "arrow.array.proxy.BooleanArray" + TypeConstructor = @arrow.type.BooleanType; + TypeClassName = "arrow.type.BooleanType" + TypeProxyClassName = "arrow.type.proxy.BooleanType" + MatlabConstructor = @logical + MatlabClassName = "logical" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m new file mode 100644 index 0000000000000..7dc0d17474e2f --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef Float32Traits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.Float32Array + ArrayClassName = "arrow.array.Float32Array" + ArrayProxyClassName = "arrow.array.proxy.Float32Array" + TypeConstructor = @arrow.type.Float32Type; + TypeClassName = "arrow.type.Float32Type" + TypeProxyClassName = "arrow.type.proxy.Float32Type" + MatlabConstructor = @single + MatlabClassName = "single" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m new file mode 100644 index 0000000000000..9c52634b2c942 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef Float64Traits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.Float64Array + ArrayClassName = "arrow.array.Float64Array" + ArrayProxyClassName = "arrow.array.proxy.Float64Array" + TypeConstructor = @arrow.type.Float64Type; + TypeClassName = "arrow.type.Float64Type" + TypeProxyClassName = "arrow.type.proxy.Float64Type" + MatlabConstructor = @double + MatlabClassName = "double" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m new file mode 100644 index 0000000000000..46b67b43c1783 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+
+classdef Int16Traits < arrow.type.traits.TypeTraits
+% Int16Traits Constants linking Arrow Int16 arrays and types to MATLAB int16.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.Int16Array
+        ArrayClassName = "arrow.array.Int16Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int16Array"
+        TypeConstructor = @arrow.type.Int16Type
+        TypeClassName = "arrow.type.Int16Type"
+        TypeProxyClassName = "arrow.type.proxy.Int16Type"
+        MatlabConstructor = @int16
+        MatlabClassName = "int16"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m
new file mode 100644
index 0000000000000..4117271e50ff1
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef Int32Traits < arrow.type.traits.TypeTraits
+% Int32Traits Constants linking Arrow Int32 arrays and types to MATLAB int32.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.Int32Array
+        ArrayClassName = "arrow.array.Int32Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int32Array"
+        TypeConstructor = @arrow.type.Int32Type
+        TypeClassName = "arrow.type.Int32Type"
+        TypeProxyClassName = "arrow.type.proxy.Int32Type"
+        MatlabConstructor = @int32
+        MatlabClassName = "int32"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m
new file mode 100644
index 0000000000000..e25da953aa0fc
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef Int64Traits < arrow.type.traits.TypeTraits
+% Int64Traits Constants linking Arrow Int64 arrays and types to MATLAB int64.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.Int64Array
+        ArrayClassName = "arrow.array.Int64Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int64Array"
+        TypeConstructor = @arrow.type.Int64Type
+        TypeClassName = "arrow.type.Int64Type"
+        TypeProxyClassName = "arrow.type.proxy.Int64Type"
+        MatlabConstructor = @int64
+        MatlabClassName = "int64"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m
new file mode 100644
index 0000000000000..9f73bd2667e1b
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef Int8Traits < arrow.type.traits.TypeTraits
+% Int8Traits Constants linking Arrow Int8 arrays and types to MATLAB int8.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.Int8Array
+        ArrayClassName = "arrow.array.Int8Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int8Array"
+        TypeConstructor = @arrow.type.Int8Type
+        TypeClassName = "arrow.type.Int8Type"
+        TypeProxyClassName = "arrow.type.proxy.Int8Type"
+        MatlabConstructor = @int8
+        MatlabClassName = "int8"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m b/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m
new file mode 100644
index 0000000000000..0730657270129
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef StringTraits < arrow.type.traits.TypeTraits
+% StringTraits Constants linking Arrow String arrays and types to MATLAB string.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.StringArray
+        ArrayClassName = "arrow.array.StringArray"
+        ArrayProxyClassName = "arrow.array.proxy.StringArray"
+        TypeConstructor = @arrow.type.StringType
+        TypeClassName = "arrow.type.StringType"
+        TypeProxyClassName = "arrow.type.proxy.StringType"
+        MatlabConstructor = @string
+        MatlabClassName = "string"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m b/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m
new file mode 100644
index 0000000000000..488a5e7314016
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef TimestampTraits < arrow.type.traits.TypeTraits
+% TimestampTraits Constants linking Arrow Timestamp arrays and types to MATLAB datetime.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.TimestampArray
+        ArrayClassName = "arrow.array.TimestampArray"
+        ArrayProxyClassName = "arrow.array.proxy.TimestampArray"
+        TypeConstructor = @arrow.type.TimestampType
+        TypeClassName = "arrow.type.TimestampType"
+        TypeProxyClassName = "arrow.type.proxy.TimestampType"
+        MatlabConstructor = @datetime
+        MatlabClassName = "datetime"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m b/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m
new file mode 100644
index 0000000000000..54b8fc0a7709c
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef TypeTraits
+% TypeTraits Abstract list of the constants that each concrete traits class (e.g. StringTraits) defines.
+    properties (Abstract, Constant)
+        ArrayConstructor % function handle, e.g. @arrow.array.StringArray
+        ArrayClassName % string, e.g. "arrow.array.StringArray"
+        ArrayProxyClassName % string, e.g. "arrow.array.proxy.StringArray"
+        TypeConstructor % function handle, e.g. @arrow.type.StringType
+        TypeClassName % string, e.g. "arrow.type.StringType"
+        TypeProxyClassName % string, e.g. "arrow.type.proxy.StringType"
+        MatlabConstructor % function handle, e.g. @string
+        MatlabClassName % string, e.g. "string"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m
new file mode 100644
index 0000000000000..b90e6294ce0d8
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef UInt16Traits < arrow.type.traits.TypeTraits
+% UInt16Traits Constants linking Arrow UInt16 arrays and types to MATLAB uint16.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.UInt16Array
+        ArrayClassName = "arrow.array.UInt16Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt16Array"
+        TypeConstructor = @arrow.type.UInt16Type
+        TypeClassName = "arrow.type.UInt16Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt16Type"
+        MatlabConstructor = @uint16
+        MatlabClassName = "uint16"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m
new file mode 100644
index 0000000000000..ff79bd9579a3b
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef UInt32Traits < arrow.type.traits.TypeTraits
+% UInt32Traits Constants linking Arrow UInt32 arrays and types to MATLAB uint32.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.UInt32Array
+        ArrayClassName = "arrow.array.UInt32Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt32Array"
+        TypeConstructor = @arrow.type.UInt32Type
+        TypeClassName = "arrow.type.UInt32Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt32Type"
+        MatlabConstructor = @uint32
+        MatlabClassName = "uint32"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m
new file mode 100644
index 0000000000000..a6b0de37528a9
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef UInt64Traits < arrow.type.traits.TypeTraits
+% UInt64Traits Constants linking Arrow UInt64 arrays and types to MATLAB uint64.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.UInt64Array
+        ArrayClassName = "arrow.array.UInt64Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt64Array"
+        TypeConstructor = @arrow.type.UInt64Type
+        TypeClassName = "arrow.type.UInt64Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt64Type"
+        MatlabConstructor = @uint64
+        MatlabClassName = "uint64"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m
new file mode 100644
index 0000000000000..ff2377ff812c3
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m
@@ -0,0 +1,29 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef UInt8Traits < arrow.type.traits.TypeTraits
+% UInt8Traits Constants linking Arrow UInt8 arrays and types to MATLAB uint8.
+    properties (Constant)
+        ArrayConstructor = @arrow.array.UInt8Array
+        ArrayClassName = "arrow.array.UInt8Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt8Array"
+        TypeConstructor = @arrow.type.UInt8Type
+        TypeClassName = "arrow.type.UInt8Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt8Type"
+        MatlabConstructor = @uint8
+        MatlabClassName = "uint8"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/+traits/traits.m b/matlab/src/matlab/+arrow/+type/+traits/traits.m
new file mode 100644
index 0000000000000..af59e2822df96
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/+traits/traits.m
@@ -0,0 +1,102 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+function typeTraits = traits(type)
+    % "Gateway" function that links an arrow Type ID enumeration (e.g.
+    % arrow.type.ID.String) or a MATLAB class string (e.g. "datetime")
+    % to associated type information.
+    %
+    % typeTraits = traits(type) returns the arrow.type.traits object
+    % (e.g. StringTraits) selected by type, which must be an arrow.type.ID
+    % enumeration value or a string naming a MATLAB class.
+    %
+    % Errors raised:
+    %   arrow:type:traits:UnsupportedArrowTypeID - ID has no traits class
+    %   arrow:type:traits:UnsupportedMatlabClass - class has no traits class
+    %   arrow:type:traits:UnsupportedInputType   - type is neither an
+    %       arrow.type.ID nor a string
+    import arrow.type.traits.*
+    import arrow.type.*
+
+    if isa(type, "arrow.type.ID")
+        switch type
+            case ID.UInt8
+                typeTraits = UInt8Traits();
+            case ID.UInt16
+                typeTraits = UInt16Traits();
+            case ID.UInt32
+                typeTraits = UInt32Traits();
+            case ID.UInt64
+                typeTraits = UInt64Traits();
+            case ID.Int8
+                typeTraits = Int8Traits();
+            case ID.Int16
+                typeTraits = Int16Traits();
+            case ID.Int32
+                typeTraits = Int32Traits();
+            case ID.Int64
+                typeTraits = Int64Traits();
+            case ID.Float32
+                typeTraits = Float32Traits();
+            case ID.Float64
+                typeTraits = Float64Traits();
+            case ID.Boolean
+                typeTraits = BooleanTraits();
+            case ID.String
+                typeTraits = StringTraits();
+            case ID.Timestamp
+                typeTraits = TimestampTraits();
+            otherwise
+                % "%s" keeps error from parsing the text as a format specifier.
+                error("arrow:type:traits:UnsupportedArrowTypeID", "%s", "Unsupported Arrow type ID: " + type);
+        end
+    elseif isa(type, "string") % MATLAB class string
+        switch type
+            case "uint8"
+                typeTraits = UInt8Traits();
+            case "uint16"
+                typeTraits = UInt16Traits();
+            case "uint32"
+                typeTraits = UInt32Traits();
+            case "uint64"
+                typeTraits = UInt64Traits();
+            case "int8"
+                typeTraits = Int8Traits();
+            case "int16"
+                typeTraits = Int16Traits();
+            case "int32"
+                typeTraits = Int32Traits();
+            case "int64"
+                typeTraits = Int64Traits();
+            case "single"
+                typeTraits = Float32Traits();
+            case "double"
+                typeTraits = Float64Traits();
+            case "logical"
+                typeTraits = BooleanTraits();
+            case "string"
+                typeTraits = StringTraits();
+            case "datetime"
+                typeTraits = TimestampTraits();
+            otherwise
+                % type is caller-supplied text: route it through "%s" so "%"
+                % or "\" inside it is reported verbatim, not parsed by error.
+                error("arrow:type:traits:UnsupportedMatlabClass", "%s", "Unsupported MATLAB class: " + type);
+        end
+    else
+        error("arrow:type:traits:UnsupportedInputType", "The input argument to the traits function " + ...
+              "must be a MATLAB class string or an arrow.type.ID enumeration.");
+    end
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/Type.m b/matlab/src/matlab/+arrow/+type/Type.m
index d6efc32be3b47..466f393c7d082 100644
--- a/matlab/src/matlab/+arrow/+type/Type.m
+++ b/matlab/src/matlab/+arrow/+type/Type.m
@@ -45,4 +45,5 @@
             propgrp = matlab.mixin.util.PropertyGroup(proplist);
         end
     end
+
 end
diff --git a/matlab/test/arrow/type/traits/hTypeTraits.m b/matlab/test/arrow/type/traits/hTypeTraits.m
new file mode 100644
index 0000000000000..df62fdd325f2f
--- /dev/null
+++ b/matlab/test/arrow/type/traits/hTypeTraits.m
@@ -0,0 +1,83 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef hTypeTraits < matlab.unittest.TestCase
+% Superclass for tests that validate the behavior of "type trait" objects
+% like arrow.type.traits.StringTraits.
+%
+% Subclasses define TraitsConstructor and the expected value of each trait
+% property. Every Test method compares one property of the traits object
+% built by TraitsConstructor against the subclass's expected value.
+
+    properties (Abstract)
+        TraitsConstructor
+        ArrayConstructor
+        ArrayClassName
+        ArrayProxyClassName
+        TypeConstructor
+        TypeClassName
+        TypeProxyClassName
+        MatlabConstructor
+        MatlabClassName
+    end
+
+    properties
+        % Traits object under test; rebuilt by setupTraits before each test.
+        Traits
+    end
+
+    methods (TestMethodSetup)
+        function setupTraits(testCase)
+            testCase.Traits = testCase.TraitsConstructor();
+        end
+    end
+
+    methods(Test)
+
+        function TestArrayConstructor(testCase)
+            testCase.verifyEqual(testCase.Traits.ArrayConstructor, testCase.ArrayConstructor);
+        end
+
+        function TestArrayClassName(testCase)
+            testCase.verifyEqual(testCase.Traits.ArrayClassName, testCase.ArrayClassName);
+        end
+
+        function TestArrayProxyClassName(testCase)
+            testCase.verifyEqual(testCase.Traits.ArrayProxyClassName, testCase.ArrayProxyClassName);
+        end
+
+        function TestTypeConstructor(testCase)
+            testCase.verifyEqual(testCase.Traits.TypeConstructor, testCase.TypeConstructor);
+        end
+
+        function TestTypeClassName(testCase)
+            testCase.verifyEqual(testCase.Traits.TypeClassName, testCase.TypeClassName);
+        end
+
+        function TestTypeProxyClassName(testCase)
+            testCase.verifyEqual(testCase.Traits.TypeProxyClassName, testCase.TypeProxyClassName);
+        end
+
+        function TestMatlabConstructor(testCase)
+            testCase.verifyEqual(testCase.Traits.MatlabConstructor, testCase.MatlabConstructor);
+        end
+
+        function TestMatlabClassName(testCase)
+            testCase.verifyEqual(testCase.Traits.MatlabClassName, testCase.MatlabClassName);
+        end
+
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tBooleanTraits.m b/matlab/test/arrow/type/traits/tBooleanTraits.m
new file mode 100644
index 0000000000000..859dc630a1fc7
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tBooleanTraits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tBooleanTraits < hTypeTraits
+% tBooleanTraits Expected values that hTypeTraits checks against arrow.type.traits.BooleanTraits.
+    properties
+        TraitsConstructor = @arrow.type.traits.BooleanTraits
+        ArrayConstructor = @arrow.array.BooleanArray
+        ArrayClassName = "arrow.array.BooleanArray"
+        ArrayProxyClassName = "arrow.array.proxy.BooleanArray"
+        TypeConstructor = @arrow.type.BooleanType
+        TypeClassName = "arrow.type.BooleanType"
+        TypeProxyClassName = "arrow.type.proxy.BooleanType"
+        MatlabConstructor = @logical
+        MatlabClassName = "logical"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt16Traits.m b/matlab/test/arrow/type/traits/tInt16Traits.m
new file mode 100644
index 0000000000000..bde308d28e68a
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt16Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.
You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt16Traits < hTypeTraits
+% tInt16Traits Expected values that hTypeTraits checks against arrow.type.traits.Int16Traits.
+    properties
+        TraitsConstructor = @arrow.type.traits.Int16Traits
+        ArrayConstructor = @arrow.array.Int16Array
+        ArrayClassName = "arrow.array.Int16Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int16Array"
+        TypeConstructor = @arrow.type.Int16Type
+        TypeClassName = "arrow.type.Int16Type"
+        TypeProxyClassName = "arrow.type.proxy.Int16Type"
+        MatlabConstructor = @int16
+        MatlabClassName = "int16"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt32Traits.m b/matlab/test/arrow/type/traits/tInt32Traits.m
new file mode 100644
index 0000000000000..651f647455408
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt32Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt32Traits < hTypeTraits
+% tInt32Traits Expected values that hTypeTraits checks against arrow.type.traits.Int32Traits.
+    properties
+        TraitsConstructor = @arrow.type.traits.Int32Traits
+        ArrayConstructor = @arrow.array.Int32Array
+        ArrayClassName = "arrow.array.Int32Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int32Array"
+        TypeConstructor = @arrow.type.Int32Type
+        TypeClassName = "arrow.type.Int32Type"
+        TypeProxyClassName = "arrow.type.proxy.Int32Type"
+        MatlabConstructor = @int32
+        MatlabClassName = "int32"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt64Traits.m b/matlab/test/arrow/type/traits/tInt64Traits.m
new file mode 100644
index 0000000000000..4f16c91eb4e09
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt64Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt64Traits < hTypeTraits
+% tInt64Traits Expected values that hTypeTraits checks against arrow.type.traits.Int64Traits.
+    properties
+        TraitsConstructor = @arrow.type.traits.Int64Traits
+        ArrayConstructor = @arrow.array.Int64Array
+        ArrayClassName = "arrow.array.Int64Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int64Array"
+        TypeConstructor = @arrow.type.Int64Type
+        TypeClassName = "arrow.type.Int64Type"
+        TypeProxyClassName = "arrow.type.proxy.Int64Type"
+        MatlabConstructor = @int64
+        MatlabClassName = "int64"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt8Traits.m b/matlab/test/arrow/type/traits/tInt8Traits.m
new file mode 100644
index 0000000000000..3e767abbebba4
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt8Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt8Traits < hTypeTraits
+% tInt8Traits Expected values that hTypeTraits checks against arrow.type.traits.Int8Traits.
+    properties
+        TraitsConstructor = @arrow.type.traits.Int8Traits
+        ArrayConstructor = @arrow.array.Int8Array
+        ArrayClassName = "arrow.array.Int8Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int8Array"
+        TypeConstructor = @arrow.type.Int8Type
+        TypeClassName = "arrow.type.Int8Type"
+        TypeProxyClassName = "arrow.type.proxy.Int8Type"
+        MatlabConstructor = @int8
+        MatlabClassName = "int8"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tStringTraits.m b/matlab/test/arrow/type/traits/tStringTraits.m
new file mode 100644
index 0000000000000..68f061d1b031d
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tStringTraits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tStringTraits < hTypeTraits
+% tStringTraits Expected values that hTypeTraits checks against arrow.type.traits.StringTraits.
+    properties
+        TraitsConstructor = @arrow.type.traits.StringTraits
+        ArrayConstructor = @arrow.array.StringArray
+        ArrayClassName = "arrow.array.StringArray"
+        ArrayProxyClassName = "arrow.array.proxy.StringArray"
+        TypeConstructor = @arrow.type.StringType
+        TypeClassName = "arrow.type.StringType"
+        TypeProxyClassName = "arrow.type.proxy.StringType"
+        MatlabConstructor = @string
+        MatlabClassName = "string"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tTimestampTraits.m b/matlab/test/arrow/type/traits/tTimestampTraits.m
new file mode 100644
index 0000000000000..5f451c0631465
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tTimestampTraits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tTimestampTraits < hTypeTraits
+% tTimestampTraits Expected values that hTypeTraits checks against arrow.type.traits.TimestampTraits.
+    properties
+        TraitsConstructor = @arrow.type.traits.TimestampTraits
+        ArrayConstructor = @arrow.array.TimestampArray
+        ArrayClassName = "arrow.array.TimestampArray"
+        ArrayProxyClassName = "arrow.array.proxy.TimestampArray"
+        TypeConstructor = @arrow.type.TimestampType
+        TypeClassName = "arrow.type.TimestampType"
+        TypeProxyClassName = "arrow.type.proxy.TimestampType"
+        MatlabConstructor = @datetime
+        MatlabClassName = "datetime"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tUInt16Traits.m b/matlab/test/arrow/type/traits/tUInt16Traits.m
new file mode 100644
index 0000000000000..4a9eef6f2978d
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tUInt16Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+ +classdef tUInt16Traits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.UInt16Traits + ArrayConstructor = @arrow.array.UInt16Array + ArrayClassName = "arrow.array.UInt16Array" + ArrayProxyClassName = "arrow.array.proxy.UInt16Array" + TypeConstructor = @arrow.type.UInt16Type + TypeClassName = "arrow.type.UInt16Type" + TypeProxyClassName = "arrow.type.proxy.UInt16Type" + MatlabConstructor = @uint16 + MatlabClassName = "uint16" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tUInt32Traits.m b/matlab/test/arrow/type/traits/tUInt32Traits.m new file mode 100644 index 0000000000000..227e42c4eb0ec --- /dev/null +++ b/matlab/test/arrow/type/traits/tUInt32Traits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef tUInt32Traits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.UInt32Traits + ArrayConstructor = @arrow.array.UInt32Array + ArrayClassName = "arrow.array.UInt32Array" + ArrayProxyClassName = "arrow.array.proxy.UInt32Array" + TypeConstructor = @arrow.type.UInt32Type + TypeClassName = "arrow.type.UInt32Type" + TypeProxyClassName = "arrow.type.proxy.UInt32Type" + MatlabConstructor = @uint32 + MatlabClassName = "uint32" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tUInt64Traits.m b/matlab/test/arrow/type/traits/tUInt64Traits.m new file mode 100644 index 0000000000000..370e905f27736 --- /dev/null +++ b/matlab/test/arrow/type/traits/tUInt64Traits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef tUInt64Traits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.UInt64Traits + ArrayConstructor = @arrow.array.UInt64Array + ArrayClassName = "arrow.array.UInt64Array" + ArrayProxyClassName = "arrow.array.proxy.UInt64Array" + TypeConstructor = @arrow.type.UInt64Type + TypeClassName = "arrow.type.UInt64Type" + TypeProxyClassName = "arrow.type.proxy.UInt64Type" + MatlabConstructor = @uint64 + MatlabClassName = "uint64" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tUInt8Traits.m b/matlab/test/arrow/type/traits/tUInt8Traits.m new file mode 100644 index 0000000000000..d93f9d3c1b942 --- /dev/null +++ b/matlab/test/arrow/type/traits/tUInt8Traits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef tUInt8Traits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.UInt8Traits + ArrayConstructor = @arrow.array.UInt8Array + ArrayClassName = "arrow.array.UInt8Array" + ArrayProxyClassName = "arrow.array.proxy.UInt8Array" + TypeConstructor = @arrow.type.UInt8Type + TypeClassName = "arrow.type.UInt8Type" + TypeProxyClassName = "arrow.type.proxy.UInt8Type" + MatlabConstructor = @uint8 + MatlabClassName = "uint8" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/ttraits.m b/matlab/test/arrow/type/traits/ttraits.m new file mode 100644 index 0000000000000..14149a5ebff48 --- /dev/null +++ b/matlab/test/arrow/type/traits/ttraits.m @@ -0,0 +1,320 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef ttraits < matlab.unittest.TestCase + % Tests for the type traits (i.e. arrow.type.traits.traits) + % "gateway" function. 
+ + methods(Test) + + function TestUInt8(testCase) + import arrow.type.traits.* + import arrow.type.* + + typeID = ID.UInt8; + expectedTraits = UInt8Traits(); + + actualTraits = traits(typeID); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestUInt16(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.UInt16; + expectedTraits = UInt16Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestUInt32(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.UInt32; + expectedTraits = UInt32Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestUInt64(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.UInt64; + expectedTraits = UInt64Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestInt8(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Int8; + expectedTraits = Int8Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestInt16(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Int16; + expectedTraits = Int16Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestInt32(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Int32; + expectedTraits = Int32Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestInt64(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Int64; + expectedTraits = Int64Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestString(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = 
ID.String; + expectedTraits = StringTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestTimestamp(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Timestamp; + expectedTraits = TimestampTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestBoolean(testCase) + import arrow.type.traits.* + import arrow.type.* + + type = ID.Boolean; + expectedTraits = BooleanTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabUInt8(testCase) + import arrow.type.traits.* + + type = "uint8"; + expectedTraits = UInt8Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabUInt16(testCase) + import arrow.type.traits.* + + type = "uint16"; + expectedTraits = UInt16Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabUInt32(testCase) + import arrow.type.traits.* + + type = "uint32"; + expectedTraits = UInt32Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabUInt64(testCase) + import arrow.type.traits.* + + type = "uint64"; + expectedTraits = UInt64Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabInt8(testCase) + import arrow.type.traits.* + + type = "int8"; + expectedTraits = Int8Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabInt16(testCase) + import arrow.type.traits.* + + type = "int16"; + expectedTraits = Int16Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabInt32(testCase) + import 
arrow.type.traits.* + + type = "int32"; + expectedTraits = Int32Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabInt64(testCase) + import arrow.type.traits.* + + type = "int64"; + expectedTraits = Int64Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabSingle(testCase) + import arrow.type.traits.* + + type = "single"; + expectedTraits = Float32Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabDouble(testCase) + import arrow.type.traits.* + + type = "double"; + expectedTraits = Float64Traits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabLogical(testCase) + import arrow.type.traits.* + + type = "logical"; + expectedTraits = BooleanTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabString(testCase) + import arrow.type.traits.* + + type = "string"; + expectedTraits = StringTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestMatlabDatetime(testCase) + import arrow.type.traits.* + + type = "datetime"; + expectedTraits = TimestampTraits(); + + actualTraits = traits(type); + + testCase.verifyEqual(actualTraits, expectedTraits); + end + + function TestErrorIfUnsupportedMatlabClass(testCase) + import arrow.type.traits.* + + type = "not-a-class"; + + testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedMatlabClass"); + end + + function TestErrorIfUnsupportedInputType(testCase) + import arrow.type.traits.* + + type = 123; + testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType"); + + type = {'double'}; + testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType"); + + 
type = datetime(2023, 1, 1); + testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType"); + end + + end + +end \ No newline at end of file From b9759776535583549a9f3ef4cb5b20f44f42f8aa Mon Sep 17 00:00:00 2001 From: David Li Date: Thu, 13 Jul 2023 14:09:41 -0400 Subject: [PATCH 12/35] GH-36669: [Go] Guard against garbage in C Data structures (#36670) ### Rationale for this change Prevent hard to debug crashes when using Go code with other code via C Data Interface. ### What changes are included in this PR? In the C Stream Interface implementation, jump through a trampoline that zeroes the out parameters before letting Go see them. Note that this can only guard against the issue when the C Stream Interface is used. Also, fix other issues in the C Data Interface tests with invalid pointers and uninitialized memory that were turned up by the new test here (because it calls `runtime.GC` very frequently). ### Are these changes tested? Yes ### Are there any user-facing changes? 
No **This PR contains a "Critical Fix".** * Closes: #36669 Lead-authored-by: David Li Co-authored-by: Matt Topol Signed-off-by: David Li --- go/arrow/cdata/cdata_exports.go | 10 ++++---- go/arrow/cdata/cdata_fulltest.c | 33 +++++++++++++++++++++++++ go/arrow/cdata/cdata_test.go | 26 ++++++++++++++++++++ go/arrow/cdata/cdata_test_framework.go | 34 ++++++++++++++++++++------ go/arrow/cdata/exports.go | 18 ++++++++------ go/arrow/cdata/interface.go | 20 +++++++++++++++ go/arrow/cdata/trampoline.c | 34 ++++++++++++++++++++++++++ 7 files changed, 156 insertions(+), 19 deletions(-) create mode 100644 go/arrow/cdata/trampoline.c diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 7b2f10ea66723..dae9f5fefe242 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -283,7 +283,7 @@ func (exp *schemaExporter) export(field arrow.Field) { func allocateArrowSchemaArr(n int) (out []CArrowSchema) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowSchema * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowSchema)) s.Len = n s.Cap = n @@ -292,7 +292,7 @@ func allocateArrowSchemaArr(n int) (out []CArrowSchema) { func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))))) s.Len = n s.Cap = n @@ -301,7 +301,7 @@ func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { func allocateArrowArrayArr(n int) (out []CArrowArray) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowArray * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowArray)) s.Len = n s.Cap = n @@ -310,7 +310,7 @@ func allocateArrowArrayArr(n int) (out []CArrowArray) { func 
allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowArray)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowArray)(nil))))) s.Len = n s.Cap = n @@ -319,7 +319,7 @@ func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { func allocateBufferPtrArr(n int) (out []*C.void) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*C.void)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*C.void)(nil))))) s.Len = n s.Cap = n diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c index b85e1e8310f94..7aed597942b51 100644 --- a/go/arrow/cdata/cdata_fulltest.c +++ b/go/arrow/cdata/cdata_fulltest.c @@ -404,6 +404,7 @@ void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) int test_exported_stream(struct ArrowArrayStream* stream) { while (1) { struct ArrowArray array; + memset(&array, 0, sizeof(array)); // Garbage - implementation should not try to call it, though! array.release = (void*)0xDEADBEEF; int rc = stream->get_next(stream, &array); @@ -447,3 +448,35 @@ void test_stream_schema_fallible(struct ArrowArrayStream* stream) { stream->private_data = &kFallibleStream; stream->release = FallibleRelease; } + +int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) { + struct ArrowSchema schema; + // Try to confuse the Go GC by putting what looks like a Go pointer here. 
+#ifdef _WIN32 + // Thread-safe on Windows with the multithread CRT +#define DORAND rand() +#else +#define DORAND rand_r(&seed) +#endif + schema.name = (char*)(0xc000000000L + (DORAND % 0x2000)); + schema.format = (char*)(0xc000000000L + (DORAND % 0x2000)); + int rc = stream->get_schema(stream, &schema); + if (rc != 0) return rc; + schema.release(&schema); + + while (1) { + struct ArrowArray array; + array.release = (void*)(0xc000000000L + (DORAND % 0x2000)); + array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000)); + int rc = stream->get_next(stream, &array); + if (rc != 0) return rc; + + if (array.release == NULL) { + stream->release(stream); + break; + } + array.release(&array); + } + return 0; +#undef DORAND +} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index f336dec3707da..f4c09000cbfdb 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -29,6 +29,7 @@ import ( "io" "runtime" "runtime/cgo" + "sync" "testing" "time" "unsafe" @@ -940,3 +941,28 @@ func TestRecordReaderImportError(t *testing.T) { } assert.Contains(t, err.Error(), "Expected error message") } + +func TestConfuseGoGc(t *testing.T) { + // Regression test for https://github.com/apache/arrow-adbc/issues/729 + reclist := arrdata.Records["primitives"] + + var wg sync.WaitGroup + concurrency := 32 + wg.Add(concurrency) + + // XXX: this test is a bit expensive + for i := 0; i < concurrency; i++ { + go func() { + for i := 0; i < 256; i++ { + rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist) + assert.NoError(t, err) + runtime.GC() + assert.NoError(t, confuseGoGc(rdr)) + runtime.GC() + } + wg.Done() + }() + } + + wg.Wait() +} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index fb6122964168b..c731c730c6bcd 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -21,11 +21,16 @@ package cdata // #include // #include +// #include // #include 
"arrow/c/abi.h" // #include "arrow/c/helpers.h" // // void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); -// struct ArrowArray* get_test_arr() { return (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); } +// struct ArrowArray* get_test_arr() { +// struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); +// memset(array, 0, sizeof(*array)); +// return array; +// } // struct ArrowArrayStream* get_test_stream() { // struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); // memset(out, 0, sizeof(struct ArrowArrayStream)); @@ -56,11 +61,13 @@ package cdata // struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n); // int test_exported_stream(struct ArrowArrayStream* stream); // void test_stream_schema_fallible(struct ArrowArrayStream* stream); +// int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed); import "C" import ( "errors" "fmt" "io" + "math/rand" "unsafe" "github.com/apache/arrow/go/v13/arrow" @@ -271,15 +278,17 @@ func createCArr(arr arrow.Array) *CArrowArray { carr.null_count = C.int64_t(arr.NullN()) carr.offset = C.int64_t(arr.Data().Offset()) buffers := arr.Data().Buffers() - cbuf := []unsafe.Pointer{} - for _, b := range buffers { + cbufs := allocateBufferPtrArr(len(buffers)) + for i, b := range buffers { if b != nil { - cbuf = append(cbuf, C.CBytes(b.Bytes())) + cbufs[i] = (*C.void)(C.CBytes(b.Bytes())) + } else { + cbufs[i] = nil } } - carr.n_buffers = C.int64_t(len(cbuf)) - if len(cbuf) > 0 { - carr.buffers = &cbuf[0] + carr.n_buffers = C.int64_t(len(cbufs)) + if len(cbufs) > 0 { + carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) } carr.release = (*[0]byte)(C.release_test_arr) @@ -350,3 +359,14 @@ func fallibleSchemaTest() error { } return nil } + +func confuseGoGc(reader array.RecordReader) error { + out := C.get_test_stream() + ExportRecordReader(reader, out) + rc := 
C.confuse_go_gc(out, C.uint(rand.Int())) + C.free(unsafe.Pointer(out)) + if rc == 0 { + return nil + } + return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) +} diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go index 2bbd45e58af01..118dec2c38b96 100644 --- a/go/arrow/cdata/exports.go +++ b/go/arrow/cdata/exports.go @@ -28,11 +28,14 @@ import ( // #include // #include "arrow/c/helpers.h" // -// typedef const char cchar_t; -// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); -// extern const char* streamGetError(struct ArrowArrayStream*); -// extern void streamRelease(struct ArrowArrayStream*); +// typedef const char cchar_t; +// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); +// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); +// extern const char* streamGetError(struct ArrowArrayStream*); +// extern void streamRelease(struct ArrowArrayStream*); +// // XXX(https://github.com/apache/arrow-adbc/issues/729) +// int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out); +// int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out); // import "C" @@ -154,10 +157,11 @@ func streamRelease(handle *CArrowArrayStream) { } func exportStream(rdr array.RecordReader, out *CArrowArrayStream) { - out.get_schema = (*[0]byte)(C.streamGetSchema) - out.get_next = (*[0]byte)(C.streamGetNext) + out.get_schema = (*[0]byte)(C.streamGetSchemaTrampoline) + out.get_next = (*[0]byte)(C.streamGetNextTrampoline) out.get_last_error = (*[0]byte)(C.streamGetError) out.release = (*[0]byte)(C.streamRelease) + rdr.Retain() h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil}) out.private_data = createHandle(h) } diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index 64b8176ad221a..50404878005b9 100644 --- 
a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -198,6 +198,11 @@ func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio // the populating of the struct. Any memory allocated will be allocated using malloc // which means that it is invisible to the Go Garbage Collector and must be freed manually // using the callback on the CArrowSchema object. +// +// WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()} exportField(dummy, out) @@ -220,6 +225,11 @@ func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { // The release function on the populated CArrowArray will properly decrease the reference counts, // and release the memory if the record has already been released. But since this must be explicitly // done, make sure it is released so that you do not create a memory leak. +// +// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) { children := make([]arrow.ArrayData, rb.NumCols()) for i := range rb.Columns() { @@ -243,6 +253,11 @@ func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrow // being used by the arrow.Array passed in, in order to share with zero-copy across the C // Data Interface. 
See the documentation for ExportArrowRecordBatch for details on how to ensure // you do not leak memory and prevent unwanted, undefined or strange behaviors. +// +// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportArray(arr, out, outSchema) } @@ -252,6 +267,11 @@ func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema // CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release // callback, as such it is unnecesary to call Release on the passed in reader unless it has // previously been retained. +// +// WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage +// collector may error at runtime, due to CGO rules ("the current implementation may +// sometimes cause a runtime error if the contents of the C memory appear to be a Go +// pointer"). You have been warned! func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) { exportStream(reader, out) } diff --git a/go/arrow/cdata/trampoline.c b/go/arrow/cdata/trampoline.c new file mode 100644 index 0000000000000..01db13fab4845 --- /dev/null +++ b/go/arrow/cdata/trampoline.c @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "arrow/c/abi.h" + +int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); +int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); + +int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) { + // XXX(https://github.com/apache/arrow-adbc/issues/729) + memset(out, 0, sizeof(*out)); + return streamGetSchema(stream, out); +} + +int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) { + // XXX(https://github.com/apache/arrow-adbc/issues/729) + memset(out, 0, sizeof(*out)); + return streamGetNext(stream, out); +} From 8245b214ae63fe5e33fab69d7d4223850c9ce37b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 14 Jul 2023 09:44:38 +0900 Subject: [PATCH 13/35] GH-36610: [CI][C++] Don't enable ARROW_ACERO by default (#36611) ### Rationale for this change Because it's not a required component. ### What changes are included in this PR? Use `OFF` by default. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #36610 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/scripts/cpp_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index fd682d0e2a62a..f0f893c419616 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -83,7 +83,7 @@ pushd ${build_dir} cmake \ -Dabsl_SOURCE=${absl_SOURCE:-} \ - -DARROW_ACERO=${ARROW_ACERO:-ON} \ + -DARROW_ACERO=${ARROW_ACERO:-OFF} \ -DARROW_AZURE=${ARROW_AZURE:-OFF} \ -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ From a4384d9d48d731e40cbaf035cc9862b534fe3737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 14 Jul 2023 04:55:14 +0200 Subject: [PATCH 14/35] GH-36634: [Dev] Ensure merge script goes over all pages when requesting info from GitHub (#36637) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We currently were missing maintenance branches due to pagination on GH API. ### What changes are included in this PR? Check whether the API is returning a paginated view and extend the list returned. ### Are these changes tested? I have tested locally: ``` (Pdb) pr.maintenance_branches ['maint-0.11.x', 'maint-0.12.x', 'maint-0.14.x', 'maint-0.15.x', 'maint-0.17.x', 'maint-1.0.x', 'maint-3.0.x', 'maint-4.0.x', 'maint-6.0.x', 'maint-7.0.x', 'maint-7.0.1', 'maint-8.0.x', 'maint-9.0.0', 'maint-10.0.x', 'maint-10.0.0', 'maint-10.0.1', 'maint-11.0.0', 'maint-12.0.x', 'maint-12.0.0', 'maint-12.0.1', 'maint-13.0.0'] (Pdb) c Enter fix version [14.0.0]: ``` ### Are there any user-facing changes? 
No * Closes: #36634 Lead-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/merge_arrow_pr.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 90b2e9b034eea..0f36a5ba9025c 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -78,7 +78,24 @@ def get_json(url, headers=None): response = requests.get(url, headers=headers) if response.status_code != 200: raise ValueError(response.json()) - return response.json() + # GitHub returns a link header with the next, previous, last + # page if there is pagination on the response. See: + # https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api#using-link-headers + next_responses = None + if "link" in response.headers: + links = response.headers['link'].split(', ') + for link in links: + if 'rel="next"' in link: + # Format: '; rel="next"' + next_url = link.split(";")[0][1:-1] + next_responses = get_json(next_url, headers) + responses = response.json() + if next_responses: + if isinstance(responses, list): + responses.extend(next_responses) + else: + raise ValueError('GitHub response was paginated and is not a list') + return responses def run_cmd(cmd): From 5037ab14f2a7686c38f6653b5f39ebe7feaf200f Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Thu, 13 Jul 2023 22:36:11 -0700 Subject: [PATCH 15/35] GH-36621: [C++] Add documentation for ACERO_ALIGNMENT_HANDLING (#36622) ### Rationale for this change To document an existing environment variable that was not documented. ### What changes are included in this PR? Adds a documentation section for this environment variable. ### Are these changes tested? No, there are only docs. ### Are there any user-facing changes? 
New documentation * Closes: #36621 Lead-authored-by: Weston Pace Co-authored-by: Antoine Pitrou Signed-off-by: Sutou Kouhei --- docs/source/cpp/env_vars.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 06fd73ffd0d98..e8490735926c1 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -26,6 +26,29 @@ Arrow C++ at runtime. Many of these variables are inspected only once per process (for example, when the Arrow C++ DLL is loaded), so you cannot assume that changing their value later will have an effect. +.. envvar:: ACERO_ALIGNMENT_HANDLING + + Arrow C++'s Acero module performs computation on streams of data. This + computation may involve a form of "type punning" that is technically + undefined behavior if the underlying array is not properly aligned. On + most modern CPUs this is not an issue, but some older CPUs may crash or + suffer poor performance. For this reason it is recommended that all + incoming array buffers are properly aligned, but some data sources + such as :ref:`Flight ` may produce unaligned buffers. + + The value of this environment variable controls what will happen when + Acero detects an unaligned buffer: + + - ``warn``: a warning is emitted + - ``ignore``: nothing, alignment checking is disabled + - ``reallocate``: the buffer is reallocated to a properly aligned address + - ``error``: the operation fails with an error + + The default behavior is ``warn``. On modern hardware it is usually safe + to change this to ``ignore``. Changing to ``reallocate`` is the safest + option but this will have a significant performance impact as the buffer + will need to be copied. + .. envvar:: ARROW_DEBUG_MEMORY_POOL Enable rudimentary memory checks to guard against buffer overflows. 
From 601e3c62c358e6a9204cc38fba59f58b5628a1b4 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 14 Jul 2023 16:38:22 +0900 Subject: [PATCH 16/35] GH-36511: [C++][FlightRPC] Get rid of GRPCPP_PP_INCLUDE (#36679) ### Rationale for this change It's for gRPC < 1.10 and we require gRPC >= 1.30.0. So we can get rid of it. ### What changes are included in this PR? Get rid of GRPCPP_PP_INCLUDE. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #36511 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 13 ------------- cpp/src/arrow/flight/flight_test.cc | 4 ---- .../arrow/flight/transport/grpc/customize_grpc.h | 9 --------- cpp/src/arrow/flight/transport/grpc/grpc_client.cc | 5 ----- cpp/src/arrow/flight/transport/grpc/grpc_server.cc | 5 ----- .../flight/transport/grpc/serialization_internal.cc | 5 ----- .../arrow/flight/transport/grpc/util_internal.cc | 4 ---- cpp/src/arrow/util/config.h.cmake | 2 -- 8 files changed, 47 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6488ac13cbe77..9b7dd8b80c38e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -3998,7 +3998,6 @@ if(ARROW_WITH_GRPC) if(GRPC_VENDORED) # Remove "v" from "vX.Y.Z" string(SUBSTRING ${ARROW_GRPC_BUILD_VERSION} 1 -1 ARROW_GRPC_VERSION) - set(GRPCPP_PP_INCLUDE TRUE) # Examples need to link to static Arrow if we're using static gRPC set(ARROW_GRPC_USE_SHARED OFF) else() @@ -4007,18 +4006,6 @@ if(ARROW_WITH_GRPC) else() set(ARROW_GRPC_VERSION ${gRPC_VERSION}) endif() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
- get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES) - if(GRPC_INCLUDE_DIR MATCHES "^\\$<" - OR # generator expression - EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") - set(GRPCPP_PP_INCLUDE TRUE) - elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") - endif() if(ARROW_USE_ASAN) # Disable ASAN in system gRPC. add_library(gRPC::grpc_asan_suppressed INTERFACE IMPORTED) diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index d56dc81e356bd..1e7ea9bb002bb 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -46,11 +46,7 @@ #error "gRPC headers should not be in public API" #endif -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif // Include before test_util.h (boost), contains Windows fixes #include "arrow/flight/platform.h" diff --git a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h index 1085a946966c8..5005fc6b16eb4 100644 --- a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h +++ b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h @@ -31,17 +31,8 @@ #pragma warning(disable : 4267) #endif -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif - -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #ifdef _MSC_VER #pragma warning(pop) diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc index a1d0e3266b4e6..89f088638320e 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc @@ -25,15 +25,10 @@ #include #include -#include "arrow/util/config.h" -#ifdef GRPCPP_PP_INCLUDE #include #if defined(GRPC_NAMESPACE_FOR_TLS_CREDENTIALS_OPTIONS) #include #endif -#else -#include -#endif #include diff --git 
a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc index dcf9c3f8c9f4b..2c7a1d5e99234 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc @@ -25,12 +25,7 @@ #include #include -#include "arrow/util/config.h" -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #include "arrow/buffer.h" #include "arrow/flight/serialization_internal.h" diff --git a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc index 8514ca361df33..cff111d64df91 100644 --- a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc +++ b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc @@ -36,13 +36,8 @@ #include #include -#ifdef GRPCPP_PP_INCLUDE #include #include -#else -#include -#include -#endif #if defined(_MSC_VER) #pragma warning(pop) diff --git a/cpp/src/arrow/flight/transport/grpc/util_internal.cc b/cpp/src/arrow/flight/transport/grpc/util_internal.cc index f9bf26058ad58..f431fc30ec87a 100644 --- a/cpp/src/arrow/flight/transport/grpc/util_internal.cc +++ b/cpp/src/arrow/flight/transport/grpc/util_internal.cc @@ -22,11 +22,7 @@ #include #include -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #include "arrow/flight/transport.h" #include "arrow/flight/types.h" diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index f6fad2016a27e..1008b9c6b9a05 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -57,5 +57,3 @@ #cmakedefine ARROW_WITH_MUSL #cmakedefine ARROW_WITH_OPENTELEMETRY #cmakedefine ARROW_WITH_UCX - -#cmakedefine GRPCPP_PP_INCLUDE From cf558cc9be5ef354cfd00bf990c0933f4c24fa98 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 14 Jul 2023 19:10:26 +0900 Subject: [PATCH 17/35] GH-36680: [Python] Add missing pytest.mark.acero (#36683) MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change `test_invalid_non_join_column` depends on Acero. ### What changes are included in this PR? Add `@ pytest.mark.acero`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #36680 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- python/pyarrow/tests/test_table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 457734bb73325..61cfb1af587a7 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -2424,6 +2424,7 @@ def test_numpy_asarray(constructor): assert result.dtype == "int32" +@pytest.mark.acero def test_invalid_non_join_column(): NUM_ITEMS = 30 t1 = pa.Table.from_pydict({ From fd9f4de6f89853f257bb61f0608aca839e63ca73 Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 14 Jul 2023 15:13:47 -0400 Subject: [PATCH 18/35] MINOR: [Go] Test that we retain/release the right number of times (#36676) ### Rationale for this change Just make sure that we're retaining/releasing the correct number of times in the C Data Interface. ### What changes are included in this PR? Add a unit test ### Are these changes tested? Yes ### Are there any user-facing changes? 
No Authored-by: David Li Signed-off-by: David Li --- go/arrow/cdata/cdata_test.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index f4c09000cbfdb..0c4bbae3d5526 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -769,6 +769,34 @@ func TestExportRecordReaderStream(t *testing.T) { assert.EqualValues(t, len(reclist), i) } +func TestExportRecordReaderStreamLifetime(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "strings", Type: arrow.BinaryTypes.String, Nullable: false}, + }, nil) + + bldr := array.NewBuilder(mem, &arrow.StringType{}) + defer bldr.Release() + + arr := bldr.NewArray() + defer arr.Release() + + rec := array.NewRecord(schema, []arrow.Array{arr}, 0) + defer rec.Release() + + rdr, _ := array.NewRecordReader(schema, []arrow.Record{rec}) + defer rdr.Release() + + out := createTestStreamObj() + ExportRecordReader(rdr, out) + + // C Stream is holding on to memory + assert.NotEqual(t, 0, mem.CurrentAlloc()) + releaseStream(out) +} + func TestEmptyListExport(t *testing.T) { bldr := array.NewBuilder(memory.DefaultAllocator, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)) defer bldr.Release() From 1bee64384c7ea9513cdd2fc48e2a385e51011ed4 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Jul 2023 17:12:00 -0300 Subject: [PATCH 19/35] GH-36687: [R] Add correct branch name to autobrew formulae to facilitate local testing (#36689) ### Rationale for this change It is currently not possible to recreate an autobrew build locally by following the instructions in the comments. This fixes the local copies of the upstream formulas and the instructions so that future debuggers can recreate an autobrew build. ### What changes are included in this PR? The branch `master` no longer exists and is the default value. 
This PR adds the revised default branch name ("main"). ### Are these changes tested? No nightly test covers this because this value would be overwritten to test specific commits anyway. ### Are there any user-facing changes? No. * Closes: #36687 Authored-by: Dewey Dunnington Signed-off-by: Sutou Kouhei --- dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb | 2 +- dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb | 2 +- r/configure | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb index c0df6a32175eb..4586649d0c0bc 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb @@ -25,7 +25,7 @@ class ApacheArrowStatic < Formula # Uncomment and update to test on a release candidate # mirror "https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-8.0.0-rc1/apache-arrow-8.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" - head "https://github.com/apache/arrow.git" + head "https://github.com/apache/arrow.git", branch: "main" bottle do sha256 cellar: :any, arm64_big_sur: "ef89d21a110b89840cc6148add685d407e75bd633bc8f79625eb33d00e3694b4" diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index c09436d777ae9..a5194eea3f7d0 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -21,7 +21,7 @@ class ApacheArrow < Formula homepage "https://arrow.apache.org/" url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-12.0.1.9000/apache-arrow-12.0.1.9000.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" - head "https://github.com/apache/arrow.git" + head "https://github.com/apache/arrow.git", branch: "main" bottle do cellar :any diff --git 
a/r/configure b/r/configure index 198a89cd85e83..e0198773459f4 100755 --- a/r/configure +++ b/r/configure @@ -238,7 +238,7 @@ do_autobrew () { # Setup for local autobrew testing if [ -f "tools/apache-arrow.rb" ]; then # If you want to use a local apache-arrow.rb formula, do - # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb + # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools # before R CMD build or INSTALL (assuming a local checkout of the apache/arrow repository). # If you have this, you should use the local autobrew script so they match. cp tools/autobrew . From d8982fef34dcc6cda58ff2051804932f7bd39c37 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 14 Jul 2023 17:12:33 -0300 Subject: [PATCH 20/35] MINOR: [R] Remove trailing whitespace in configure (#36690) ### What changes are included in this PR? Fixes trailing whitespace in configure so that it's easier to get a clean commit history on the file. ### Are these changes tested? No. ### Are there any user-facing changes? No. Authored-by: Dewey Dunnington Signed-off-by: Sutou Kouhei --- r/configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/configure b/r/configure index e0198773459f4..a0f75f8ddb5cd 100755 --- a/r/configure +++ b/r/configure @@ -50,10 +50,10 @@ # Currently the configure script doesn't offer much to make this easy. # If you expect to rebuild multiple times, you should set up a dev # environment. -# * Installing a dev version as a regular developer. +# * Installing a dev version as a regular developer. # The best way is to maintain your own cmake build and install it # to a directory (not system) that you set as the env var -# $ARROW_HOME. +# $ARROW_HOME. # # For more information, see the various installation and developer vignettes. 
@@ -177,7 +177,7 @@ find_arrow () { else PC_LIB_VERSION=`grep '^Version' ${_LIBARROW_FOUND}/lib/pkgconfig/arrow.pc | sed s/Version:\ //` fi - # This is in an R script for convenience and testability. + # This is in an R script for convenience and testability. # Success means the found C++ library is ok to use. # Error means the versions don't line up and we shouldn't use it. # More specific messaging to the user is in the R script From 9ebb3d790f152965b1a756618edf38ad7ea0fcac Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 17 Jul 2023 01:04:36 +0900 Subject: [PATCH 21/35] GH-36686: [C++] Pass CMAKE_OSX_SYSROOT to external projects (#36706) ### Rationale for this change If we use different macOS SDK in Apache Arrow C++ and bundled projects, it will cause some problems such as a build error. ### What changes are included in this PR? Pass `CMAKE_OSX_SYSROOT` explicitly to external projects. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #36686 Authored-by: Sutou Kouhei Signed-off-by: Dewey Dunnington --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + dev/tasks/macros.jinja | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9b7dd8b80c38e..57defe0b36242 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -913,6 +913,7 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=${CMAKE_EXPORT_NO_PACKAGE_REGISTRY} -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=${CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY} -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}) # Enable s/ccache if set by parent. 
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index 4c65a64c81052..475494af18ce6 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -238,13 +238,14 @@ on: cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools/ # Pin the git commit in the formula to match - cd tools + pushd tools if [ "{{ is_fork }}" == "true" ]; then - sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + sed -i.bak -E -e 's/apache\/arrow.git", branch: "main"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb else - sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + sed -i.bak -E -e 's/arrow.git", branch: "main"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb fi rm -f apache-arrow*.rb.bak + popd {% endmacro %} {%- macro github_change_r_pkg_version(is_fork, version) -%} From ed87a5b7f5ee1081d5613532c28de8e687b8e397 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 16 Jul 2023 19:10:29 -0300 Subject: [PATCH 22/35] GH-36456: [R] Link to correct version of OpenSSL when using autobrew (#36551) ### Rationale for this change The r-binary-packages job (which uses autobrew) and the autobrew nightly jobs are failing because they are linking to a different version of OpenSSL than the package was built against. I believe this occurred because Arrow and its dependencies are built against the autobrew headers which included openssl. The `ssl` and `crypto` libraries weren't explicitly linked, so I think whatever LibreSSL fork MacOS installs by default was getting linked. This was perhaps compatible using the version of autobrew for High Sierra/the version of LibreSSL on High Sierra but was not compatible with the version of autobrew for Big Sur/the version of LibreSSL on Big Sur. ### What changes are included in this PR? 
This PR explicitly adds OpenSSL 1.1 to the autobrew formulas and explicitly adds `-lssl -lcrypto` to the PKG_LIBS (1.1 because that's what was in the corresponding homebrew formula). ### Are these changes tested? Existing nightly tests cover these changes. ### Are there any user-facing changes? No. * Closes: #36456 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Co-authored-by: Sutou Kouhei Signed-off-by: Dewey Dunnington --- dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb | 1 + r/tools/autobrew | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index a5194eea3f7d0..b47d0edfe0dd7 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -35,6 +35,7 @@ class ApacheArrow < Formula depends_on "aws-sdk-cpp" depends_on "brotli" depends_on "lz4" + depends_on "openssl@1.1" depends_on "snappy" depends_on "thrift" depends_on "zstd" diff --git a/r/tools/autobrew b/r/tools/autobrew index f181309892174..35ffebcab3796 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -62,7 +62,7 @@ fi # Hardcode this for my custom autobrew build rm -f $BREWDIR/lib/*.dylib AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -laws-crt-cpp -laws-c-io -laws-c-s3 -laws-c-auth -laws-c-http -laws-c-cal -laws-c-compression -laws-c-mqtt -lpthread -lcurl" -PKG_LIBS="-lparquet -larrow_dataset -larrow_acero -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy -lzstd $AWS_LIBS" +PKG_LIBS="-lparquet -larrow_dataset -larrow_acero -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy 
-lzstd $AWS_LIBS -lssl -lcrypto" PKG_DIRS="-L$BREWDIR/lib" # Prevent CRAN builder from linking against old libs in /usr/local/lib From fb7fb0db60aac7e48f6434b48aa23ada5c4885a2 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 17 Jul 2023 22:18:18 +0900 Subject: [PATCH 23/35] GH-36707: [C++] Use ARROW_PACKAGE_PREFIX for OPENSSL_ROOT_DIR too (#36710) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change In general, a CMake package uses `${PACKAGE}_ROOT` variable to detect `PACKAGE` but `FindOpenSSL.cmake` uses `OPENSSL_ROOT_DIR` not `OpenSSL_ROOT`. ### What changes are included in this PR? Set `OPENSSL_ROOT_DIR` explicitly. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #36707 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 57defe0b36242..635bc1684e6f2 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -138,6 +138,9 @@ if(ARROW_PACKAGE_PREFIX) if(NOT ENV{Boost_ROOT}) set(ENV{Boost_ROOT} ${ARROW_PACKAGE_PREFIX}) endif() + if(NOT DEFINED OPENSSL_ROOT_DIR) + set(OPENSSL_ROOT_DIR ${ARROW_PACKAGE_PREFIX}) + endif() endif() # For each dependency, set dependency source to global default, if unset From 2c2333c085d0d026ac9cb86eb709662c236df0d8 Mon Sep 17 00:00:00 2001 From: Justin Heesemann Date: Mon, 17 Jul 2023 18:22:35 +0200 Subject: [PATCH 24/35] GH-36318: [Go] only decode lengths for the number of existing values, not for all nvalues. (#36322) ### Rationale for this change Fixes issue 36318. DeltaLengthBinaryArray Encoding fails to handle null values. ### What changes are included in this PR? 
Instead of decoding lengths for "all" values (even the undefined ones), we only decode the lengths for the actually set values. The Go Version of arrow was unable to read parquet files it produced itself if the mentioned encoding was used but the values contain nulls. ### Are these changes tested? Tests are included. ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** * Closes: #36318 Authored-by: Justin Heesemann Signed-off-by: Matt Topol --- go/parquet/file/file_reader_test.go | 52 +++++++++++++++++++ .../internal/encoding/delta_bit_packing.go | 4 +- .../encoding/delta_byte_array_test.go | 47 +++++++++++++++++ .../encoding/delta_length_byte_array.go | 2 +- 4 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 go/parquet/internal/encoding/delta_byte_array_test.go diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index 6b201cadcee26..fa5a51cb5b8d5 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -333,3 +333,55 @@ func TestIncompleteMetadata(t *testing.T) { _, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) assert.Error(t, err) } + +func TestDeltaLengthByteArrayPackingWithNulls(t *testing.T) { + // produce file with DeltaLengthByteArray Encoding with mostly null values but one actual value. 
+ root, _ := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{ + schema.NewByteArrayNode("byte_array_col", parquet.Repetitions.Optional, -1), + }, -1) + props := parquet.NewWriterProperties(parquet.WithVersion(parquet.V2_LATEST), + parquet.WithEncoding(parquet.Encodings.DeltaLengthByteArray), parquet.WithDictionaryDefault(false)) + sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) + + writer := file.NewParquetWriter(sink, root, file.WithWriterProps(props)) + rgw := writer.AppendRowGroup() + ccw, err := rgw.NextColumn() + assert.NoError(t, err) + const elements = 500 + data := make([]parquet.ByteArray, elements) + data[0] = parquet.ByteArray{1, 2, 3, 4, 5, 6, 7, 8} + + defLvls := make([]int16, elements) + repLvls := make([]int16, elements) + defLvls[0] = 1 + + _, err = ccw.(*file.ByteArrayColumnChunkWriter).WriteBatch(data, defLvls, repLvls) + assert.NoError(t, err) + assert.NoError(t, ccw.Close()) + assert.NoError(t, rgw.Close()) + assert.NoError(t, writer.Close()) + buf := sink.Finish() + defer buf.Release() + + // read file back in + reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + assert.NoError(t, err) + defer reader.Close() + ccr, err := reader.RowGroup(0).Column(0) + assert.NoError(t, err) + const batchSize = 500 + + for ccr.HasNext() { + readData := make([]parquet.ByteArray, batchSize) + readdevLvls := make([]int16, batchSize) + readrepLvls := make([]int16, batchSize) + cr := ccr.(*file.ByteArrayColumnChunkReader) + + total, read, err := cr.ReadBatch(batchSize, readData, readdevLvls, readrepLvls) + assert.NoError(t, err) + assert.Equal(t, int64(batchSize), total) + assert.Equal(t, 1, read) + assert.Equal(t, data[0], readData[0]) + assert.NotNil(t, readData[0]) + } +} diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index 2ebe6ad98354c..ab542eabb2d3d 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ 
b/go/parquet/internal/encoding/delta_bit_packing.go @@ -156,7 +156,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.MinInt(len(out), d.nvals) + max := shared_utils.MinInt(len(out), int(d.totalValues)) if max == 0 { return 0, nil } @@ -315,7 +315,7 @@ const ( // Consists of a header followed by blocks of delta encoded values binary packed. // // Format -// [header] [block 1] [block 2] ... [block N] +// [header] [block 1] [block 2] ... [block N] // // Header // [block size] [number of mini blocks per block] [total value count] [first value] diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go new file mode 100644 index 0000000000000..1c008505252fb --- /dev/null +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package encoding + +import ( + "fmt" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/parquet" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestDeltaByteArrayDecoder_SetData(t *testing.T) { + tests := []struct { + name string + nvalues int + data []byte + wantErr assert.ErrorAssertionFunc + }{ + { + name: "null only page", + nvalues: 126609, + data: []byte{128, 1, 4, 0, 0}, + wantErr: assert.NoError, + }, + } + for _, tt := range tests { + d := NewDecoder(parquet.Types.ByteArray, parquet.Encodings.DeltaLengthByteArray, nil, memory.DefaultAllocator) + t.Run(tt.name, func(t *testing.T) { + tt.wantErr(t, d.SetData(tt.nvalues, tt.data), fmt.Sprintf("SetData(%v, %v)", tt.nvalues, tt.data)) + }) + } +} diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index c11ded1b8f352..d719dcf829cbd 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -117,7 +117,7 @@ func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { if err := dec.SetData(nvalues, data); err != nil { return err } - d.lengths = make([]int32, nvalues) + d.lengths = make([]int32, dec.totalValues) dec.Decode(d.lengths) return d.decoder.SetData(nvalues, data[int(dec.bytesRead()):]) From b4775012ab0f7d8f996538af69f7d1c732f16ff0 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 17 Jul 2023 10:43:22 -0700 Subject: [PATCH 25/35] GH-36688: [C#] Fix dereference error (#36691) * Closes: #36688 Authored-by: Curt Hagenlocher Signed-off-by: Weston Pace --- csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs index 56e0468f9415c..c748eed915d89 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs 
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs @@ -165,7 +165,7 @@ sealed unsafe class ExportedArrayStream : IDisposable public static void Free(void** ptr) { - GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); + GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr)); if (!gch.IsAllocated) { return; From 752552c7c460596ef24dd6ea85a71c8b5071d379 Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Mon, 17 Jul 2023 14:51:05 -0400 Subject: [PATCH 26/35] GH-36652: [MATLAB] Initialize the `Type` property of `arrow.array.Array` subclasses from existing proxy ids (#36731) ### Rationale for this change Now that the issue #36363 is closed via PR #36419, we can initialize the `Type` property of `arrow.array.Array` subclasses from existing proxy ids. Currently, we create a new proxy `Type` object whose underlying `arrow::DataType` is semantically equal to - but not the same as - the `arrow::DataType` owned by the Array proxy. It would be preferable if the `Type` and `Array` proxy classes refer to the same `arrow::DataType` object (i.e. the same object on the heap). ### What changes are included in this PR? 1. Upgraded `libmexclass` to commit [d04f88d](https://github.com/mathworks/libmexclass/commit/d04f88d2a6f6dcf65d595183eda03c4b66b2961f). In this commit, we added a static "make-like" function to `Proxy` called `create`. 2. Modified the constructors of all `Type` objects to expect a single `Proxy` object as input. This is a breaking change and means clients are no longer expected to build `Type` objects via their constructors. Instead, we introduced standalone functions that clients can use to construct `Type` objects, e.g. `arrow.type.int8`, `arrow.type.string`, `arrow.type.timestamp`, etc. These functions deal with creating the `Proxy` objects to pass to the `Type` constructors. Below is an example of the new workflow for creating `Type` objects. 
```matlab >> timestampType = arrow.type.timestamp(TimeUnit="second", TimeZone="America/New_York") timestampType = TimestampType with properties: ID: Timestamp ``` NOTE: We plan on enhancing the display to show the `TimeUnit` and `TimeZone` properties. 3. Made `Type` a [dependent](https://www.mathworks.com/help/matlab/matlab_oop/access-methods-for-dependent-properties.html) property on `arrow.array.Array`. The `get.Type` method constructs a `Type` object on demand by making a proxy that wraps the same `arrow::DataType` object stored within the `arrow::Array`. ### Are these changes tested? Yes, updated existing tests. ### Are there any user-facing changes? Yes, we added new standalone functions for creating `Type` objects. Below is a table mapping standalone functions to the `Type` object they output: | Standalone Function | Output Type Object | |----------------------|---------------------| |`arrow.type.boolean`| `arrow.type.BooleanType`| |`arrow.type.int8`| `arrow.type.Int8Type`| |`arrow.type.int16`| `arrow.type.Int16Type`| |`arrow.type.int32`| `arrow.type.Int32Type`| |`arrow.type.int64`| `arrow.type.Int64Type`| |`arrow.type.uint8`| `arrow.type.UInt8Type`| |`arrow.type.uint16`| `arrow.type.UInt16Type`| |`arrow.type.uint32`| `arrow.type.UInt32Type`| |`arrow.type.uint64`| `arrow.type.UInt64Type`| |`arrow.type.string`| `arrow.type.StringType`| |`arrow.type.timestamp`| `arrow.type.TimestampType`| ### Notes Thanks @ kevingurney for the advice! 
* Closes: #36652 Authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../src/cpp/arrow/matlab/array/proxy/array.cc | 21 +++++++++++- .../src/cpp/arrow/matlab/array/proxy/array.h | 5 +++ .../arrow/matlab/array/proxy/boolean_array.cc | 7 ++++ .../arrow/matlab/array/proxy/boolean_array.h | 2 ++ .../arrow/matlab/array/proxy/numeric_array.h | 9 +++++ .../arrow/matlab/array/proxy/string_array.cc | 8 +++++ .../arrow/matlab/array/proxy/string_array.h | 3 ++ .../matlab/array/proxy/timestamp_array.cc | 8 +++++ .../matlab/array/proxy/timestamp_array.h | 3 ++ matlab/src/matlab/+arrow/+array/Array.m | 9 ++++- .../src/matlab/+arrow/+array/BooleanArray.m | 4 --- .../src/matlab/+arrow/+array/Float32Array.m | 4 --- .../src/matlab/+arrow/+array/Float64Array.m | 4 --- matlab/src/matlab/+arrow/+array/Int16Array.m | 4 --- matlab/src/matlab/+arrow/+array/Int32Array.m | 4 --- matlab/src/matlab/+arrow/+array/Int64Array.m | 4 --- matlab/src/matlab/+arrow/+array/Int8Array.m | 4 --- matlab/src/matlab/+arrow/+array/StringArray.m | 4 --- .../src/matlab/+arrow/+array/TimestampArray.m | 5 --- matlab/src/matlab/+arrow/+array/UInt16Array.m | 4 --- matlab/src/matlab/+arrow/+array/UInt32Array.m | 4 --- matlab/src/matlab/+arrow/+array/UInt64Array.m | 4 --- matlab/src/matlab/+arrow/+array/UInt8Array.m | 4 --- .../matlab/+arrow/+internal/+proxy/create.m | 25 ++++++++++++++ .../matlab/+arrow/+internal/+proxy/validate.m | 29 ++++++++++++++++ matlab/src/matlab/+arrow/+type/BooleanType.m | 8 +++-- .../src/matlab/+arrow/+type/FixedWidthType.m | 7 ++-- matlab/src/matlab/+arrow/+type/Float32Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/Float64Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/Int16Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/Int32Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/Int64Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/Int8Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/StringType.m | 8 +++-- .../src/matlab/+arrow/+type/TimestampType.m | 13 +++----- 
matlab/src/matlab/+arrow/+type/Type.m | 7 ++-- matlab/src/matlab/+arrow/+type/UInt16Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/UInt32Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/UInt64Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/UInt8Type.m | 8 +++-- matlab/src/matlab/+arrow/+type/boolean.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/float32.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/float64.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/int16.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/int32.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/int64.m | 21 ++++++++++++ matlab/src/matlab/+arrow/+type/int8.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/string.m | 21 ++++++++++++ matlab/src/matlab/+arrow/+type/timestamp.m | 25 ++++++++++++++ matlab/src/matlab/+arrow/+type/uint16.m | 21 ++++++++++++ matlab/src/matlab/+arrow/+type/uint32.m | 21 ++++++++++++ matlab/src/matlab/+arrow/+type/uint64.m | 20 +++++++++++ matlab/src/matlab/+arrow/+type/uint8.m | 21 ++++++++++++ matlab/test/arrow/array/tBooleanArray.m | 2 +- matlab/test/arrow/array/tFloat32Array.m | 2 +- matlab/test/arrow/array/tFloat64Array.m | 2 +- matlab/test/arrow/array/tInt16Array.m | 2 +- matlab/test/arrow/array/tInt32Array.m | 2 +- matlab/test/arrow/array/tInt64Array.m | 2 +- matlab/test/arrow/array/tInt8Array.m | 2 +- matlab/test/arrow/array/tStringArray.m | 2 +- matlab/test/arrow/array/tUInt16Array.m | 2 +- matlab/test/arrow/array/tUInt32Array.m | 2 +- matlab/test/arrow/array/tUInt64Array.m | 2 +- matlab/test/arrow/array/tUInt8Array.m | 2 +- matlab/test/arrow/type/hFixedWidthType.m | 7 ++++ matlab/test/arrow/type/tBooleanType.m | 5 +-- matlab/test/arrow/type/tFloat32Type.m | 5 +-- matlab/test/arrow/type/tFloat64Type.m | 6 ++-- matlab/test/arrow/type/tInt16Type.m | 5 +-- matlab/test/arrow/type/tInt32Type.m | 5 +-- matlab/test/arrow/type/tInt64Type.m | 5 +-- matlab/test/arrow/type/tInt8Type.m | 5 +-- matlab/test/arrow/type/tStringType.m | 4 +-- 
matlab/test/arrow/type/tTimestampType.m | 33 +++++++++++-------- matlab/test/arrow/type/tUInt16Type.m | 5 +-- matlab/test/arrow/type/tUInt32Type.m | 5 +-- matlab/test/arrow/type/tUInt64Type.m | 5 +-- matlab/test/arrow/type/tUInt8Type.m | 5 +-- .../cmake/BuildMatlabArrowInterface.cmake | 2 +- 80 files changed, 560 insertions(+), 141 deletions(-) create mode 100644 matlab/src/matlab/+arrow/+internal/+proxy/create.m create mode 100644 matlab/src/matlab/+arrow/+internal/+proxy/validate.m create mode 100644 matlab/src/matlab/+arrow/+type/boolean.m create mode 100644 matlab/src/matlab/+arrow/+type/float32.m create mode 100644 matlab/src/matlab/+arrow/+type/float64.m create mode 100644 matlab/src/matlab/+arrow/+type/int16.m create mode 100644 matlab/src/matlab/+arrow/+type/int32.m create mode 100644 matlab/src/matlab/+arrow/+type/int64.m create mode 100644 matlab/src/matlab/+arrow/+type/int8.m create mode 100644 matlab/src/matlab/+arrow/+type/string.m create mode 100644 matlab/src/matlab/+arrow/+type/timestamp.m create mode 100644 matlab/src/matlab/+arrow/+type/uint16.m create mode 100644 matlab/src/matlab/+arrow/+type/uint32.m create mode 100644 matlab/src/matlab/+arrow/+type/uint64.m create mode 100644 matlab/src/matlab/+arrow/+type/uint8.m diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index 7f4d789c105e2..c2d0330b5f78e 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -20,6 +20,10 @@ #include "arrow/matlab/array/proxy/array.h" #include "arrow/matlab/bit/unpack.h" #include "arrow/matlab/error/error.h" +#include "arrow/type_traits.h" +#include "arrow/visit_array_inline.h" + +#include "libmexclass/proxy/ProxyManager.h" namespace arrow::matlab::array::proxy { @@ -30,6 +34,7 @@ namespace arrow::matlab::array::proxy { REGISTER_METHOD(Array, toMATLAB); REGISTER_METHOD(Array, length); REGISTER_METHOD(Array, valid); + REGISTER_METHOD(Array, 
type); } std::shared_ptr Array::getArray() { @@ -69,4 +74,18 @@ namespace arrow::matlab::array::proxy { auto valid_elements_mda = bit::unpack(validity_bitmap, array_length); context.outputs[0] = valid_elements_mda; } -} + + void Array::type(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + mda::ArrayFactory factory; + + auto type_proxy = typeProxy(); + auto type_id = type_proxy->unwrap()->id(); + auto proxy_id = libmexclass::proxy::ProxyManager::manageProxy(type_proxy); + + context.outputs[0] = factory.createScalar(proxy_id); + context.outputs[1] = factory.createScalar(static_cast(type_id)); + + } +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index c36f1900712e1..55d48c26eff6f 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -18,6 +18,7 @@ #pragma once #include "arrow/array.h" +#include "arrow/matlab/type/proxy/type.h" #include "libmexclass/proxy/Proxy.h" @@ -39,8 +40,12 @@ class Array : public libmexclass::proxy::Proxy { void valid(libmexclass::proxy::method::Context& context); + void type(libmexclass::proxy::method::Context& context); + virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0; + virtual std::shared_ptr typeProxy() = 0; + std::shared_ptr array; }; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc index bcbe49f04bb6d..281a0f732d73a 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc @@ -16,6 +16,7 @@ // under the License. 
#include "arrow/matlab/array/proxy/boolean_array.h" +#include "arrow/matlab/type/proxy/primitive_ctype.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" @@ -54,4 +55,10 @@ namespace arrow::matlab::array::proxy { context.outputs[0] = logical_array_mda; } + std::shared_ptr BooleanArray::typeProxy() { + using BooleanTypeProxy = type::proxy::PrimitiveCType; + + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + } } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h index b3117d852a7d8..5e6e51f0bc8ff 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h @@ -33,6 +33,8 @@ namespace arrow::matlab::array::proxy { protected: void toMATLAB(libmexclass::proxy::method::Context& context) override; + std::shared_ptr typeProxy() override; + }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index d3930c77ca036..c66c1d044fc12 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -24,6 +24,8 @@ #include "arrow/type_traits.h" #include "arrow/matlab/array/proxy/array.h" +#include "arrow/matlab/type/proxy/primitive_ctype.h" + #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" #include "arrow/matlab/bit/unpack.h" @@ -79,6 +81,13 @@ class NumericArray : public arrow::matlab::array::proxy::Array { ::matlab::data::TypedArray result = factory.createArray({num_elements, 1}, data_begin, data_end); context.outputs[0] = result; } + + std::shared_ptr typeProxy() override { + using ArrowTypeProxy = type::proxy::PrimitiveCType; + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + } + }; } diff --git 
a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc index 2a11323a212bb..16331f6195a22 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/matlab/array/proxy/string_array.h" +#include "arrow/matlab/type/proxy/string_type.h" #include "arrow/array/builder_binary.h" @@ -81,4 +82,11 @@ namespace arrow::matlab::array::proxy { context.outputs[0] = array_mda; } + std::shared_ptr StringArray::typeProxy() { + using StringTypeProxy = type::proxy::StringType; + + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + } + } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h index bdcfedd7cdda3..abb2322edbd20 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h @@ -33,6 +33,9 @@ namespace arrow::matlab::array::proxy { protected: void toMATLAB(libmexclass::proxy::method::Context& context) override; + + std::shared_ptr typeProxy() override; + }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc index 17a86e848a868..b9bbf3d7e7942 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc @@ -16,6 +16,7 @@ // under the License. 
#include "arrow/matlab/array/proxy/timestamp_array.h" +#include "arrow/matlab/type/proxy/timestamp_type.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" @@ -88,4 +89,11 @@ namespace arrow::matlab::array::proxy { mda::TypedArray result = factory.createArray({num_elements, 1}, data_begin, data_end); context.outputs[0] = result; } + + std::shared_ptr TimestampArray::typeProxy() { + using TimestampProxyType = type::proxy::TimestampType; + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + + } } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h index 8f28d6165ed2f..a312a129a21c2 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h @@ -35,6 +35,9 @@ class TimestampArray : public arrow::matlab::array::proxy::Array { protected: void toMATLAB(libmexclass::proxy::method::Context& context) override; + + std::shared_ptr typeProxy() override; + }; } diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 9b8796c33b974..7426052764166 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ b/matlab/src/matlab/+arrow/+array/Array.m @@ -26,7 +26,7 @@ Valid % Validity bitmap end - properties(Abstract, SetAccess=private, GetAccess=public) + properties(Dependent, SetAccess=private, GetAccess=public) Type(1, 1) arrow.type.Type end @@ -46,6 +46,13 @@ function matlabArray = toMATLAB(obj) matlabArray = obj.Proxy.toMATLAB(); end + + function type = get.Type(obj) + [proxyID, typeID] = obj.Proxy.type(); + traits = arrow.type.traits.traits(arrow.type.ID(typeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.TypeProxyClassName, ID=proxyID); + type = traits.TypeConstructor(proxy); + end end methods (Access = private) diff --git a/matlab/src/matlab/+arrow/+array/BooleanArray.m 
b/matlab/src/matlab/+arrow/+array/BooleanArray.m index e5c4cc527e552..f4d341efce9d3 100644 --- a/matlab/src/matlab/+arrow/+array/BooleanArray.m +++ b/matlab/src/matlab/+arrow/+array/BooleanArray.m @@ -20,10 +20,6 @@ NullSubstitionValue = false; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.BooleanType - end - methods function obj = BooleanArray(data, opts) arguments diff --git a/matlab/src/matlab/+arrow/+array/Float32Array.m b/matlab/src/matlab/+arrow/+array/Float32Array.m index 29f23393a4346..c6be563d8621f 100644 --- a/matlab/src/matlab/+arrow/+array/Float32Array.m +++ b/matlab/src/matlab/+arrow/+array/Float32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = single(NaN); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Float32Type - end - methods function obj = Float32Array(data, varargin) obj@arrow.array.NumericArray(data, "single", ... diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m index ab92715864275..ff43ebc0536c0 100644 --- a/matlab/src/matlab/+arrow/+array/Float64Array.m +++ b/matlab/src/matlab/+arrow/+array/Float64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = NaN; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Float64Type - end - methods function obj = Float64Array(data, varargin) obj@arrow.array.NumericArray(data, "double", ... diff --git a/matlab/src/matlab/+arrow/+array/Int16Array.m b/matlab/src/matlab/+arrow/+array/Int16Array.m index 23716d5f59ec5..533f0c9ef549d 100644 --- a/matlab/src/matlab/+arrow/+array/Int16Array.m +++ b/matlab/src/matlab/+arrow/+array/Int16Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int16(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int16Type - end - methods function obj = Int16Array(data, varargin) obj@arrow.array.NumericArray(data, "int16", ... 
diff --git a/matlab/src/matlab/+arrow/+array/Int32Array.m b/matlab/src/matlab/+arrow/+array/Int32Array.m index 8844576ae1ef9..0f977fb90f808 100644 --- a/matlab/src/matlab/+arrow/+array/Int32Array.m +++ b/matlab/src/matlab/+arrow/+array/Int32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int32(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int32Type - end - methods function obj = Int32Array(data, varargin) obj@arrow.array.NumericArray(data, "int32", ... diff --git a/matlab/src/matlab/+arrow/+array/Int64Array.m b/matlab/src/matlab/+arrow/+array/Int64Array.m index 9f72c5f2a6854..94cad56519b11 100644 --- a/matlab/src/matlab/+arrow/+array/Int64Array.m +++ b/matlab/src/matlab/+arrow/+array/Int64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int64(0); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int64Type - end - methods function obj = Int64Array(data, varargin) obj@arrow.array.NumericArray(data, "int64", ... diff --git a/matlab/src/matlab/+arrow/+array/Int8Array.m b/matlab/src/matlab/+arrow/+array/Int8Array.m index f9774f6527493..83a14caa27287 100644 --- a/matlab/src/matlab/+arrow/+array/Int8Array.m +++ b/matlab/src/matlab/+arrow/+array/Int8Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int8(0); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int8Type - end - methods function obj = Int8Array(data, varargin) obj@arrow.array.NumericArray(data, "int8", ... 
diff --git a/matlab/src/matlab/+arrow/+array/StringArray.m b/matlab/src/matlab/+arrow/+array/StringArray.m index 9ef3f0252586f..ec2d53b371fe2 100644 --- a/matlab/src/matlab/+arrow/+array/StringArray.m +++ b/matlab/src/matlab/+arrow/+array/StringArray.m @@ -20,10 +20,6 @@ NullSubstitionValue = string(missing); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.StringType - end - methods function obj = StringArray(data, opts) arguments diff --git a/matlab/src/matlab/+arrow/+array/TimestampArray.m b/matlab/src/matlab/+arrow/+array/TimestampArray.m index fb4b2fa1bfade..0f0da4e82130c 100644 --- a/matlab/src/matlab/+arrow/+array/TimestampArray.m +++ b/matlab/src/matlab/+arrow/+array/TimestampArray.m @@ -20,10 +20,6 @@ NullSubstitutionValue = NaT; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.TimestampType % temporarily default value - end - methods function obj = TimestampArray(data, opts) arguments @@ -39,7 +35,6 @@ args = struct(MatlabArray=ptime, Valid=validElements, TimeZone=timezone, TimeUnit=string(opts.TimeUnit)); obj@arrow.array.Array("Name", "arrow.array.proxy.TimestampArray", "ConstructorArguments", {args}); - obj.Type = arrow.type.TimestampType(TimeUnit=opts.TimeUnit, TimeZone=timezone); end function dates = toMATLAB(obj) diff --git a/matlab/src/matlab/+arrow/+array/UInt16Array.m b/matlab/src/matlab/+arrow/+array/UInt16Array.m index 3732df3c76111..4862ca20b9f88 100644 --- a/matlab/src/matlab/+arrow/+array/UInt16Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt16Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint16(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt16Type - end - methods function obj = UInt16Array(data, varargin) obj@arrow.array.NumericArray(data, "uint16", ... 
diff --git a/matlab/src/matlab/+arrow/+array/UInt32Array.m b/matlab/src/matlab/+arrow/+array/UInt32Array.m index 183d4df08257a..782b0010997fc 100644 --- a/matlab/src/matlab/+arrow/+array/UInt32Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint32(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt32Type - end - methods function obj = UInt32Array(data, varargin) obj@arrow.array.NumericArray(data, "uint32", ... diff --git a/matlab/src/matlab/+arrow/+array/UInt64Array.m b/matlab/src/matlab/+arrow/+array/UInt64Array.m index af828978ce2a7..9e25ce4987bc1 100644 --- a/matlab/src/matlab/+arrow/+array/UInt64Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint64(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt64Type - end - methods function obj = UInt64Array(data, varargin) obj@arrow.array.NumericArray(data, "uint64", ... diff --git a/matlab/src/matlab/+arrow/+array/UInt8Array.m b/matlab/src/matlab/+arrow/+array/UInt8Array.m index b5dc664ea1476..8bad2401bd429 100644 --- a/matlab/src/matlab/+arrow/+array/UInt8Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt8Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint8(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt8Type - end - methods function obj = UInt8Array(data, varargin) obj@arrow.array.NumericArray(data, "uint8", ... diff --git a/matlab/src/matlab/+arrow/+internal/+proxy/create.m b/matlab/src/matlab/+arrow/+internal/+proxy/create.m new file mode 100644 index 0000000000000..0ed1476058df6 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+proxy/create.m @@ -0,0 +1,25 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function proxy = create(name, args) +%CREATE Creates a proxy object. + arguments + name(1, 1) string {mustBeNonmissing} + end + arguments(Repeating) + args + end + proxy = libmexclass.proxy.Proxy.create(name, args{:}); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+internal/+proxy/validate.m b/matlab/src/matlab/+arrow/+internal/+proxy/validate.m new file mode 100644 index 0000000000000..1b2b3649e42c3 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+proxy/validate.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function validate(proxy, expectedName) +%VALIDATE Throws an arrow:proxy:ProxyNameMismatch error if +% proxy.Name and expectedName are not equal. + arguments + proxy(1, 1) libmexclass.proxy.Proxy + expectedName(1, 1) string + end + + if proxy.Name ~= expectedName + errid = "arrow:proxy:ProxyNameMismatch"; + msg = "Proxy class name is " + proxy.Name + ", but expected " + expectedName; + error(errid, msg); + end +end diff --git a/matlab/src/matlab/+arrow/+type/BooleanType.m b/matlab/src/matlab/+arrow/+type/BooleanType.m index 202d177dee03f..6afa00e9258cb 100644 --- a/matlab/src/matlab/+arrow/+type/BooleanType.m +++ b/matlab/src/matlab/+arrow/+type/BooleanType.m @@ -17,8 +17,12 @@ %BOOLEANTYPE Type class for boolean data. methods - function obj = BooleanType() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.BooleanType", "ConstructorArguments", {}) + function obj = BooleanType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.BooleanType")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/FixedWidthType.m b/matlab/src/matlab/+arrow/+type/FixedWidthType.m index dcbb3e69e756a..8c9c5b26081ae 100644 --- a/matlab/src/matlab/+arrow/+type/FixedWidthType.m +++ b/matlab/src/matlab/+arrow/+type/FixedWidthType.m @@ -21,8 +21,11 @@ end methods - function obj = FixedWidthType(varargin) - obj@arrow.type.Type(varargin{:}); + function obj = FixedWidthType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy + end + obj@arrow.type.Type(proxy); end function width = get.BitWidth(obj) diff --git a/matlab/src/matlab/+arrow/+type/Float32Type.m b/matlab/src/matlab/+arrow/+type/Float32Type.m index aec21fe1ce5e8..df5fa1ce844e9 100644 --- a/matlab/src/matlab/+arrow/+type/Float32Type.m +++ b/matlab/src/matlab/+arrow/+type/Float32Type.m @@ -17,8 +17,12 @@ %FLOAT32TYPE Type class for float32 data. 
methods - function obj = Float32Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Float32Type", "ConstructorArguments", {}) + function obj = Float32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Float32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Float64Type.m b/matlab/src/matlab/+arrow/+type/Float64Type.m index 25c9ff41b61ab..ba93265ebc73e 100644 --- a/matlab/src/matlab/+arrow/+type/Float64Type.m +++ b/matlab/src/matlab/+arrow/+type/Float64Type.m @@ -17,8 +17,12 @@ %FLOAT64Type Type class for float64 data. methods - function obj = Float64Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Float64Type", "ConstructorArguments", {}) + function obj = Float64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Float64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Int16Type.m b/matlab/src/matlab/+arrow/+type/Int16Type.m index ce9c61d447407..c16d3fd5ca53f 100644 --- a/matlab/src/matlab/+arrow/+type/Int16Type.m +++ b/matlab/src/matlab/+arrow/+type/Int16Type.m @@ -17,8 +17,12 @@ %INT16TYPE Type class for int8 data. 
methods - function obj = Int16Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int16Type", "ConstructorArguments", {}) + function obj = Int16Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int16Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end diff --git a/matlab/src/matlab/+arrow/+type/Int32Type.m b/matlab/src/matlab/+arrow/+type/Int32Type.m index 260a9d7a37cee..786697bf1136b 100644 --- a/matlab/src/matlab/+arrow/+type/Int32Type.m +++ b/matlab/src/matlab/+arrow/+type/Int32Type.m @@ -17,8 +17,12 @@ %INT32TYPE Type class for int32 data. methods - function obj = Int32Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int32Type", "ConstructorArguments", {}) + function obj = Int32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end diff --git a/matlab/src/matlab/+arrow/+type/Int64Type.m b/matlab/src/matlab/+arrow/+type/Int64Type.m index 857a84e74cc34..bf6c71d622a63 100644 --- a/matlab/src/matlab/+arrow/+type/Int64Type.m +++ b/matlab/src/matlab/+arrow/+type/Int64Type.m @@ -17,8 +17,12 @@ %INT64TYPE Type class for int64 data. 
methods - function obj = Int64Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int64Type", "ConstructorArguments", {}) + function obj = Int64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Int8Type.m b/matlab/src/matlab/+arrow/+type/Int8Type.m index 1d066b4b8b84a..b28785f876ea8 100644 --- a/matlab/src/matlab/+arrow/+type/Int8Type.m +++ b/matlab/src/matlab/+arrow/+type/Int8Type.m @@ -17,8 +17,12 @@ %INT8TYPE Type class for int8 data. methods - function obj = Int8Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.Int8Type", "ConstructorArguments", {}) + function obj = Int8Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int8Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end diff --git a/matlab/src/matlab/+arrow/+type/StringType.m b/matlab/src/matlab/+arrow/+type/StringType.m index 337c5a9bd6863..c269bfa6db33c 100644 --- a/matlab/src/matlab/+arrow/+type/StringType.m +++ b/matlab/src/matlab/+arrow/+type/StringType.m @@ -17,8 +17,12 @@ %STRINGTYPE Type class for string data. 
methods - function obj = StringType() - obj@arrow.type.Type("Name", "arrow.type.proxy.StringType", "ConstructorArguments", {}); + function obj = StringType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.StringType")} + end + import arrow.internal.proxy.validate + obj@arrow.type.Type(proxy); end end end diff --git a/matlab/src/matlab/+arrow/+type/TimestampType.m b/matlab/src/matlab/+arrow/+type/TimestampType.m index c7a576968edec..a5a376f8bc3b3 100644 --- a/matlab/src/matlab/+arrow/+type/TimestampType.m +++ b/matlab/src/matlab/+arrow/+type/TimestampType.m @@ -22,14 +22,12 @@ end methods - function obj = TimestampType(opts) - %TIMESTAMPTYPE Construct an instance of this class + function obj = TimestampType(proxy) arguments - opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond - opts.TimeZone(1, 1) string {mustBeNonmissing} = "" + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.TimestampType")} end - args = struct(TimeUnit=string(opts.TimeUnit), TimeZone=opts.TimeZone); - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.TimestampType", "ConstructorArguments", {args}); + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end function unit = get.TimeUnit(obj) @@ -41,5 +39,4 @@ tz = obj.Proxy.timeZone(); end end -end - +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Type.m b/matlab/src/matlab/+arrow/+type/Type.m index 466f393c7d082..c2ae3dbc58c9c 100644 --- a/matlab/src/matlab/+arrow/+type/Type.m +++ b/matlab/src/matlab/+arrow/+type/Type.m @@ -26,8 +26,11 @@ end methods - function obj = Type(varargin) - obj.Proxy = libmexclass.proxy.Proxy(varargin{:}); + function obj = Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy + end + obj.Proxy = proxy; end function numFields = get.NumFields(obj) diff --git a/matlab/src/matlab/+arrow/+type/UInt16Type.m b/matlab/src/matlab/+arrow/+type/UInt16Type.m index 
40def5f927227..3198b78671ef9 100644 --- a/matlab/src/matlab/+arrow/+type/UInt16Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt16Type.m @@ -17,8 +17,12 @@ %UINT16TYPE Type class for uint16 data. methods - function obj = UInt16Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt16Type", "ConstructorArguments", {}) + function obj = UInt16Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt16Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt32Type.m b/matlab/src/matlab/+arrow/+type/UInt32Type.m index 5b030884fe004..53e60e4e34290 100644 --- a/matlab/src/matlab/+arrow/+type/UInt32Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt32Type.m @@ -17,8 +17,12 @@ %UINT32TYPE Type class for uint32 data. methods - function obj = UInt32Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt32Type", "ConstructorArguments", {}) + function obj = UInt32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt64Type.m b/matlab/src/matlab/+arrow/+type/UInt64Type.m index 60f7173bfe59a..f8512ec59497c 100644 --- a/matlab/src/matlab/+arrow/+type/UInt64Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt64Type.m @@ -17,8 +17,12 @@ %UINT64TYPE Type class for uint64 data. 
methods - function obj = UInt64Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt64Type", "ConstructorArguments", {}) + function obj = UInt64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt8Type.m b/matlab/src/matlab/+arrow/+type/UInt8Type.m index e09c7ed71116a..898426e3a4076 100644 --- a/matlab/src/matlab/+arrow/+type/UInt8Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt8Type.m @@ -17,8 +17,12 @@ %UINT8TYPE Type class for uint8 data. methods - function obj = UInt8Type() - obj@arrow.type.FixedWidthType("Name", "arrow.type.proxy.UInt8Type", "ConstructorArguments", {}) + function obj = UInt8Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt8Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/boolean.m b/matlab/src/matlab/+arrow/+type/boolean.m new file mode 100644 index 0000000000000..f5331d790e595 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/boolean.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +function type = boolean() +%BOOLEAN Creates an arrow.type.BooleanType object + proxy = arrow.internal.proxy.create("arrow.type.proxy.BooleanType"); + type = arrow.type.BooleanType(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/float32.m b/matlab/src/matlab/+arrow/+type/float32.m new file mode 100644 index 0000000000000..d8c44dfc7f03e --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/float32.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = float32() +%FLOAT32 Creates an arrow.type.Float32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Float32Type"); + type = arrow.type.Float32Type(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/float64.m b/matlab/src/matlab/+arrow/+type/float64.m new file mode 100644 index 0000000000000..ae2fdc44c2a84 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/float64.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = float64() +%FLOAT64 Creates an arrow.type.Float64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Float64Type"); + type = arrow.type.Float64Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/int16.m b/matlab/src/matlab/+arrow/+type/int16.m new file mode 100644 index 0000000000000..49f3bfdaa3522 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int16.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int16() +%INT16 Creates an arrow.type.Int16Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int16Type"); + type = arrow.type.Int16Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/int32.m b/matlab/src/matlab/+arrow/+type/int32.m new file mode 100644 index 0000000000000..80673a6bb57a7 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int32.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = int32() +%INT32 Creates an arrow.type.Int32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int32Type"); + type = arrow.type.Int32Type(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/int64.m b/matlab/src/matlab/+arrow/+type/int64.m new file mode 100644 index 0000000000000..7e28fdc48e520 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int64.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int64() +%INT64 Creates an arrow.type.Int64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int64Type"); + type = arrow.type.Int64Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/int8.m b/matlab/src/matlab/+arrow/+type/int8.m new file mode 100644 index 0000000000000..d59281cfb3db2 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int8.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int8() +%INT8 Creates an arrow.type.Int8Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int8Type"); + type = arrow.type.Int8Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/string.m b/matlab/src/matlab/+arrow/+type/string.m new file mode 100644 index 0000000000000..71329adc7cc2e --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/string.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = string() +%STRING Creates an arrow.type.StringType object + proxy = arrow.internal.proxy.create("arrow.type.proxy.StringType"); + type = arrow.type.StringType(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/timestamp.m b/matlab/src/matlab/+arrow/+type/timestamp.m new file mode 100644 index 0000000000000..6ad47eae27e45 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/timestamp.m @@ -0,0 +1,25 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = timestamp(opts) +%TIMESTAMP Creates an arrow.type.TimestampType object + arguments + opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond + opts.TimeZone(1, 1) string {mustBeNonmissing} = "" + end + args = struct(TimeUnit=string(opts.TimeUnit), TimeZone=opts.TimeZone); + proxy = arrow.internal.proxy.create("arrow.type.proxy.TimestampType", args); + type = arrow.type.TimestampType(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/uint16.m b/matlab/src/matlab/+arrow/+type/uint16.m new file mode 100644 index 0000000000000..75032a0253cbc --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint16.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = uint16() +%UINT16 Creates an arrow.type.UInt16Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt16Type"); + type = arrow.type.UInt16Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/uint32.m b/matlab/src/matlab/+arrow/+type/uint32.m new file mode 100644 index 0000000000000..79b821605d52a --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint32.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = uint32() +%UINT32 Creates an arrow.type.UInt32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt32Type"); + type = arrow.type.UInt32Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/uint64.m b/matlab/src/matlab/+arrow/+type/uint64.m new file mode 100644 index 0000000000000..c0965fc9bd40f --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint64.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = uint64() +%UINT64 Creates an arrow.type.UInt64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt64Type"); + type = arrow.type.UInt64Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/uint8.m b/matlab/src/matlab/+arrow/+type/uint8.m new file mode 100644 index 0000000000000..b199a3c766052 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint8.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = uint8() +%UINT8 Creates an arrow.type.UInt8Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt8Type"); + type = arrow.type.UInt8Type(proxy); +end + diff --git a/matlab/test/arrow/array/tBooleanArray.m b/matlab/test/arrow/array/tBooleanArray.m index 00eef2c91064c..ad6126b77fe51 100644 --- a/matlab/test/arrow/array/tBooleanArray.m +++ b/matlab/test/arrow/array/tBooleanArray.m @@ -22,7 +22,7 @@ MatlabArrayFcn = @logical MatlabConversionFcn = @logical NullSubstitutionValue = false - ArrowType = arrow.type.BooleanType + ArrowType = arrow.type.boolean end methods(TestClassSetup) diff --git a/matlab/test/arrow/array/tFloat32Array.m b/matlab/test/arrow/array/tFloat32Array.m index de7b312d84c18..e8655c7781ceb 100644 --- a/matlab/test/arrow/array/tFloat32Array.m +++ b/matlab/test/arrow/array/tFloat32Array.m @@ -24,7 +24,7 @@ MaxValue = realmax("single") MinValue = realmin("single") NullSubstitutionValue = single(NaN) - ArrowType = arrow.type.Float32Type + ArrowType = arrow.type.float32 end methods(Test) diff --git a/matlab/test/arrow/array/tFloat64Array.m b/matlab/test/arrow/array/tFloat64Array.m index b4fb9ec7a07e6..a01eef73883b6 100755 --- a/matlab/test/arrow/array/tFloat64Array.m +++ b/matlab/test/arrow/array/tFloat64Array.m @@ -24,7 +24,7 @@ MaxValue = realmax("double") MinValue = realmin("double") NullSubstitutionValue = NaN - ArrowType = arrow.type.Float64Type + ArrowType = arrow.type.float64 end methods(Test) diff --git a/matlab/test/arrow/array/tInt16Array.m b/matlab/test/arrow/array/tInt16Array.m index 58193e076c228..466dfaf9c4d7f 100644 --- a/matlab/test/arrow/array/tInt16Array.m +++ b/matlab/test/arrow/array/tInt16Array.m @@ -24,7 +24,7 @@ MaxValue = intmax("int16") MinValue = intmin("int16") NullSubstitutionValue = int16(0) - ArrowType = arrow.type.Int16Type + ArrowType = arrow.type.int16 end end diff --git a/matlab/test/arrow/array/tInt32Array.m b/matlab/test/arrow/array/tInt32Array.m index 59255c1272638..b8334e97ccb9a 
100644 --- a/matlab/test/arrow/array/tInt32Array.m +++ b/matlab/test/arrow/array/tInt32Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("int32") MinValue = intmin("int32") NullSubstitutionValue = int32(0) - ArrowType = arrow.type.Int32Type + ArrowType = arrow.type.int32 end end diff --git a/matlab/test/arrow/array/tInt64Array.m b/matlab/test/arrow/array/tInt64Array.m index 289b4fcf3e290..a877cb2564fe9 100644 --- a/matlab/test/arrow/array/tInt64Array.m +++ b/matlab/test/arrow/array/tInt64Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("int64") MinValue = intmin("int64") NullSubstitutionValue = int64(0) - ArrowType = arrow.type.Int64Type + ArrowType = arrow.type.int64 end end diff --git a/matlab/test/arrow/array/tInt8Array.m b/matlab/test/arrow/array/tInt8Array.m index 9ae1eb8cc4fe7..dbd6e74ea7f8f 100644 --- a/matlab/test/arrow/array/tInt8Array.m +++ b/matlab/test/arrow/array/tInt8Array.m @@ -24,7 +24,7 @@ MaxValue = intmax("int8") MinValue = intmin("int8") NullSubstitutionValue = int8(0) - ArrowType = arrow.type.Int8Type + ArrowType = arrow.type.int8 end end diff --git a/matlab/test/arrow/array/tStringArray.m b/matlab/test/arrow/array/tStringArray.m index b076c636b13e5..792d7599816d5 100644 --- a/matlab/test/arrow/array/tStringArray.m +++ b/matlab/test/arrow/array/tStringArray.m @@ -22,7 +22,7 @@ MatlabArrayFcn = @string MatlabConversionFcn = @string NullSubstitutionValue = string(missing) - ArrowType = arrow.type.StringType + ArrowType = arrow.type.string end methods(TestClassSetup) diff --git a/matlab/test/arrow/array/tUInt16Array.m b/matlab/test/arrow/array/tUInt16Array.m index b79a753694684..eed53c7882b47 100644 --- a/matlab/test/arrow/array/tUInt16Array.m +++ b/matlab/test/arrow/array/tUInt16Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint16") MinValue = intmin("uint16") NullSubstitutionValue = uint16(0) - ArrowType = arrow.type.UInt16Type + ArrowType = arrow.type.uint16 end end diff --git a/matlab/test/arrow/array/tUInt32Array.m 
b/matlab/test/arrow/array/tUInt32Array.m index 157cad941724d..b5e1970cbcc96 100644 --- a/matlab/test/arrow/array/tUInt32Array.m +++ b/matlab/test/arrow/array/tUInt32Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint32") MinValue = intmin("uint32") NullSubstitutionValue = uint32(0) - ArrowType = arrow.type.UInt32Type + ArrowType = arrow.type.uint32 end end diff --git a/matlab/test/arrow/array/tUInt64Array.m b/matlab/test/arrow/array/tUInt64Array.m index 41e479e816263..6cd2c9cba6911 100644 --- a/matlab/test/arrow/array/tUInt64Array.m +++ b/matlab/test/arrow/array/tUInt64Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint64") MinValue = intmin("uint64") NullSubstitutionValue = uint64(0) - ArrowType = arrow.type.UInt64Type + ArrowType = arrow.type.uint64 end end diff --git a/matlab/test/arrow/array/tUInt8Array.m b/matlab/test/arrow/array/tUInt8Array.m index 4aca2cced1c8d..68365958bc683 100644 --- a/matlab/test/arrow/array/tUInt8Array.m +++ b/matlab/test/arrow/array/tUInt8Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint8") MinValue = intmin("uint8") NullSubstitutionValue = uint8(0) - ArrowType = arrow.type.UInt8Type + ArrowType = arrow.type.uint8 end end diff --git a/matlab/test/arrow/type/hFixedWidthType.m b/matlab/test/arrow/type/hFixedWidthType.m index 1f2a5e413dd70..308ac46011a6c 100644 --- a/matlab/test/arrow/type/hFixedWidthType.m +++ b/matlab/test/arrow/type/hFixedWidthType.m @@ -21,9 +21,16 @@ ArrowType TypeID BitWidth + ClassName end methods(Test) + function TestClass(testCase) + % Verify ArrowType is an object of the expected class type. + name = string(class(testCase.ArrowType)); + testCase.verifyEqual(name, testCase.ClassName); + end + function TestTypeID(testCase) % Verify ID is set to the appropriate arrow.type.ID value. 
arrowType = testCase.ArrowType; diff --git a/matlab/test/arrow/type/tBooleanType.m b/matlab/test/arrow/type/tBooleanType.m index 900ff3d9b3390..94de09a3e58f1 100644 --- a/matlab/test/arrow/type/tBooleanType.m +++ b/matlab/test/arrow/type/tBooleanType.m @@ -17,8 +17,9 @@ % Test class for arrow.type.BooleanType properties - ArrowType = arrow.type.BooleanType + ArrowType = arrow.type.boolean TypeID = arrow.type.ID.Boolean - BitWidth = int32(1); + BitWidth = int32(1) + ClassName = "arrow.type.BooleanType" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat32Type.m b/matlab/test/arrow/type/tFloat32Type.m index af407559ee24f..c54fcfd32809b 100644 --- a/matlab/test/arrow/type/tFloat32Type.m +++ b/matlab/test/arrow/type/tFloat32Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.Float32Type properties - ArrowType = arrow.type.Float32Type + ArrowType = arrow.type.float32 TypeID = arrow.type.ID.Float32 - BitWidth = int32(32); + BitWidth = int32(32) + ClassName = "arrow.type.Float32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat64Type.m b/matlab/test/arrow/type/tFloat64Type.m index d5ccd8ef259a8..6b5648dfc10e3 100644 --- a/matlab/test/arrow/type/tFloat64Type.m +++ b/matlab/test/arrow/type/tFloat64Type.m @@ -17,8 +17,10 @@ % Test class for arrow.type.Float64Type properties - ArrowType = arrow.type.Float64Type + ArrowType = arrow.type.float64 TypeID = arrow.type.ID.Float64 - BitWidth = int32(64); + BitWidth = int32(64) + ClassName = "arrow.type.Float64Type" + end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt16Type.m b/matlab/test/arrow/type/tInt16Type.m index 32e9ff5cb2fdf..a929ba688b5cd 100644 --- a/matlab/test/arrow/type/tInt16Type.m +++ b/matlab/test/arrow/type/tInt16Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.Int16Type properties - ArrowType = arrow.type.Int16Type + ArrowType = arrow.type.int16 TypeID = arrow.type.ID.Int16 - BitWidth = int32(16); + BitWidth = int32(16) + 
ClassName = "arrow.type.Int16Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt32Type.m b/matlab/test/arrow/type/tInt32Type.m index 1076ef802654e..6d59b5454e7fc 100644 --- a/matlab/test/arrow/type/tInt32Type.m +++ b/matlab/test/arrow/type/tInt32Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.Int32Type properties - ArrowType = arrow.type.Int32Type + ArrowType = arrow.type.int32 TypeID = arrow.type.ID.Int32 - BitWidth = int32(32); + BitWidth = int32(32) + ClassName = "arrow.type.Int32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt64Type.m b/matlab/test/arrow/type/tInt64Type.m index 24b94c4c04b6c..6ff0d2b07cbac 100644 --- a/matlab/test/arrow/type/tInt64Type.m +++ b/matlab/test/arrow/type/tInt64Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.Int64Type properties - ArrowType = arrow.type.Int64Type + ArrowType = arrow.type.int64 TypeID = arrow.type.ID.Int64 - BitWidth = int32(64); + BitWidth = int32(64) + ClassName = "arrow.type.Int64Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt8Type.m b/matlab/test/arrow/type/tInt8Type.m index 57adf7bd2f118..396be3a3f715a 100644 --- a/matlab/test/arrow/type/tInt8Type.m +++ b/matlab/test/arrow/type/tInt8Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.Int8Type properties - ArrowType = arrow.type.Int8Type + ArrowType = arrow.type.int8 TypeID = arrow.type.ID.Int8 - BitWidth = int32(8); + BitWidth = int32(8) + ClassName = "arrow.type.Int8Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tStringType.m b/matlab/test/arrow/type/tStringType.m index a4a0309218316..057ffd5426345 100644 --- a/matlab/test/arrow/type/tStringType.m +++ b/matlab/test/arrow/type/tStringType.m @@ -19,14 +19,14 @@ methods (Test) function Basic(tc) - type = arrow.type.StringType; + type = arrow.type.string; className = string(class(type)); tc.verifyEqual(className, "arrow.type.StringType"); tc.verifyEqual(type.ID, 
arrow.type.ID.String); end function NumFields(tc) - type = arrow.type.StringType; + type = arrow.type.string; tc.verifyEqual(type.NumFields, int32(0)); end diff --git a/matlab/test/arrow/type/tTimestampType.m b/matlab/test/arrow/type/tTimestampType.m index 95b06d7b56faf..fa893d2d930de 100644 --- a/matlab/test/arrow/type/tTimestampType.m +++ b/matlab/test/arrow/type/tTimestampType.m @@ -17,15 +17,22 @@ % Test class for arrow.type.TimestampType properties - ArrowType = arrow.type.TimestampType + ArrowType = arrow.type.timestamp TypeID = arrow.type.ID.Timestamp - BitWidth = int32(64); + BitWidth = int32(64) + ClassName = "arrow.type.TimestampType" end methods(Test) + function TestClass(testCase) + % Verify ArrowType is an object of the expected class type. + name = string(class(testCase.ArrowType)); + testCase.verifyEqual(name, testCase.ClassName); + end + function DefaultTimeUnit(testCase) % Verify the default TimeUnit is Microsecond - type = arrow.type.TimestampType; + type = arrow.type.timestamp; actualUnit = type.TimeUnit; expectedUnit = arrow.type.TimeUnit.Microsecond; testCase.verifyEqual(actualUnit, expectedUnit); @@ -33,7 +40,7 @@ function DefaultTimeUnit(testCase) function DefaultTimeZone(testCase) % Verify the default TimeZone is "" - type = arrow.type.TimestampType; + type = arrow.type.timestamp; actualTimezone = type.TimeZone; expectedTimezone = ""; testCase.verifyEqual(actualTimezone, expectedTimezone); @@ -46,7 +53,7 @@ function SupplyTimeUnitEnum(testCase) TimeUnit.Microsecond, TimeUnit.Nanosecond]; for unit = expectedUnit - type = TimestampType(TimeUnit=unit); + type = timestamp(TimeUnit=unit); testCase.verifyEqual(type.TimeUnit, unit); end end @@ -60,42 +67,42 @@ function SupplyTimeUnitString(testCase) TimeUnit.Microsecond, TimeUnit.Nanosecond]; for ii = 1:numel(unitString) - type = TimestampType(TimeUnit=unitString(ii)); + type = timestamp(TimeUnit=unitString(ii)); testCase.verifyEqual(type.TimeUnit, expectedUnit(ii)); end end function 
SupplyTimeZone(testCase) % Supply the TimeZone. - type = arrow.type.TimestampType(TimeZone="America/New_York"); + type = arrow.type.timestamp(TimeZone="America/New_York"); testCase.verifyEqual(type.TimeZone, "America/New_York"); end function ErrorIfMissingStringTimeZone(testCase) - fcn = @() arrow.type.TimestampType(TimeZone=string(missing)); + fcn = @() arrow.type.timestamp(TimeZone=string(missing)); testCase.verifyError(fcn, "MATLAB:validators:mustBeNonmissing"); end function ErrorIfTimeZoneIsNonScalar(testCase) - fcn = @() arrow.type.TimestampType(TimeZone=["a", "b"]); + fcn = @() arrow.type.timestamp(TimeZone=["a", "b"]); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); - fcn = @() arrow.type.TimestampType(TimeZone=strings(0, 0)); + fcn = @() arrow.type.timestamp(TimeZone=strings(0, 0)); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); end function ErrorIfAmbiguousTimeUnit(testCase) - fcn = @() arrow.type.TimestampType(TimeUnit="mi"); + fcn = @() arrow.type.timestamp(TimeUnit="mi"); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end function ErrorIfTimeUnitIsNonScalar(testCase) units = [arrow.type.TimeUnit.Second; arrow.type.TimeUnit.Millisecond]; - fcn = @() arrow.type.TimestampType(TimeZone=units); + fcn = @() arrow.type.timestamp(TimeZone=units); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); units = ["second" "millisecond"]; - fcn = @() arrow.type.TimestampType(TimeZone=units); + fcn = @() arrow.type.timestamp(TimeZone=units); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); end end diff --git a/matlab/test/arrow/type/tUInt16Type.m b/matlab/test/arrow/type/tUInt16Type.m index c0823b4f6962b..ede66f6324691 100644 --- a/matlab/test/arrow/type/tUInt16Type.m +++ b/matlab/test/arrow/type/tUInt16Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.UInt16Type properties - ArrowType = arrow.type.UInt16Type + ArrowType = arrow.type.uint16 TypeID = arrow.type.ID.UInt16 - BitWidth = 
int32(16); + BitWidth = int32(16) + ClassName = "arrow.type.UInt16Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt32Type.m b/matlab/test/arrow/type/tUInt32Type.m index 15fe93106e3da..def24c76ceb76 100644 --- a/matlab/test/arrow/type/tUInt32Type.m +++ b/matlab/test/arrow/type/tUInt32Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.UInt32Type properties - ArrowType = arrow.type.UInt32Type + ArrowType = arrow.type.uint32 TypeID = arrow.type.ID.UInt32 - BitWidth = int32(32); + BitWidth = int32(32) + ClassName = "arrow.type.UInt32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt64Type.m b/matlab/test/arrow/type/tUInt64Type.m index 4646c91455e8a..9228e1cc504d6 100644 --- a/matlab/test/arrow/type/tUInt64Type.m +++ b/matlab/test/arrow/type/tUInt64Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.UInt64Type properties - ArrowType = arrow.type.UInt64Type + ArrowType = arrow.type.uint64 TypeID = arrow.type.ID.UInt64 - BitWidth = int32(64); + BitWidth = int32(64) + ClassName = "arrow.type.UInt64Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt8Type.m b/matlab/test/arrow/type/tUInt8Type.m index ebd6b04b0eade..eec3aa5fdec25 100644 --- a/matlab/test/arrow/type/tUInt8Type.m +++ b/matlab/test/arrow/type/tUInt8Type.m @@ -17,8 +17,9 @@ % Test class for arrow.type.UInt64Type properties - ArrowType = arrow.type.UInt8Type + ArrowType = arrow.type.uint8 TypeID = arrow.type.ID.UInt8 - BitWidth = int32(8); + BitWidth = int32(8) + ClassName = "arrow.type.UInt8Type" end end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 41d2ee4a705d7..253632d221040 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -24,7 +24,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_NAME libmexclass) # libmexclass is accessible 
for CI without permission issues. set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_REPOSITORY "https://github.com/mathworks/libmexclass.git") # Use a specific Git commit hash to avoid libmexclass version changing unexpectedly. -set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "3465900") +set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "d04f88d") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp") From 0a731f576df7b4188e60e137bf46da0b6d746b7f Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 17 Jul 2023 17:43:04 -0700 Subject: [PATCH 27/35] MINOR: [Docs] Add C++/Run-End Encoded to the implementation status table (#36732) C++ passes the integration tests and has types for REE arrays. I believe it is fair to say that C++ supports REE. Authored-by: Weston Pace Signed-off-by: Sutou Kouhei --- docs/source/status.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/status.rst b/docs/source/status.rst index 6c55b4bd3e01a..5c8895b114ae3 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -96,7 +96,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Run-End Encoded | | | ✓ | | | | | | +| Run-End Encoded | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ Notes: From cbe50ee5eaaabee0073d0da05b75fcce76e9cf43 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Tue, 18 Jul 2023 16:15:30 +0800 Subject: [PATCH 28/35] GH-36712: [CI] Also update issue components when it's updated (#36723) This allows issue openers to change the component label themselves. 
* Closes: #36712 Authored-by: Jin Shang Signed-off-by: Sutou Kouhei --- .github/workflows/issue_bot.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/issue_bot.yml b/.github/workflows/issue_bot.yml index ffd56e440f430..7a62f2149662e 100644 --- a/.github/workflows/issue_bot.yml +++ b/.github/workflows/issue_bot.yml @@ -21,6 +21,7 @@ on: issues: types: - opened + - edited permissions: contents: read @@ -56,9 +57,9 @@ jobs: if (component_labels.length == 0) throw new Error('No components found!'); - await github.rest.issues.addLabels({ + await github.rest.issues.setLabels({ "owner": context.repo.owner, "repo": context.repo.repo, "issue_number": context.payload.issue.number, "labels": component_labels, - }); \ No newline at end of file + }); From 2c51a07ab181b4e678c6fbe09fcf832d248e289a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 18 Jul 2023 17:34:02 +0900 Subject: [PATCH 29/35] GH-36573: [CI] Remove Travis CI related files and mentions (#36741) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Because we can't use Travis CI in apache/arrow since 2023-01. ### What changes are included in this PR? Remove Travis CI related files and mentions. Thanks Travis CI so far! ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #36573 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- .travis.yml | 166 -------- ci/detect-changes.py | 362 ------------------ ci/docker/linux-apt-r.dockerfile | 4 - .../continuous_integration/crossbow.rst | 6 +- .../continuous_integration/overview.rst | 2 - docs/source/developers/release.rst | 4 +- 6 files changed, 5 insertions(+), 539 deletions(-) delete mode 100644 .travis.yml delete mode 100644 ci/detect-changes.py diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2c5f2965107b8..0000000000000 --- a/.travis.yml +++ /dev/null @@ -1,166 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -dist: focal - -language: minimal - -cache: - directories: - - $TRAVIS_BUILD_DIR/.docker - -addons: - apt: - packages: - - python3-pip - -services: - - docker - -# Note that the global "env" setting isn't inherited automatically by -# matrix entries with their own "env", so we have to insert it explicitly. 
-env: &global_env - ARROW_ENABLE_TIMING_TESTS: "OFF" - COMPOSE_DOCKER_CLI_BUILD: 1 - DOCKER_BUILDKIT: 0 - DOCKER_VOLUME_PREFIX: $TRAVIS_BUILD_DIR/.docker/ - -jobs: - include: - - name: "C++ on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "CPP" - DOCKER_IMAGE_ID: ubuntu-cpp - # Can't enable ARROW_MIMALLOC because of failures in memory pool tests. - # Can't enable ARROW_S3 because compiler is killed while compiling - # aws-sdk-cpp. - DOCKER_RUN_ARGS: >- - " - -e ARROW_FLIGHT=ON - -e ARROW_GCS=OFF - -e ARROW_MIMALLOC=OFF - -e ARROW_ORC=OFF - -e ARROW_PARQUET=OFF - -e ARROW_S3=OFF - -e ARROW_SUBSTRAIT=OFF - -e CMAKE_BUILD_PARALLEL_LEVEL=2 - -e CMAKE_UNITY_BUILD=ON - -e PARQUET_BUILD_EXAMPLES=OFF - -e PARQUET_BUILD_EXECUTABLES=OFF - -e Protobuf_SOURCE=BUNDLED - -e gRPC_SOURCE=BUNDLED - " - # The LLVM's APT repository causes download error for s390x binary - # We should use the LLVM provided by the default APT repository - CLANG_TOOLS: "10" - LLVM: "10" - UBUNTU: "20.04" - - - name: "Go on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "GO" - DOCKER_IMAGE_ID: debian-go - - - name: "Java on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "JAVA" - DOCKER_IMAGE_ID: debian-java - JDK: 11 - - - name: "Python on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "PYTHON" - DOCKER_IMAGE_ID: ubuntu-python - # Can't enable ARROW_MIMALLOC because of failures in memory pool tests. - # Can't enable ARROW_S3 because compiler is killed while compiling - # aws-sdk-cpp. 
- DOCKER_RUN_ARGS: >- - " - -e ARROW_FLIGHT=ON - -e ARROW_GCS=OFF - -e ARROW_MIMALLOC=OFF - -e ARROW_ORC=OFF - -e ARROW_PARQUET=OFF - -e ARROW_PYTHON=ON - -e ARROW_S3=OFF - -e CMAKE_BUILD_PARALLEL_LEVEL=2 - -e CMAKE_UNITY_BUILD=ON - -e PARQUET_BUILD_EXAMPLES=OFF - -e PARQUET_BUILD_EXECUTABLES=OFF - -e Protobuf_SOURCE=BUNDLED - -e gRPC_SOURCE=BUNDLED - " - # The LLVM's APT repository causes download error for s390x binary - # We should use the LLVM provided by the default APT repository - CLANG_TOOLS: "10" - LLVM: "10" - UBUNTU: "20.04" - - allow_failures: - - name: "Java on s390x" - - name: "C++ on s390x" - - name: "Python on s390x" - -before_install: - - eval "$(python ci/detect-changes.py)" - - | - arrow_ci_affected=no - for arrow_ci_module in ${ARROW_CI_MODULES}; do - arrow_ci_affected_variable=ARROW_CI_${arrow_ci_module}_AFFECTED - if [ "$(eval "echo \$${arrow_ci_affected_variable}")" = "1" ]; then - arrow_ci_affected=yes - fi - done - if [ "${arrow_ci_affected}" = "no" ]; then - travis_terminate 0 - fi - -install: - - sudo -H pip3 install --upgrade pip - - sudo -H pip3 install 'docker-compose>=1.27.0' - - sudo -H pip3 install -e dev/archery[docker] - -script: - - export ARCHERY_DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - - | - archery docker run \ - ${DOCKER_RUN_ARGS} \ - --volume ${PWD}/build:/build \ - ${DOCKER_IMAGE_ID} - -after_success: - - | - if [ "${TRAVIS_EVENT_TYPE}" = "push" -a \ - "${TRAVIS_REPO_SLUG}" = "apache/arrow" ]; then - archery docker push ${DOCKER_IMAGE_ID} || : - fi diff --git a/ci/detect-changes.py b/ci/detect-changes.py deleted file mode 100644 index 7669639ecd3a9..0000000000000 --- a/ci/detect-changes.py +++ /dev/null @@ -1,362 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import print_function - -import functools -import os -import pprint -import re -import sys -import subprocess - - -perr = functools.partial(print, file=sys.stderr) - - -def dump_env_vars(prefix, pattern=None): - if pattern is not None: - match = lambda s: re.search(pattern, s) - else: - match = lambda s: True - for name in sorted(os.environ): - if name.startswith(prefix) and match(name): - perr("- {0}: {1!r}".format(name, os.environ[name])) - - -def run_cmd(cmdline): - proc = subprocess.Popen(cmdline, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - if proc.returncode != 0: - raise RuntimeError("Command {cmdline} failed with code {returncode}, " - "stderr was:\n{stderr}\n" - .format(cmdline=cmdline, returncode=proc.returncode, - stderr=err.decode())) - return out - - -def get_commit_description(commit): - """ - Return the textual description (title + body) of the given git commit. - """ - out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B", - commit]) - return out.decode('utf-8', 'ignore') - - -def list_affected_files(commit_range): - """ - Return a list of files changed by the given git commit range. 
- """ - perr("Getting affected files from", repr(commit_range)) - out = run_cmd(["git", "diff", "--name-only", commit_range]) - return list(filter(None, (s.strip() for s in out.decode().splitlines()))) - - -def get_travis_head_commit(): - return os.environ['TRAVIS_COMMIT'] - - -def get_travis_commit_range(): - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain - # unrelated changes. Instead, use the same strategy as on AppVeyor - # below. - run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])]) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - return "{0}..HEAD".format(merge_base) - else: - cr = os.environ['TRAVIS_COMMIT_RANGE'] - # See - # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 - return cr.replace('...', '..') - - -def get_travis_commit_description(): - # Prefer this to get_commit_description(get_travis_head_commit()), - # as rebasing or other repository events may make TRAVIS_COMMIT invalid - # at the time we inspect it - return os.environ['TRAVIS_COMMIT_MESSAGE'] - - -def list_travis_affected_files(): - """ - Return a list of files affected in the current Travis build. - """ - commit_range = get_travis_commit_range() - try: - return list_affected_files(commit_range) - except RuntimeError: - # TRAVIS_COMMIT_RANGE can contain invalid revisions when - # building a branch (not a PR) after rebasing: - # https://github.com/travis-ci/travis-ci/issues/2668 - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - raise - # If it's a rebase, it's probably enough to use the last commit only - commit_range = '{0}^..'.format(get_travis_head_commit()) - return list_affected_files(commit_range) - - -def list_appveyor_affected_files(): - """ - Return a list of files affected in the current AppVeyor build. - This only works for PR builds. - """ - # Re-fetch PR base branch (e.g. 
origin/master), pointing FETCH_HEAD to it - run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])]) - # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - # Compute changes files between base changeset and HEAD - return list_affected_files("{0}..HEAD".format(merge_base)) - - -def list_github_actions_affected_files(): - """ - Return a list of files affected in the current GitHub Actions build. - """ - # GitHub Actions checkout `refs/remotes/pull/$PR/merge` where `HEAD` points - # to the merge commit while `HEAD^` points to the commit before. Hence, - # `..HEAD^` points to all commit between the default branch and the PR. - return list_affected_files("HEAD^..") - - -LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python', - 'r', 'ruby', 'csharp'] - -ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev'] - - -AFFECTED_DEPENDENCIES = { - 'java': ['integration', 'python'], - 'js': ['integration'], - 'ci': ALL_TOPICS, - 'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'], - 'format': LANGUAGE_TOPICS, - 'go': ['integration'], - '.travis.yml': ALL_TOPICS, - 'appveyor.yml': ALL_TOPICS, - # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in - # practice it's going to be CI - '.github': ALL_TOPICS, - 'c_glib': ['ruby'] -} - -COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js', - 'csharp', 'go', 'docs', 'python', 'dev'} - - -def get_affected_topics(affected_files): - """ - Return a dict of topics affected by the given files. - Each dict value is True if affected, False otherwise. 
- """ - affected = dict.fromkeys(ALL_TOPICS, False) - - for path in affected_files: - parts = [] - head = path - while head: - head, tail = os.path.split(head) - parts.append(tail) - parts.reverse() - assert parts - p = parts[0] - fn = parts[-1] - if fn.startswith('README'): - continue - - if p in COMPONENTS: - affected[p] = True - - _path_already_affected = {} - - def _affect_dependencies(component): - if component in _path_already_affected: - # For circular dependencies, terminate - return - for topic in AFFECTED_DEPENDENCIES.get(component, ()): - affected[topic] = True - _affect_dependencies(topic) - _path_already_affected[topic] = True - - _affect_dependencies(p) - - return affected - - -def make_env_for_topics(affected): - return {'ARROW_CI_{0}_AFFECTED'.format(k.upper()): '1' if v else '0' - for k, v in affected.items()} - - -def get_unix_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. - """ - return "; ".join(("export {0}='{1}'".format(k, v) - for k, v in env.items())) - - -def get_windows_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. 
- """ - return "\n".join(('set "{0}={1}"'.format(k, v) - for k, v in env.items())) - - -def run_from_travis(): - perr("Environment variables (excerpt):") - dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)') - if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and - os.environ['TRAVIS_BRANCH'] in ['master', 'main'] and - os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): - # Never skip anything on default-branch builds in the official repo - affected = dict.fromkeys(ALL_TOPICS, True) - else: - desc = get_travis_commit_description() - if '[skip travis]' in desc: - # Skip everything - affected = dict.fromkeys(ALL_TOPICS, False) - elif '[force ci]' in desc or '[force travis]' in desc: - # Test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - # Test affected topics - affected_files = list_travis_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def run_from_appveyor(): - perr("Environment variables (excerpt):") - dump_env_vars('APPVEYOR_', '(PULL|REPO)') - if not os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'): - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - affected_files = list_appveyor_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_windows_shell_eval(make_env_for_topics(affected)) - - -def run_from_github(): - perr("Environment variables (excerpt):") - dump_env_vars('GITHUB_', '(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)') - if os.environ['GITHUB_EVENT_NAME'] != 'pull_request': - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - 
affected_files = list_github_actions_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def test_get_affected_topics(): - affected_topics = get_affected_topics(['cpp/CMakeLists.txt']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': False, - 'go': False, - 'java': False, - 'js': False, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': False, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['format/Schema.fbs']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': True, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['.github/workflows']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': True, - 'integration': True, - 'dev': True, - } - - -if __name__ == "__main__": - # This script should have its output evaluated by a shell, - # e.g. 
"eval `python ci/detect-changes.py`" - if os.environ.get('TRAVIS'): - try: - print(run_from_travis()) - except Exception: - # Make sure the enclosing eval will return an error - print("exit 1") - raise - elif os.environ.get('APPVEYOR'): - try: - print(run_from_appveyor()) - except Exception: - print("exit 1") - raise - elif os.environ.get('GITHUB_WORKFLOW'): - try: - print(run_from_github()) - except Exception: - print("exit 1") - raise - else: - sys.exit("Script must be run under Travis-CI, AppVeyor or GitHub Actions") diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 19f30717ca2e2..c59766c4a665c 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -47,10 +47,6 @@ RUN apt-get update -y && \ libxml2-dev \ libgit2-dev \ libssl-dev \ - # install clang to mirror what was done on Travis - clang \ - clang-format \ - clang-tidy \ # R CMD CHECK --as-cran needs pdflatex to build the package manual texlive-latex-base \ # Need locales so we can set UTF-8 diff --git a/docs/source/developers/continuous_integration/crossbow.rst b/docs/source/developers/continuous_integration/crossbow.rst index 663fc17c0a028..6308f077ac9a6 100644 --- a/docs/source/developers/continuous_integration/crossbow.rst +++ b/docs/source/developers/continuous_integration/crossbow.rst @@ -47,7 +47,7 @@ Executors Individual jobs are executed on public CI services, currently: - Linux: GitHub Actions, Travis CI, Azure Pipelines -- macOS: GitHub Actions, Travis CI, Azure Pipelines +- macOS: GitHub Actions, Azure Pipelines - Windows: GitHub Actions, Azure Pipelines Queue @@ -59,7 +59,7 @@ queue for the tasks. Anyone can host a ``queue`` repository (usually named ``/crossbow``). 
A job is a git commit on a particular git branch, containing the required -configuration files to run the requested builds (like ``.travis.yml``, +configuration files to run the requested builds (like ``.travis.yml``, ``azure-pipelines.yml``, or ``crossbow.yml`` for `GitHub Actions`_ ). Scheduler @@ -118,7 +118,7 @@ to step 3: ``https://travis-ci.com///settings`` - Confirm the `auto cancellation`_ feature is turned off for branch builds. This should be the default setting. - + 7. Install Python (minimum supported version is 3.8): | Miniconda is preferred, see installation instructions: diff --git a/docs/source/developers/continuous_integration/overview.rst b/docs/source/developers/continuous_integration/overview.rst index 70323c9e48927..1d82e845a3360 100644 --- a/docs/source/developers/continuous_integration/overview.rst +++ b/docs/source/developers/continuous_integration/overview.rst @@ -26,7 +26,6 @@ Some files central to Arrow CI are: - ``docker-compose.yml`` - here we define docker services which can be configured using either enviroment variables, or the default values for these variables. - ``.env`` - here we define default values to configure the services in ``docker-compose.yml`` -- ``.travis.yml`` - here we define workflows which run on Travis - ``appveyor.yml`` - here we define workflows that run on Appveyor We use :ref:`Docker` in order to have portable and reproducible Linux builds, as well as running Windows builds in Windows containers. We use :ref:`Archery` and :ref:`Crossbow` to help co-ordinate the various CI tasks. 
@@ -60,7 +59,6 @@ The ``.yml`` files in ``.github/worflows`` are workflows which are run on GitHub There are two other files which define action-triggered builds: -- ``.travis.yml`` - runs on all commits and is used to test on architectures such as ARM and S390x - ``appveyor.yml`` - runs on commits related to Python or C++ Extended builds diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index cb0d713f50d0c..066400b33ffb5 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -497,8 +497,8 @@ Be sure to go through on the following checklist: Our CI systems give us some coverage for the things that CRAN checks, but there are a couple of final tests we should do to confirm that the release binaries will work and that everything runs on the same infrastructure that - CRAN has, which is difficult/impossible to emulate fully on Travis or with - Docker. For a precise list of checks, see the + CRAN has, which is difficult/impossible to emulate fully with Docker. For a + precise list of checks, see the `packaging checklist `_. Once all checks are clean, we submit to CRAN, which has a web form for From 5acbd9ca71c0b2e379f4aba5fa134280bf277e92 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 18 Jul 2023 13:48:54 +0100 Subject: [PATCH 30/35] GH-36746: [R] Update NEWS.md for 12.0.1.1 release (#36747) ### What changes are included in this PR? Update NEWS.md in the R package to include 12.0.1.1 release ### Are these changes tested? No ### Are there any user-facing changes? 
No * Closes: #36746 Authored-by: Nic Crane Signed-off-by: Nic Crane --- r/NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index f358c2aae45fc..45730a7b36018 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,10 @@ # arrow 12.0.1.9000 +# arrow 12.0.1.1 + +* Update a package version reference to be text only instead of numeric due to CRAN update requiring this (#36353, #36364) + # arrow 12.0.1 * Update the version of the date library vendored with Arrow C++ library From 245141e00a6fe2e72da701a72be6e72ad116154f Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Tue, 18 Jul 2023 21:46:42 +0800 Subject: [PATCH 31/35] GH-35942: [C++] Improve Decimal ToReal accuracy (#36667) ### Rationale for this change The current implementation of `Decimal::ToReal` can be naively represented as the following pseudocode: ``` Real v = static_cast(decimal.as_int128/256()) return v * (10.0**-scale) ``` It stores the intermediate unscaled int128/256 value as a float/double. The unscaled int128/256 value can be very large when the decimal has a large scale, which causes precision issues such as in #36602. ### What changes are included in this PR? Avoid storing the unscaled large int as float if the representation is not precise, by spliting the decimal into integral and fractional parts and dealing with them separately. This algorithm guarantees that: 1. If the decimal is an integer, the conversion is exact. 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. 8 for float and 16 for double), the conversion is within 1 ULP of the exact value. For example Decimal128::ToReal(9999.999) falls into this category because the integer 9999999 is precisely representable by float, whereas 9999.9999 would be in the next category. 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) (e.g. 2^-23 for float and 2^-52 for double) of the exact value. Here "exact value" means the closest representable value by Real. 
I believe this algorithm is good enough, because an"exact" algorithm would require iterative multiplication and subtraction of decimals to determain the binary representation of its fractional part. Yet the result would still almost always be inaccurate because float/double can only accurately represent powers of two. IMHO It's not worth it to spend that many expensive operations just to improve the result by one ULP. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #35942 Lead-authored-by: Jin Shang Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- .../arrow/compute/kernels/scalar_cast_test.cc | 3 +- cpp/src/arrow/util/basic_decimal.cc | 10 ++ cpp/src/arrow/util/basic_decimal.h | 4 + cpp/src/arrow/util/decimal.cc | 58 ++++++- cpp/src/arrow/util/decimal_internal.h | 4 + cpp/src/arrow/util/decimal_test.cc | 148 +++++++++++++----- 6 files changed, 183 insertions(+), 44 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 083a85eb346c5..1db06a762544b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) { } } - // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() + // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in + // decimal_test.cc } TEST(Cast, DecimalToString) { diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index f2fd39d6f37ad..0835ab9074a48 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const { return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision]; } +void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole, + BasicDecimal256* fraction) const { + DCHECK_GE(scale, 0); + 
DCHECK_LE(scale, 76); + + BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]); + auto s = Divide(multiplier, whole, fraction); + DCHECK_EQ(s, DecimalStatus::kSuccess); +} + const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index b263bb234a795..d8a91ea76b390 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal - static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) { Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); x += static_cast(decimal.low_bits()); x *= LargePowerOfTen(-scale); return x; } + + /// An appoximate conversion from Decimal128 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. 
+ template + static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + if (scale <= 0 || (decimal.high_bits() == 0 && + decimal.low_bits() <= RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split decimal into whole and fractional parts to avoid precision loss + BasicDecimal128 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + + return whole + fraction; + } }; } // namespace @@ -967,7 +994,7 @@ struct Decimal256RealConversion } template - static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) { DCHECK_GE(decimal, 0); Real x = 0; const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); @@ -978,6 +1005,33 @@ struct Decimal256RealConversion x *= LargePowerOfTen(-scale); return x; } + + /// An appoximate conversion from Decimal256 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. 
+ template + static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + const auto parts_le = decimal.little_endian_array(); + if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 && + parts_le[0] < RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split the decimal into whole and fractional parts to avoid precision loss + BasicDecimal256 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + return whole + fraction; + } }; } // namespace diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 041aac4ef860d..51a7229ab6678 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -451,6 +451,8 @@ struct RealTraits { static constexpr int kMantissaBits = 24; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 8; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template <> @@ -464,6 +466,8 @@ struct RealTraits { static constexpr int kMantissaBits = 53; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 16; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 1401750ce76d6..6376a9545a0f8 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, 
int32_t scale, Real ex << "Decimal value: " << decimal_value << " Scale: " << scale; } +template +void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale, + Real expected) { + Decimal dec(decimal_value); + auto result = dec.template ToReal(scale); + ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) || + result == std::nextafter(expected, expected - 1)) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + +template +void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale, + Real epsilon, Real expected) { + Decimal dec(decimal_value); + ASSERT_TRUE(std::abs(dec.template ToReal(scale) - expected) <= epsilon) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + template void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale, float expected) { @@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test { } } } +}; - // Test precision of conversions to float values - void TestPrecision() { - // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); - CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); - // 2**64 + 2**41 (exactly representable in a float) - CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); - CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); - } +TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); +TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } + +// Custom test for Decimal::ToReal +template +class TestDecimalToRealFloat : public TestDecimalToReal> {}; +TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes); - // Test conversions with a range of scales - void TestLargeValues(int32_t max_scale) { - // Note that exact comparisons would succeed on some platforms (Linux, macOS). 
- // Nevertheless, power-of-ten factors are not all exactly representable - // in binary floating point. - for (int32_t scale = -max_scale; scale <= max_scale; scale++) { +TYPED_TEST(TestDecimalToRealFloat, LargeValues) { + auto max_scale = TypeParam::kMaxScale; + // Note that exact comparisons would succeed on some platforms (Linux, macOS). + // Nevertheless, power-of-ten factors are not all exactly representable + // in binary floating point. + for (int32_t scale = -max_scale; scale <= max_scale; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - CheckDecimalToRealApprox("1", scale, Pow10(-scale)); - } - for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { + CheckDecimalToRealApprox("1", scale, this->Pow10(-scale)); + } + for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - const Real factor = static_cast(123); - CheckDecimalToRealApprox("123", scale, factor * Pow10(-scale)); - } + const auto factor = static_cast(123); + CheckDecimalToRealApprox("123", scale, factor * this->Pow10(-scale)); } -}; - -TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); - -TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } +} -// Custom test for Decimal128::ToReal -class TestDecimal128ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); } -TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); } -// Custom test for Decimal256::ToReal -class TestDecimal256ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal256ToRealFloat, LargeValues) { 
TestLargeValues(/*max_scale=*/76); } -TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); } +TYPED_TEST(TestDecimalToRealFloat, Precision) { + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); + CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); + // 2**64 + 2**41 (exactly representable in a float) + CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); + CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); + + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); // pad with trailing zeros + CheckDecimalToReal(seven, scale, 7.0f); + CheckDecimalToReal("-" + seven, scale, -7.0f); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0f); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0f); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinOneULP("-9999999.999999", 6, + -9999999.999999f); + + // Large fractions are within 2^-23 + constexpr float epsilon = 1.1920928955078125e-07f; // 2^-23 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625f); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345f); +} // ToReal tests are disabled on MinGW because of precision issues in results #ifndef __MINGW32__ -// Custom test for Decimal128::ToReal +// Custom test for Decimal::ToReal template class TestDecimalToRealDouble : public TestDecimalToReal> { }; @@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) 
{ 9.999999999999998e+47); CheckDecimalToReal("-99999999999999978859343891977453174784", -10, -9.999999999999998e+47); + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); + CheckDecimalToReal(seven, scale, 7.0); + CheckDecimalToReal("-" + seven, scale, -7.0); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9); + CheckDecimalToRealWithinOneULP("9999999.999999999999999", 15, + 9999999.999999999999999); + CheckDecimalToRealWithinOneULP("-9999999.999999999999999", 15, + -9999999.999999999999999); + + // Large fractions are within 2^-52 + constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345); } #endif // __MINGW32__ From de8df23a8cd9737b4df5bb1b68fc12a54f252d0d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 18 Jul 2023 16:41:55 +0200 Subject: [PATCH 32/35] GH-36744: [Python][Packaging] Add upper pin for cython<3 to pyarrow build dependencies (#36743) ### Rationale for this change Although we already fixed some cython 3 build issues (https://github.com/apache/arrow/pull/34726), some new ones have been introduced, which we are seeing now that cython 3 is released (https://github.com/apache/arrow/issues/36730). This adds an upper pin (<3) for the release, so we have more time (the full 14.0 release cycle) to iron out issues.
* Closes: #36744 Authored-by: Joris Van den Bossche Signed-off-by: Antoine Pitrou --- .github/workflows/dev.yml | 2 +- ci/conda_env_python.txt | 2 +- python/pyproject.toml | 2 +- python/requirements-build.txt | 2 +- python/requirements-wheel-build.txt | 2 +- python/setup.py | 7 ++++--- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 7c2437f6edfb5..119d11d9a399a 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit - pip install cython setuptools six pytest jira + pip install "cython<3" setuptools six pytest jira - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 04f985c94bb2c..4ae5c3614a1dc 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -18,7 +18,7 @@ # don't add pandas here, because it is not a mandatory test dependency boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture cffi -cython +cython<3 cloudpickle fsspec hypothesis diff --git a/python/pyproject.toml b/python/pyproject.toml index fe8c938a9ce4f..7e61304585809 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -17,7 +17,7 @@ [build-system] requires = [ - "cython >= 0.29.31", + "cython >= 0.29.31,<3", "oldest-supported-numpy>=0.14", "setuptools_scm", "setuptools >= 40.1.0", diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 507e9081373e2..6378d1b94e1bb 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,4 +1,4 @@ -cython>=0.29.31 +cython>=0.29.31,<3 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=38.6.0 diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt index 6043d2ffb2c6e..e4f5243fbc2fe 100644 --- a/python/requirements-wheel-build.txt +++ b/python/requirements-wheel-build.txt @@ -1,4 +1,4 @@ 
-cython>=0.29.31 +cython>=0.29.31,<3 oldest-supported-numpy>=0.14 setuptools_scm setuptools>=58 diff --git a/python/setup.py b/python/setup.py index f06cb5a627562..dc529679c7f90 100755 --- a/python/setup.py +++ b/python/setup.py @@ -40,8 +40,9 @@ # Check if we're running 64-bit Python is_64_bit = sys.maxsize > 2**32 -if Cython.__version__ < '0.29.31': - raise Exception('Please upgrade to Cython 0.29.31 or newer') +if Cython.__version__ < '0.29.31' or Cython.__version__ >= '3.0': + raise Exception( + 'Please update your Cython version. Supported Cython >= 0.29.31, < 3.0') setup_dir = os.path.abspath(os.path.dirname(__file__)) @@ -491,7 +492,7 @@ def has_ext_modules(foo): 'pyarrow/_generated_version.py'), 'version_scheme': guess_next_dev_version }, - setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires, + setup_requires=['setuptools_scm', 'cython >= 0.29.31,<3'] + setup_requires, install_requires=install_requires, tests_require=['pytest', 'pandas', 'hypothesis'], python_requires='>=3.8', From f9904063b163c4ad44bef61e84a1e4a90b600d34 Mon Sep 17 00:00:00 2001 From: mwish Date: Wed, 19 Jul 2023 00:32:34 +0800 Subject: [PATCH 33/35] GH-35934:[C++][Parquet] PageIndex Read benchmark (#36702) ### Rationale for this change Add benchmark for read page index ### What changes are included in this PR? Just a benchmark in `cpp/src/parquet/bloom_filter_benchmark.cc` ### Are these changes tested? No ### Are there any user-facing changes? 
No * Closes: #35934 Lead-authored-by: mwish Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/dataset/dataset.h | 2 +- cpp/src/parquet/CMakeLists.txt | 5 +- cpp/src/parquet/benchmark_util.cc | 126 ++++++++++++++++++ cpp/src/parquet/benchmark_util.h | 47 +++++++ cpp/src/parquet/bloom_filter_benchmark.cc | 69 ++-------- cpp/src/parquet/level_conversion_benchmark.cc | 2 +- cpp/src/parquet/page_index_benchmark.cc | 107 +++++++++++++++ cpp/src/parquet/test_util.h | 2 +- 8 files changed, 296 insertions(+), 64 deletions(-) create mode 100644 cpp/src/parquet/benchmark_util.cc create mode 100644 cpp/src/parquet/benchmark_util.h create mode 100644 cpp/src/parquet/page_index_benchmark.cc diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 1db230b16e9c2..39936fbd7b5b2 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -82,7 +82,7 @@ class ARROW_DS_EXPORT FragmentSelection { /// \brief Instructions for scanning a particular fragment /// -/// The fragment scan request is dervied from ScanV2Options. The main +/// The fragment scan request is derived from ScanV2Options. The main /// difference is that the scan options are based on the dataset schema /// while the fragment request is based on the fragment schema. 
struct ARROW_DS_EXPORT FragmentScanRequest { diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index e6aad7cee2a3e..eb2e2d8fed88f 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -401,11 +401,14 @@ endif() add_parquet_test(file_deserialize_test SOURCES file_deserialize_test.cc test_util.cc) add_parquet_test(schema_test) -add_parquet_benchmark(bloom_filter_benchmark) +add_parquet_benchmark(bloom_filter_benchmark SOURCES bloom_filter_benchmark.cc + benchmark_util.cc) add_parquet_benchmark(column_reader_benchmark) add_parquet_benchmark(column_io_benchmark) add_parquet_benchmark(encoding_benchmark) add_parquet_benchmark(level_conversion_benchmark) +add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc + benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") if(ARROW_WITH_BROTLI) diff --git a/cpp/src/parquet/benchmark_util.cc b/cpp/src/parquet/benchmark_util.cc new file mode 100644 index 0000000000000..6220336e1c39e --- /dev/null +++ b/cpp/src/parquet/benchmark_util.cc @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/benchmark_util.h" + +#include + +namespace parquet::benchmark { + +namespace { + +void GenerateRandomString(uint32_t length, uint32_t seed, std::vector* heap) { + // Character set used to generate random string + const std::string charset = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + std::default_random_engine gen(seed); + std::uniform_int_distribution dist(0, static_cast(charset.size() - 1)); + + for (uint32_t i = 0; i < length; i++) { + heap->emplace_back(charset[dist(gen)]); + } +} + +template +void GenerateBenchmarkDataIntegerImpl(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t) { + static_assert(std::is_integral_v); + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i] = d(gen); + } +} + +template +void GenerateBenchmarkDataFloatImpl(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t) { + static_assert(std::is_floating_point_v); + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_real_distribution d(std::numeric_limits::lowest(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i] = d(gen); + } +} + +} // namespace + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, int32_t* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataIntegerImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, int64_t* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataIntegerImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, float* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataFloatImpl(size, seed, data, heap, data_string_length); +} + +template <> 
+void GenerateBenchmarkData(uint32_t size, uint32_t seed, double* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataFloatImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, Int96* data, + std::vector* heap, uint32_t) { + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i].value[0] = d(gen); + data[i].value[1] = d(gen); + data[i].value[2] = d(gen); + } +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, FLBA* data, + std::vector* heap, uint32_t data_string_length) { + heap->clear(); + GenerateRandomString(data_string_length * size, seed, heap); + for (uint32_t i = 0; i < size; ++i) { + data[i].ptr = heap->data() + i * data_string_length; + } +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, ByteArray* data, + std::vector* heap, uint32_t data_string_length) { + heap->clear(); + GenerateRandomString(data_string_length * size, seed, heap); + for (uint32_t i = 0; i < size; ++i) { + data[i].ptr = heap->data() + i * data_string_length; + data[i].len = data_string_length; + } +} + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/benchmark_util.h b/cpp/src/parquet/benchmark_util.h new file mode 100644 index 0000000000000..7996f7f85e898 --- /dev/null +++ b/cpp/src/parquet/benchmark_util.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "parquet/types.h" + +namespace parquet::benchmark { + +template +void GenerateBenchmarkData(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t data_string_length); + +#define _GENERATE_BENCHMARK_DATA_DECL(KLASS) \ + template <> \ + void GenerateBenchmarkData(uint32_t size, uint32_t seed, KLASS* data, \ + std::vector* heap, uint32_t data_string_length); + +_GENERATE_BENCHMARK_DATA_DECL(int32_t) +_GENERATE_BENCHMARK_DATA_DECL(int64_t) +_GENERATE_BENCHMARK_DATA_DECL(float) +_GENERATE_BENCHMARK_DATA_DECL(double) +_GENERATE_BENCHMARK_DATA_DECL(ByteArray) +_GENERATE_BENCHMARK_DATA_DECL(FLBA) +_GENERATE_BENCHMARK_DATA_DECL(Int96) + +#undef _GENERATE_BENCHMARK_DATA_DECL + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/bloom_filter_benchmark.cc b/cpp/src/parquet/bloom_filter_benchmark.cc index fa934b1d5290a..13c731d975b2c 100644 --- a/cpp/src/parquet/bloom_filter_benchmark.cc +++ b/cpp/src/parquet/bloom_filter_benchmark.cc @@ -18,13 +18,13 @@ #include "benchmark/benchmark.h" #include "arrow/util/logging.h" +#include "parquet/benchmark_util.h" #include "parquet/bloom_filter.h" #include "parquet/properties.h" #include -namespace parquet { -namespace benchmark { +namespace parquet::benchmark { constexpr static uint32_t kNumBloomFilterInserts = 16 * 1024; // The sample string length for FLBA and ByteArray benchmarks @@ -40,63 +40,11 @@ std::unique_ptr CreateBloomFilter(uint32_t num_values) { return bloom_filter; } -void GenerateRandomString(uint32_t length, uint32_t 
seed, std::vector* heap) { - // Character set used to generate random string - const std::string charset = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - - std::default_random_engine gen(seed); - std::uniform_int_distribution dist(0, static_cast(charset.size() - 1)); - - for (uint32_t i = 0; i < length; i++) { - heap->push_back(charset[dist(gen)]); - } -} - -template -void GenerateBenchmarkData(uint32_t size, uint32_t seed, T* data, - [[maybe_unused]] std::vector* heap = nullptr) { - if constexpr (std::is_integral_v) { - std::default_random_engine gen(seed); - std::uniform_int_distribution d(std::numeric_limits::min(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i] = d(gen); - } - } else if constexpr (std::is_floating_point_v) { - std::default_random_engine gen(seed); - std::uniform_real_distribution d(std::numeric_limits::lowest(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i] = d(gen); - } - } else if constexpr (std::is_same_v) { - GenerateRandomString(kDataStringLength * size, seed, heap); - for (uint32_t i = 0; i < size; ++i) { - data[i].ptr = heap->data() + i * kDataStringLength; - } - } else if constexpr (std::is_same_v) { - GenerateRandomString(kDataStringLength * size, seed, heap); - for (uint32_t i = 0; i < size; ++i) { - data[i].ptr = heap->data() + i * kDataStringLength; - data[i].len = kDataStringLength; - } - } else if constexpr (std::is_same_v) { - std::default_random_engine gen(seed); - std::uniform_int_distribution d(std::numeric_limits::min(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i].value[0] = d(gen); - data[i].value[1] = d(gen); - data[i].value[2] = d(gen); - } - } -} - std::vector GetHashValues(uint32_t num_values, uint32_t seed) { // Generate sample data values std::vector values(num_values); - GenerateBenchmarkData(num_values, seed, values.data()); + std::vector heap; + GenerateBenchmarkData(num_values, seed, 
values.data(), &heap, kDataStringLength); // Create a temp filter to compute hash values auto filter = CreateBloomFilter(/*num_values=*/8); std::vector hashes(num_values); @@ -109,7 +57,8 @@ static void BM_ComputeHash(::benchmark::State& state) { using T = typename DType::c_type; std::vector values(kNumBloomFilterInserts); std::vector heap; - GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap); + GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap, + kDataStringLength); auto filter = CreateBloomFilter(kNumBloomFilterInserts); for (auto _ : state) { uint64_t total = 0; @@ -136,7 +85,8 @@ static void BM_BatchComputeHash(::benchmark::State& state) { using T = typename DType::c_type; std::vector values(kNumBloomFilterInserts); std::vector heap; - GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap); + GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap, + kDataStringLength); auto filter = CreateBloomFilter(kNumBloomFilterInserts); std::vector hashes(kNumBloomFilterInserts); for (auto _ : state) { @@ -231,5 +181,4 @@ BENCHMARK(BM_BatchInsertHash); BENCHMARK(BM_FindExistingHash); BENCHMARK(BM_FindNonExistingHash); -} // namespace benchmark -} // namespace parquet +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/level_conversion_benchmark.cc b/cpp/src/parquet/level_conversion_benchmark.cc index f9e91c4820f68..f3a4f8095e3a1 100644 --- a/cpp/src/parquet/level_conversion_benchmark.cc +++ b/cpp/src/parquet/level_conversion_benchmark.cc @@ -29,7 +29,7 @@ constexpr int16_t kMissingDefLevel = 0; // Definition Level indicating the values has an entry in the leaf element. constexpr int16_t kPresentDefLevel = 2; -// A repition level that indicates a repeated element. +// A repetition level that indicates a repeated element. 
constexpr int16_t kHasRepeatedElements = 1; std::vector RunDefinitionLevelsToBitmap(const std::vector& def_levels, diff --git a/cpp/src/parquet/page_index_benchmark.cc b/cpp/src/parquet/page_index_benchmark.cc new file mode 100644 index 0000000000000..5631034105056 --- /dev/null +++ b/cpp/src/parquet/page_index_benchmark.cc @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include "benchmark/benchmark.h" + +#include "parquet/benchmark_util.h" +#include "parquet/metadata.h" +#include "parquet/page_index.h" +#include "parquet/schema.h" +#include "parquet/test_util.h" +#include "parquet/thrift_internal.h" + +namespace parquet::benchmark { + +void PageIndexSetArgs(::benchmark::internal::Benchmark* bench) { + bench->ArgNames({"num_pages"}); + bench->Range(8, 1024); +} + +void BM_ReadOffsetIndex(::benchmark::State& state) { + auto builder = OffsetIndexBuilder::Make(); + const int num_pages = static_cast(state.range(0)); + constexpr int64_t page_size = 1024; + constexpr int64_t first_row_index = 10000; + for (int i = 0; i < num_pages; ++i) { + builder->AddPage(page_size * i, page_size, first_row_index * i); + } + constexpr int64_t final_position = 4096; + builder->Finish(final_position); + auto sink = CreateOutputStream(); + builder->WriteTo(sink.get()); + auto buffer = sink->Finish().ValueOrDie(); + ReaderProperties properties; + for (auto _ : state) { + auto offset_index = OffsetIndex::Make( + buffer->data() + 0, static_cast(buffer->size()), properties); + ::benchmark::DoNotOptimize(offset_index); + } + state.SetBytesProcessed(state.iterations() * buffer->size()); + state.SetItemsProcessed(state.iterations() * num_pages); +} + +BENCHMARK(BM_ReadOffsetIndex)->Apply(PageIndexSetArgs); + +// The sample string length for FLBA and ByteArray benchmarks +constexpr static uint32_t kDataStringLength = 8; + +template +void BM_ReadColumnIndex(::benchmark::State& state) { + schema::NodePtr type = ::parquet::schema::PrimitiveNode::Make( + "b", Repetition::OPTIONAL, DType::type_num, ConvertedType::NONE, 8); + auto descr_ptr = + std::make_unique(type, /*def_level=*/1, /*rep_level=*/0); + auto descr = descr_ptr.get(); + + const int num_pages = static_cast(state.range(0)); + auto builder = ColumnIndexBuilder::Make(descr); + + const size_t values_per_page = 100; + for (int i = 0; i < num_pages; ++i) { + auto stats = 
MakeStatistics(descr); + std::vector heap; + std::vector values; + values.resize(values_per_page); + GenerateBenchmarkData(values_per_page, /*seed=*/0, values.data(), &heap, + kDataStringLength); + stats->Update(values.data(), values_per_page, /*null_count=*/0); + builder->AddPage(stats->Encode()); + } + + builder->Finish(); + auto sink = CreateOutputStream(); + builder->WriteTo(sink.get()); + auto buffer = sink->Finish().ValueOrDie(); + ReaderProperties properties; + for (auto _ : state) { + auto column_index = ColumnIndex::Make(*descr, buffer->data() + 0, + static_cast(buffer->size()), properties); + ::benchmark::DoNotOptimize(column_index); + } + state.SetBytesProcessed(state.iterations() * buffer->size()); + state.SetItemsProcessed(state.iterations() * num_pages); +} + +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, Int64Type)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, DoubleType)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, FLBAType)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, ByteArrayType)->Apply(PageIndexSetArgs); + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index dfb4b5d0fbf4a..b0aafa037ead1 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -556,7 +556,7 @@ static inline int MakePages(const ColumnDescriptor* d, int num_pages, int levels } else { num_values = num_levels; } - // Create repitition levels + // Create repetition levels if (max_rep_level > 0 && num_levels != 0) { rep_levels.resize(num_levels); // Using a different seed so that def_levels and rep_levels are different. 
From e8214734459eff5cfc9e67e8b1fdef46f6d8c2ea Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:07:26 -0400 Subject: [PATCH 34/35] GH-36734: [MATLAB] template arrow::matlab::proxy::NumericArray on ArrowType instead of CType (#36738) ### Rationale for this change We decided to change the template parameter on `arrow::matlab::proxy::NumericArray` to `ArrowType` from `CType` to avoid writing duplicate code. If `proxy::NumericArray` is templated on `ArrowType`, we can use it to implement the proxies for `Date64Array`, `Date32Array`, `Time32Array`, `Time64Array`, and `TimestampArray`. This will help us avoid duplicating code. ### What changes are included in this PR? 1. Changed the template on `proxy::NumericArray` from `CType` to `ArrowType` 2. Re-implemented the C++ proxy object used for `TimestampArray` in terms of `proxy::NumericArray` 3. Defined a template specialization for `NumericArray::make` when the template parameter is `arrow::TimestampType` 4. Defined a `proxy::Traits` `struct` that is templated on `ArrowType`. Specializations of `Traits` define a `TypeProxy` typedef that can be used at compile-time to get the proxy class that is used to wrap an `ArrowType`. ### Are these changes tested? Existing tests used. ### Are there any user-facing changes? No.
* Closes: #36734 Authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../arrow/matlab/array/proxy/numeric_array.h | 66 +++++++++++-- .../matlab/array/proxy/timestamp_array.cc | 99 ------------------- .../matlab/array/proxy/timestamp_array.h | 43 -------- matlab/src/cpp/arrow/matlab/proxy/factory.cc | 23 +++-- .../src/cpp/arrow/matlab/type/proxy/traits.h | 90 +++++++++++++++++ .../cmake/BuildMatlabArrowInterface.cmake | 1 - 6 files changed, 158 insertions(+), 164 deletions(-) delete mode 100644 matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc delete mode 100644 matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h create mode 100644 matlab/src/cpp/arrow/matlab/type/proxy/traits.h diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index c66c1d044fc12..f358e05db6318 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -24,7 +24,7 @@ #include "arrow/type_traits.h" #include "arrow/matlab/array/proxy/array.h" -#include "arrow/matlab/type/proxy/primitive_ctype.h" +#include "arrow/matlab/type/proxy/traits.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" @@ -33,20 +33,23 @@ #include "libmexclass/proxy/Proxy.h" +#include "arrow/matlab/type/time_unit.h" +#include "arrow/util/utf8.h" + namespace arrow::matlab::array::proxy { -template +template class NumericArray : public arrow::matlab::array::proxy::Array { public: - using ArrowType = typename arrow::CTypeTraits::ArrowType; NumericArray(const std::shared_ptr> numeric_array) : arrow::matlab::array::proxy::Array{std::move(numeric_array)} {} static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using CType = typename arrow::TypeTraits::CType; using NumericArray = arrow::NumericArray; - using NumericArrayProxy = 
typename arrow::matlab::array::proxy::NumericArray; + using NumericArrayProxy = typename proxy::NumericArray; ::matlab::data::StructArray opts = constructor_arguments[0]; @@ -68,10 +71,11 @@ class NumericArray : public arrow::matlab::array::proxy::Array { protected: void toMATLAB(libmexclass::proxy::method::Context& context) override { - using ArrowArrayType = typename arrow::CTypeTraits::ArrayType; + using CType = typename arrow::TypeTraits::CType; + using NumericArray = arrow::NumericArray; const auto num_elements = static_cast(array->length()); - const auto numeric_array = std::static_pointer_cast(array); + const auto numeric_array = std::static_pointer_cast(array); const CType* const data_begin = numeric_array->raw_values(); const CType* const data_end = data_begin + num_elements; @@ -83,11 +87,55 @@ class NumericArray : public arrow::matlab::array::proxy::Array { } std::shared_ptr typeProxy() override { - using ArrowTypeProxy = type::proxy::PrimitiveCType; + using TypeProxy = typename type::proxy::Traits::TypeProxy; auto type = std::static_pointer_cast(array->type()); - return std::make_shared(std::move(type)); + return std::make_shared(std::move(type)); } - }; + // Specialization of NumericArray::Make for arrow::TimestampType. 
+ template <> + libmexclass::proxy::MakeResult NumericArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using TimestampArray = arrow::TimestampArray; + using TimestampArrayProxy = arrow::matlab::array::proxy::NumericArray; + + mda::StructArray opts = constructor_arguments[0]; + + // Get the mxArray from constructor arguments + const mda::TypedArray timestamp_mda = opts[0]["MatlabArray"]; + const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; + + const mda::TypedArray timezone_mda = opts[0]["TimeZone"]; + const mda::TypedArray units_mda = opts[0]["TimeUnit"]; + + // extract the time zone string + const std::u16string& u16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, + arrow::util::UTF16StringToUTF8(u16_timezone), + error::UNICODE_CONVERSION_ERROR_ID); + + // extract the time unit + const std::u16string& u16_timeunit = units_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto time_unit, + arrow::matlab::type::timeUnitFromString(u16_timeunit), + error::UKNOWN_TIME_UNIT_ERROR_ID) + + // create the timestamp_type + auto data_type = arrow::timestamp(time_unit, timezone); + auto array_length = static_cast(timestamp_mda.getNumberOfElements()); // cast size_t to int64_t + + auto data_buffer = std::make_shared(timestamp_mda); + + // Pack the validity bitmap values. 
+ MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, + bit::packValid(validity_bitmap_mda), + error::BITPACK_VALIDITY_BITMAP_ERROR_ID); + + auto array_data = arrow::ArrayData::Make(data_type, array_length, {packed_validity_bitmap, data_buffer}); + auto timestamp_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(timestamp_array)); + } + } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc deleted file mode 100644 index b9bbf3d7e7942..0000000000000 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "arrow/matlab/array/proxy/timestamp_array.h" -#include "arrow/matlab/type/proxy/timestamp_type.h" - -#include "arrow/matlab/error/error.h" -#include "arrow/matlab/bit/pack.h" -#include "arrow/matlab/bit/unpack.h" -#include "arrow/matlab/buffer/matlab_buffer.h" - -#include "arrow/matlab/type/time_unit.h" -#include "arrow/util/utf8.h" -#include "arrow/type.h" - -namespace arrow::matlab::array::proxy { - - TimestampArray::TimestampArray(std::shared_ptr array) - : arrow::matlab::array::proxy::Array{std::move(array)} {} - - libmexclass::proxy::MakeResult TimestampArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { - namespace mda = ::matlab::data; - using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; - using TimestampArray = arrow::TimestampArray; - using TimestampArrayProxy = arrow::matlab::array::proxy::TimestampArray; - - mda::StructArray opts = constructor_arguments[0]; - - // Get the mxArray from constructor arguments - const mda::TypedArray timestamp_mda = opts[0]["MatlabArray"]; - const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; - - const mda::TypedArray timezone_mda = opts[0]["TimeZone"]; - const mda::TypedArray units_mda = opts[0]["TimeUnit"]; - - // extract the time zone string - const std::u16string& u16_timezone = timezone_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto timezone, - arrow::util::UTF16StringToUTF8(u16_timezone), - error::UNICODE_CONVERSION_ERROR_ID); - - // extract the time unit - const std::u16string& u16_timeunit = units_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto time_unit, - arrow::matlab::type::timeUnitFromString(u16_timeunit), - error::UKNOWN_TIME_UNIT_ERROR_ID) - - // create the timestamp_type - auto data_type = arrow::timestamp(time_unit, timezone); - auto array_length = static_cast(timestamp_mda.getNumberOfElements()); // cast size_t to int64_t - - auto data_buffer = std::make_shared(timestamp_mda); - - // Pack the validity bitmap values. 
- MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, - bit::packValid(validity_bitmap_mda), - error::BITPACK_VALIDITY_BITMAP_ERROR_ID); - - auto array_data = arrow::ArrayData::Make(data_type, array_length, {packed_validity_bitmap, data_buffer}); - auto timestamp_array = std::static_pointer_cast(arrow::MakeArray(array_data)); - return std::make_shared(std::move(timestamp_array)); - } - - void TimestampArray::toMATLAB(libmexclass::proxy::method::Context& context) { - namespace mda = ::matlab::data; - - const auto num_elements = static_cast(array->length()); - const auto timestamp_array = std::static_pointer_cast(array); - const int64_t* const data_begin = timestamp_array->raw_values(); - const int64_t* const data_end = data_begin + num_elements; - - mda::ArrayFactory factory; - - // Constructs a TypedArray from the raw values. Makes a copy. - mda::TypedArray result = factory.createArray({num_elements, 1}, data_begin, data_end); - context.outputs[0] = result; - } - - std::shared_ptr TimestampArray::typeProxy() { - using TimestampProxyType = type::proxy::TimestampType; - auto type = std::static_pointer_cast(array->type()); - return std::make_shared(std::move(type)); - - } -} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h deleted file mode 100644 index a312a129a21c2..0000000000000 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "arrow/array.h" - -#include "arrow/matlab/array/proxy/array.h" - -#include "libmexclass/proxy/Proxy.h" - -#include "arrow/type_fwd.h" - -namespace arrow::matlab::array::proxy { - -class TimestampArray : public arrow::matlab::array::proxy::Array { - public: - TimestampArray(std::shared_ptr array); - - static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); - - protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override; - - std::shared_ptr typeProxy() override; - -}; - -} diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 0f7751035a052..2fb3207e590c6 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -18,7 +18,6 @@ #include "arrow/matlab/array/proxy/boolean_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" #include "arrow/matlab/array/proxy/string_array.h" -#include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/type/proxy/primitive_ctype.h" @@ -30,19 +29,19 @@ namespace arrow::matlab::proxy { libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, const FunctionArguments& constructor_arguments) { - REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); 
- REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); - REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray); + REGISTER_PROXY(arrow.array.proxy.TimestampArray, 
arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType); REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType); diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/traits.h b/matlab/src/cpp/arrow/matlab/type/proxy/traits.h new file mode 100644 index 0000000000000..3d9a957a5e3dc --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/traits.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/type_fwd.h" + +#include "arrow/matlab/type/proxy/primitive_ctype.h" +#include "arrow/matlab/type/proxy/timestamp_type.h" +#include "arrow/matlab/type/proxy/string_type.h" + +namespace arrow::matlab::type::proxy { + + template + struct Traits; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = StringType; + }; + + template <> + struct Traits { + using TypeProxy = TimestampType; + }; +} diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 253632d221040..c10ce07280fa6 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -44,7 +44,6 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/string_array.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc" 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/pack.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc" From c7741fb4e633222346269e80b35b5df48051b585 Mon Sep 17 00:00:00 2001 From: Junming Chen Date: Wed, 19 Jul 2023 07:41:07 +0800 Subject: [PATCH 35/35] GH-34588:[C++][Python] Add a MetaFunction for "dictionary_decode" (#35356) **Rationale for this change** This PR is for [Issue-34588](https://github.com/apache/arrow/issues/34588). Discussing with @ westonpace, a MetaFunction for "dictionary_decode" is implemented instead of adding a compute kernel. **What changes are included in this PR?** C++: Meta Function of dictionary_decode. Python: Test **Are these changes tested?** One test in tests/test_compute.py * Closes: #34588 Lead-authored-by: Junming Chen Co-authored-by: Alenka Frim Co-authored-by: Weston Pace Signed-off-by: Weston Pace --- cpp/src/arrow/compute/kernels/vector_hash.cc | 37 ++++++++++++++++++++ cpp/src/arrow/compute/registry.cc | 1 + cpp/src/arrow/compute/registry_internal.h | 1 + python/pyarrow/tests/test_compute.py | 11 ++++++ 4 files changed, 50 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 2eab7ae8afaf2..a7bb2d88c291b 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -27,6 +27,7 @@ #include "arrow/array/dict_internal.h" #include "arrow/array/util.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/cast.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" @@ -762,6 +763,38 @@ const FunctionDoc dictionary_encode_doc( ("Return a dictionary-encoded version of the input array."), {"array"}, "DictionaryEncodeOptions"); +// ---------------------------------------------------------------------- +// This function does not use any hashing utilities +// but is kept in this file to be near dictionary_encode +// Dictionary decode 
implementation + +const FunctionDoc dictionary_decode_doc{ + "Decodes a DictionaryArray to an Array", + ("Return a plain-encoded version of the array input\n" + "This function does nothing if the input is not a dictionary."), + {"dictionary_array"}}; + +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result ExecuteImpl(const std::vector& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return args[0]; + } + + if (args[0].is_array() || args[0].is_chunked_array()) { + DictionaryType* dict_type = checked_cast(args[0].type().get()); + CastOptions cast_options = CastOptions::Safe(dict_type->value_type()); + return CallFunction("cast", args, &cast_options, ctx); + } else { + return Status::TypeError("Expected an Array or a Chunked Array"); + } + } +}; } // namespace void RegisterVectorHash(FunctionRegistry* registry) { @@ -819,6 +852,10 @@ void RegisterVectorHash(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::move(dict_encode))); } +void RegisterDictionaryDecode(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunction(std::make_shared())); +} + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index a4b484a2069ea..7a54f78a03736 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -275,6 +275,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Register core kernels RegisterScalarCast(registry.get()); + RegisterDictionaryDecode(registry.get()); RegisterVectorHash(registry.get()); RegisterVectorSelection(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index b4239701d9573..cdc9f804e72f1 100644 --- 
a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -28,6 +28,7 @@ namespace internal { void RegisterScalarArithmetic(FunctionRegistry* registry); void RegisterScalarBoolean(FunctionRegistry* registry); void RegisterScalarCast(FunctionRegistry* registry); +void RegisterDictionaryDecode(FunctionRegistry* registry); void RegisterScalarComparison(FunctionRegistry* registry); void RegisterScalarIfElse(FunctionRegistry* registry); void RegisterScalarNested(FunctionRegistry* registry); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index e47e5d3f3eb3b..98ab84c03900f 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1756,6 +1756,17 @@ def test_logical(): assert pc.invert(a) == pa.array([False, True, True, None]) +def test_dictionary_decode(): + array = pa.array(["a", "a", "b", "c", "b"]) + dictionary_array = array.dictionary_encode() + dictionary_array_decode = pc.dictionary_decode(dictionary_array) + + assert array != dictionary_array + + assert array == dictionary_array_decode + assert array == pc.dictionary_decode(array) + + def test_cast(): arr = pa.array([1, 2, 3, 4], type='int64') options = pc.CastOptions(pa.int8())