diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index ed6152259891d..5fa533632f928 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -31,7 +31,6 @@ namespace arrow::matlab::array::proxy { // Register Proxy methods. REGISTER_METHOD(Array, toString); - REGISTER_METHOD(Array, toMATLAB); REGISTER_METHOD(Array, getLength); REGISTER_METHOD(Array, getValid); REGISTER_METHOD(Array, getType); diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index 185e107f75391..46e1fa5a81380 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -42,8 +42,6 @@ class Array : public libmexclass::proxy::Proxy { void getType(libmexclass::proxy::method::Context& context); - virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0; - void isEqual(libmexclass::proxy::method::Context& context); std::shared_ptr array; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc index 5be0cfb5a3d13..6a6e478274823 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc @@ -25,7 +25,9 @@ namespace arrow::matlab::array::proxy { BooleanArray::BooleanArray(std::shared_ptr array) - : arrow::matlab::array::proxy::Array{std::move(array)} {} + : arrow::matlab::array::proxy::Array{std::move(array)} { + REGISTER_METHOD(BooleanArray, toMATLAB); + } libmexclass::proxy::MakeResult BooleanArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { ::matlab::data::StructArray opts = constructor_arguments[0]; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h index 775673c29eada..edc00b178e42a 100644 --- 
a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h @@ -31,7 +31,7 @@ namespace arrow::matlab::array::proxy { static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override; + void toMATLAB(libmexclass::proxy::method::Context& context); }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index f9da38dbaa062..4b4ddb6588678 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -40,7 +40,9 @@ class NumericArray : public arrow::matlab::array::proxy::Array { public: NumericArray(const std::shared_ptr> numeric_array) - : arrow::matlab::array::proxy::Array{std::move(numeric_array)} {} + : arrow::matlab::array::proxy::Array{std::move(numeric_array)} { + REGISTER_METHOD(NumericArray, toMATLAB); + } static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; @@ -67,7 +69,7 @@ class NumericArray : public arrow::matlab::array::proxy::Array { } protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override { + void toMATLAB(libmexclass::proxy::method::Context& context) { using CType = typename arrow::TypeTraits::CType; using NumericArray = arrow::NumericArray; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc index c583e8851a3ac..7160e88a3c8a0 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc @@ -28,7 +28,9 @@ namespace arrow::matlab::array::proxy { StringArray::StringArray(const std::shared_ptr string_array) - : 
arrow::matlab::array::proxy::Array(std::move(string_array)) {} + : arrow::matlab::array::proxy::Array(std::move(string_array)) { + REGISTER_METHOD(StringArray, toMATLAB); + } libmexclass::proxy::MakeResult StringArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h index bdcfedd7cdda3..4cc01f0a02f8c 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h @@ -32,7 +32,7 @@ namespace arrow::matlab::array::proxy { static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override; + void toMATLAB(libmexclass::proxy::method::Context& context); }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.cc new file mode 100644 index 0000000000000..c6d9e47a9b0c4 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.cc @@ -0,0 +1,199 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/array/proxy/struct_array.h" +#include "arrow/matlab/array/proxy/wrap.h" +#include "arrow/matlab/bit/pack.h" +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/index/validate.h" + +#include "arrow/util/utf8.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::array::proxy { + + StructArray::StructArray(std::shared_ptr struct_array) + : proxy::Array{std::move(struct_array)} { + REGISTER_METHOD(StructArray, getNumFields); + REGISTER_METHOD(StructArray, getFieldByIndex); + REGISTER_METHOD(StructArray, getFieldByName); + REGISTER_METHOD(StructArray, getFieldNames); + } + + libmexclass::proxy::MakeResult StructArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using libmexclass::proxy::ProxyManager; + + mda::StructArray opts = constructor_arguments[0]; + const mda::TypedArray arrow_array_proxy_ids = opts[0]["ArrayProxyIDs"]; + const mda::StringArray field_names_mda = opts[0]["FieldNames"]; + const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; + + std::vector> arrow_arrays; + arrow_arrays.reserve(arrow_array_proxy_ids.getNumberOfElements()); + + // Retrieve all of the Arrow Array Proxy instances from the libmexclass ProxyManager. 
+ for (const auto& arrow_array_proxy_id : arrow_array_proxy_ids) { + auto proxy = ProxyManager::getProxy(arrow_array_proxy_id); + auto arrow_array_proxy = std::static_pointer_cast(proxy); + auto arrow_array = arrow_array_proxy->unwrap(); + arrow_arrays.push_back(arrow_array); + } + + // Convert the utf-16 encoded field names into utf-8 encoded strings + std::vector field_names; + field_names.reserve(field_names_mda.getNumberOfElements()); + for (const auto& field_name : field_names_mda) { + const auto field_name_utf16 = std::u16string(field_name); + MATLAB_ASSIGN_OR_ERROR(const auto field_name_utf8, + arrow::util::UTF16StringToUTF8(field_name_utf16), + error::UNICODE_CONVERSION_ERROR_ID); + field_names.push_back(field_name_utf8); + } + + // Pack the validity bitmap values. + MATLAB_ASSIGN_OR_ERROR(auto validity_bitmap_buffer, + bit::packValid(validity_bitmap_mda), + error::BITPACK_VALIDITY_BITMAP_ERROR_ID); + + // Create the StructArray + MATLAB_ASSIGN_OR_ERROR(auto array, + arrow::StructArray::Make(arrow_arrays, field_names, validity_bitmap_buffer), + error::STRUCT_ARRAY_MAKE_FAILED); + + // Construct the StructArray Proxy + auto struct_array = std::static_pointer_cast(array); + return std::make_shared(std::move(struct_array)); + } + + void StructArray::getNumFields(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + mda::ArrayFactory factory; + const auto num_fields = array->type()->num_fields(); + context.outputs[0] = factory.createScalar(num_fields); + } + + void StructArray::getFieldByIndex(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray index_mda = args[0]["Index"]; + const auto matlab_index = int32_t(index_mda[0]); + + auto struct_array = std::static_pointer_cast(array); + + const auto num_fields = struct_array->type()->num_fields(); + + // Validate there is at least 1 field + 
MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateNonEmptyContainer(num_fields), + context, error::INDEX_EMPTY_CONTAINER); + + // Validate the matlab index provided is within the range [1, num_fields] + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + index::validateInRange(matlab_index, num_fields), + context, error::INDEX_OUT_OF_RANGE); + + // Note: MATLAB uses 1-based indexing, so subtract 1. + const int32_t index = matlab_index - 1; + + auto field_array = struct_array->field(index); + + // Wrap the array within a proxy object if possible. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto field_array_proxy, + proxy::wrap(field_array), + context, error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + const auto field_array_proxy_id = ProxyManager::manageProxy(field_array_proxy); + const auto type_id = field_array->type_id(); + + // Return a struct with two fields: ProxyID and TypeID. The MATLAB + // layer will use these values to construct the appropriate MATLAB + // arrow.array.Array subclass. + mda::ArrayFactory factory; + mda::StructArray output = factory.createStructArray({1, 1}, {"ProxyID", "TypeID"}); + output[0]["ProxyID"] = factory.createScalar(field_array_proxy_id); + output[0]["TypeID"] = factory.createScalar(static_cast(type_id)); + context.outputs[0] = output; + } + + void StructArray::getFieldByName(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using libmexclass::proxy::ProxyManager; + + mda::StructArray args = context.inputs[0]; + + const mda::StringArray name_mda = args[0]["Name"]; + const auto name_utf16 = std::u16string(name_mda[0]); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto name, + arrow::util::UTF16StringToUTF8(name_utf16), + context, error::UNICODE_CONVERSION_ERROR_ID); + + + auto struct_array = std::static_pointer_cast(array); + auto field_array = struct_array->GetFieldByName(name); + if (!field_array) { + // Return an error if we could not query the field by name. 
+ const auto msg = "Could not find field named " + name + "."; + context.error = libmexclass::error::Error{ + error::ARROW_TABULAR_SCHEMA_AMBIGUOUS_FIELD_NAME, msg}; + return; + } + + // Wrap the array within a proxy object if possible. + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto field_array_proxy, + proxy::wrap(field_array), + context, error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + const auto field_array_proxy_id = ProxyManager::manageProxy(field_array_proxy); + const auto type_id = field_array->type_id(); + + // Return a struct with two fields: ProxyID and TypeID. The MATLAB + // layer will use these values to construct the appropriate MATLAB + // arrow.array.Array subclass. + mda::ArrayFactory factory; + mda::StructArray output = factory.createStructArray({1, 1}, {"ProxyID", "TypeID"}); + output[0]["ProxyID"] = factory.createScalar(field_array_proxy_id); + output[0]["TypeID"] = factory.createScalar(static_cast(type_id)); + context.outputs[0] = output; + } + + void StructArray::getFieldNames(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + const auto& fields = array->type()->fields(); + const auto num_fields = fields.size(); + std::vector names; + names.reserve(num_fields); + + for (size_t i = 0; i < num_fields; ++i) { + auto str_utf8 = fields[i]->name(); + + // MATLAB strings are UTF-16 encoded. Must convert UTF-8 + // encoded field names before returning to MATLAB. 
+ MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto str_utf16, + arrow::util::UTF8StringToUTF16(str_utf8), + context, error::UNICODE_CONVERSION_ERROR_ID); + const mda::MATLABString matlab_string = mda::MATLABString(std::move(str_utf16)); + names.push_back(matlab_string); + } + + mda::ArrayFactory factory; + context.outputs[0] = factory.createArray({1, num_fields}, names.begin(), names.end()); + } +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.h new file mode 100644 index 0000000000000..cfb548c4e50df --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/struct_array.h @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/array/proxy/array.h" + +namespace arrow::matlab::array::proxy { + +class StructArray : public arrow::matlab::array::proxy::Array { + public: + StructArray(std::shared_ptr struct_array); + + ~StructArray() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + + void getNumFields(libmexclass::proxy::method::Context& context); + + void getFieldByIndex(libmexclass::proxy::method::Context& context); + + void getFieldByName(libmexclass::proxy::method::Context& context); + + void getFieldNames(libmexclass::proxy::method::Context& context); + +}; + +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc index a8e3f239919cc..b14f4b18711cb 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/wrap.cc @@ -21,6 +21,7 @@ #include "arrow/matlab/array/proxy/boolean_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" #include "arrow/matlab/array/proxy/string_array.h" +#include "arrow/matlab/array/proxy/struct_array.h" namespace arrow::matlab::array::proxy { @@ -61,6 +62,8 @@ namespace arrow::matlab::array::proxy { return std::make_shared>(std::static_pointer_cast(array)); case ID::STRING: return std::make_shared(std::static_pointer_cast(array)); + case ID::STRUCT: + return std::make_shared(std::static_pointer_cast(array)); default: return arrow::Status::NotImplemented("Unsupported DataType: " + array->type()->ToString()); } diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index ada9954353d9b..347bc25b5f3a6 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -195,7 +195,7 @@ namespace arrow::matlab::error { static const char* CHUNKED_ARRAY_MAKE_FAILED = "arrow:chunkedarray:MakeFailed"; static const char* 
CHUNKED_ARRAY_NUMERIC_INDEX_WITH_EMPTY_CHUNKED_ARRAY = "arrow:chunkedarray:NumericIndexWithEmptyChunkedArray"; static const char* CHUNKED_ARRAY_INVALID_NUMERIC_CHUNK_INDEX = "arrow:chunkedarray:InvalidNumericChunkIndex"; - + static const char* STRUCT_ARRAY_MAKE_FAILED = "arrow:array:StructArrayMakeFailed"; static const char* INDEX_EMPTY_CONTAINER = "arrow:index:EmptyContainer"; static const char* INDEX_OUT_OF_RANGE = "arrow:index:OutOfRange"; } diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index d1f46c7e2f71f..62ed84fedcf6a 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -21,6 +21,7 @@ #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/array/proxy/time32_array.h" #include "arrow/matlab/array/proxy/time64_array.h" +#include "arrow/matlab/array/proxy/struct_array.h" #include "arrow/matlab/array/proxy/chunked_array.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/tabular/proxy/table.h" @@ -57,6 +58,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); + REGISTER_PROXY(arrow.array.proxy.StructArray , arrow::matlab::array::proxy::StructArray); REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.Time32Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.Time64Array , arrow::matlab::array::proxy::NumericArray); diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 4505d4b006ad8..436d5b80aa6a8 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ 
b/matlab/src/matlab/+arrow/+array/Array.m @@ -21,12 +21,9 @@ Proxy end - properties (Dependent) + properties(Dependent, SetAccess=private, GetAccess=public) Length Valid % Validity bitmap - end - - properties(Dependent, SetAccess=private, GetAccess=public) Type(1, 1) arrow.type.Type end diff --git a/matlab/src/matlab/+arrow/+array/BooleanArray.m b/matlab/src/matlab/+arrow/+array/BooleanArray.m index b9ef36b5a70c9..dc38ef93e545c 100644 --- a/matlab/src/matlab/+arrow/+array/BooleanArray.m +++ b/matlab/src/matlab/+arrow/+array/BooleanArray.m @@ -16,8 +16,8 @@ classdef BooleanArray < arrow.array.Array % arrow.array.BooleanArray - properties (Hidden, SetAccess=private) - NullSubstitionValue = false; + properties (Hidden, GetAccess=public, SetAccess=private) + NullSubstitutionValue = false; end methods @@ -35,7 +35,7 @@ function matlabArray = toMATLAB(obj) matlabArray = obj.Proxy.toMATLAB(); - matlabArray(~obj.Valid) = obj.NullSubstitionValue; + matlabArray(~obj.Valid) = obj.NullSubstitutionValue; end end diff --git a/matlab/src/matlab/+arrow/+array/ChunkedArray.m b/matlab/src/matlab/+arrow/+array/ChunkedArray.m index 96d7bb57a4021..ede95323f4865 100644 --- a/matlab/src/matlab/+arrow/+array/ChunkedArray.m +++ b/matlab/src/matlab/+arrow/+array/ChunkedArray.m @@ -66,7 +66,8 @@ for ii = 1:obj.NumChunks chunk = obj.chunk(ii); endIndex = startIndex + chunk.Length - 1; - data(startIndex:endIndex) = toMATLAB(chunk); + % Use 2D indexing to support tabular MATLAB types. 
+ data(startIndex:endIndex, :) = toMATLAB(chunk); startIndex = endIndex + 1; end end diff --git a/matlab/src/matlab/+arrow/+array/Date32Array.m b/matlab/src/matlab/+arrow/+array/Date32Array.m index a462bd4f85ac1..cfe56bc67fb94 100644 --- a/matlab/src/matlab/+arrow/+array/Date32Array.m +++ b/matlab/src/matlab/+arrow/+array/Date32Array.m @@ -17,7 +17,7 @@ classdef Date32Array < arrow.array.Array - properties(Access=private) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = NaT end diff --git a/matlab/src/matlab/+arrow/+array/Date64Array.m b/matlab/src/matlab/+arrow/+array/Date64Array.m index f5da26bbb5594..c67b82a5bbc47 100644 --- a/matlab/src/matlab/+arrow/+array/Date64Array.m +++ b/matlab/src/matlab/+arrow/+array/Date64Array.m @@ -17,7 +17,7 @@ classdef Date64Array < arrow.array.Array - properties(Access=private) + properties(Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = NaT end diff --git a/matlab/src/matlab/+arrow/+array/Float32Array.m b/matlab/src/matlab/+arrow/+array/Float32Array.m index fe90db335b5aa..d12e772c41428 100644 --- a/matlab/src/matlab/+arrow/+array/Float32Array.m +++ b/matlab/src/matlab/+arrow/+array/Float32Array.m @@ -16,7 +16,7 @@ classdef Float32Array < arrow.array.NumericArray % arrow.array.Float32Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = single(NaN); end diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m index ecf91e28954b5..028331b4f99c0 100644 --- a/matlab/src/matlab/+arrow/+array/Float64Array.m +++ b/matlab/src/matlab/+arrow/+array/Float64Array.m @@ -16,7 +16,7 @@ classdef Float64Array < arrow.array.NumericArray % arrow.array.Float64Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = NaN; end diff --git a/matlab/src/matlab/+arrow/+array/Int16Array.m 
b/matlab/src/matlab/+arrow/+array/Int16Array.m index 53c96c6eeb85c..aee94b39c8969 100644 --- a/matlab/src/matlab/+arrow/+array/Int16Array.m +++ b/matlab/src/matlab/+arrow/+array/Int16Array.m @@ -16,7 +16,7 @@ classdef Int16Array < arrow.array.NumericArray % arrow.array.Int16Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = int16(0) end diff --git a/matlab/src/matlab/+arrow/+array/Int32Array.m b/matlab/src/matlab/+arrow/+array/Int32Array.m index d85bcaf627f7b..a0c0c76afa0e7 100644 --- a/matlab/src/matlab/+arrow/+array/Int32Array.m +++ b/matlab/src/matlab/+arrow/+array/Int32Array.m @@ -16,7 +16,7 @@ classdef Int32Array < arrow.array.NumericArray % arrow.array.Int32Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = int32(0) end diff --git a/matlab/src/matlab/+arrow/+array/Int64Array.m b/matlab/src/matlab/+arrow/+array/Int64Array.m index 72199df88ded1..1f8b1c793984a 100644 --- a/matlab/src/matlab/+arrow/+array/Int64Array.m +++ b/matlab/src/matlab/+arrow/+array/Int64Array.m @@ -16,7 +16,7 @@ classdef Int64Array < arrow.array.NumericArray % arrow.array.Int64Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = int64(0); end diff --git a/matlab/src/matlab/+arrow/+array/Int8Array.m b/matlab/src/matlab/+arrow/+array/Int8Array.m index 0e9d8eec0edf5..02e21178ffe49 100644 --- a/matlab/src/matlab/+arrow/+array/Int8Array.m +++ b/matlab/src/matlab/+arrow/+array/Int8Array.m @@ -16,7 +16,7 @@ classdef Int8Array < arrow.array.NumericArray % arrow.array.Int8Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = int8(0); end diff --git a/matlab/src/matlab/+arrow/+array/NumericArray.m b/matlab/src/matlab/+arrow/+array/NumericArray.m index 8f465ce425e23..088ccfd6aa53f 100644 --- 
a/matlab/src/matlab/+arrow/+array/NumericArray.m +++ b/matlab/src/matlab/+arrow/+array/NumericArray.m @@ -16,7 +16,7 @@ classdef NumericArray < arrow.array.Array % arrow.array.NumericArray - properties(Abstract, Access=protected) + properties(Abstract, Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue; end diff --git a/matlab/src/matlab/+arrow/+array/StringArray.m b/matlab/src/matlab/+arrow/+array/StringArray.m index 18fdec9ac70c3..e016aeb704a4d 100644 --- a/matlab/src/matlab/+arrow/+array/StringArray.m +++ b/matlab/src/matlab/+arrow/+array/StringArray.m @@ -16,8 +16,8 @@ classdef StringArray < arrow.array.Array % arrow.array.StringArray - properties (Hidden, SetAccess=private) - NullSubstitionValue = string(missing); + properties (Hidden, GetAccess=public, SetAccess=private) + NullSubstitutionValue = string(missing); end methods @@ -35,7 +35,7 @@ function matlabArray = toMATLAB(obj) matlabArray = obj.Proxy.toMATLAB(); - matlabArray(~obj.Valid) = obj.NullSubstitionValue; + matlabArray(~obj.Valid) = obj.NullSubstitutionValue; end end diff --git a/matlab/src/matlab/+arrow/+array/StructArray.m b/matlab/src/matlab/+arrow/+array/StructArray.m new file mode 100644 index 0000000000000..589e39fecd015 --- /dev/null +++ b/matlab/src/matlab/+arrow/+array/StructArray.m @@ -0,0 +1,146 @@ +% arrow.array.StructArray + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef StructArray < arrow.array.Array + + properties (Dependent, GetAccess=public, SetAccess=private) + NumFields + FieldNames + end + + properties (Hidden, Dependent, GetAccess=public, SetAccess=private) + NullSubstitutionValue + end + + methods + function obj = StructArray(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.array.proxy.StructArray")} + end + import arrow.internal.proxy.validate + obj@arrow.array.Array(proxy); + end + + function numFields = get.NumFields(obj) + numFields = obj.Proxy.getNumFields(); + end + + function fieldNames = get.FieldNames(obj) + fieldNames = obj.Proxy.getFieldNames(); + end + + function F = field(obj, idx) + import arrow.internal.validate.* + + idx = index.numericOrString(idx, "int32", AllowNonScalar=false); + + if isnumeric(idx) + args = struct(Index=idx); + fieldStruct = obj.Proxy.getFieldByIndex(args); + else + args = struct(Name=idx); + fieldStruct = obj.Proxy.getFieldByName(args); + end + + traits = arrow.type.traits.traits(arrow.type.ID(fieldStruct.TypeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=fieldStruct.ProxyID); + F = traits.ArrayConstructor(proxy); + end + + function T = toMATLAB(obj) + T = table(obj); + end + + function T = table(obj) + import arrow.tabular.internal.* + + numFields = obj.NumFields; + matlabArrays = cell(1, numFields); + + invalid = ~obj.Valid; + numInvalid = nnz(invalid); + + for ii = 1:numFields + arrowArray = obj.field(ii); + matlabArray = toMATLAB(arrowArray); + if numInvalid ~= 0 + % MATLAB tables do not support null 
values themselves. + % So, to encode the StructArray's null values, we + % iterate over each variable in the resulting MATLAB + % table, and for each variable, we set the value of all + % null elements to the "NullSubstitutionValue" that + % corresponds to the variable's type (e.g. NaN for + % double, NaT for datetime, etc.). + matlabArray(invalid, :) = repmat(arrowArray.NullSubstitutionValue, [numInvalid 1]); + end + matlabArrays{ii} = matlabArray; + end + + fieldNames = [obj.Type.Fields.Name]; + validVariableNames = makeValidVariableNames(fieldNames); + validDimensionNames = makeValidDimensionNames(validVariableNames); + + T = table(matlabArrays{:}, ... + VariableNames=validVariableNames, ... + DimensionNames=validDimensionNames); + end + + function nullSubVal = get.NullSubstitutionValue(obj) + % Return a cell array containing each field's type-specific + % "null" value. For example, NaN is the type-specific null + % value for Float32Arrays and Float64Arrays + numFields = obj.NumFields; + nullSubVal = cell(1, numFields); + for ii = 1:obj.NumFields + nullSubVal{ii} = obj.field(ii).NullSubstitutionValue; + end + end + end + + methods (Static) + function array = fromArrays(arrowArrays, opts) + arguments(Repeating) + arrowArrays(1, 1) arrow.array.Array + end + arguments + opts.FieldNames(1, :) string {mustBeNonmissing} = compose("Field%d", 1:numel(arrowArrays)) + opts.Valid + end + + import arrow.tabular.internal.validateArrayLengths + import arrow.tabular.internal.validateColumnNames + import arrow.array.internal.getArrayProxyIDs + import arrow.internal.validate.parseValid + + if numel(arrowArrays) == 0 + error("arrow:struct:ZeroFields", ... + "Must supply at least one field array."); + end + + validateArrayLengths(arrowArrays); + validateColumnNames(opts.FieldNames, numel(arrowArrays)); + validElements = parseValid(opts, arrowArrays{1}.Length); + + arrayProxyIDs = getArrayProxyIDs(arrowArrays); + args = struct(ArrayProxyIDs=arrayProxyIDs, ... 
+ FieldNames=opts.FieldNames, Valid=validElements); + proxyName = "arrow.array.proxy.StructArray"; + proxy = arrow.internal.proxy.create(proxyName, args); + array = arrow.array.StructArray(proxy); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+array/Time32Array.m b/matlab/src/matlab/+arrow/+array/Time32Array.m index 85babd26a721a..ae40a3a0b740c 100644 --- a/matlab/src/matlab/+arrow/+array/Time32Array.m +++ b/matlab/src/matlab/+arrow/+array/Time32Array.m @@ -17,7 +17,7 @@ classdef Time32Array < arrow.array.Array - properties(Access=private) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = seconds(NaN); end diff --git a/matlab/src/matlab/+arrow/+array/Time64Array.m b/matlab/src/matlab/+arrow/+array/Time64Array.m index f85eeb1f8f0c9..cd4b948324272 100644 --- a/matlab/src/matlab/+arrow/+array/Time64Array.m +++ b/matlab/src/matlab/+arrow/+array/Time64Array.m @@ -17,7 +17,7 @@ classdef Time64Array < arrow.array.Array - properties(Access=private) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = seconds(NaN); end diff --git a/matlab/src/matlab/+arrow/+array/TimestampArray.m b/matlab/src/matlab/+arrow/+array/TimestampArray.m index 80198f965fe92..9289d0a099f7c 100644 --- a/matlab/src/matlab/+arrow/+array/TimestampArray.m +++ b/matlab/src/matlab/+arrow/+array/TimestampArray.m @@ -16,7 +16,7 @@ classdef TimestampArray < arrow.array.Array % arrow.array.TimestampArray - properties(Access=private) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = NaT; end diff --git a/matlab/src/matlab/+arrow/+array/UInt16Array.m b/matlab/src/matlab/+arrow/+array/UInt16Array.m index 9d3f33c279175..d5487ee130d93 100644 --- a/matlab/src/matlab/+arrow/+array/UInt16Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt16Array.m @@ -16,7 +16,7 @@ classdef UInt16Array < arrow.array.NumericArray % arrow.array.UInt16Array - properties (Access=protected) + properties 
(Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = uint16(0) end diff --git a/matlab/src/matlab/+arrow/+array/UInt32Array.m b/matlab/src/matlab/+arrow/+array/UInt32Array.m index 5235d4fb15576..43c1caac3b791 100644 --- a/matlab/src/matlab/+arrow/+array/UInt32Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt32Array.m @@ -16,7 +16,7 @@ classdef UInt32Array < arrow.array.NumericArray % arrow.array.UInt32Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = uint32(0) end diff --git a/matlab/src/matlab/+arrow/+array/UInt64Array.m b/matlab/src/matlab/+arrow/+array/UInt64Array.m index 2d69bd031ac31..047e7102dd5c5 100644 --- a/matlab/src/matlab/+arrow/+array/UInt64Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt64Array.m @@ -16,7 +16,7 @@ classdef UInt64Array < arrow.array.NumericArray % arrow.array.UInt64Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = uint64(0) end diff --git a/matlab/src/matlab/+arrow/+array/UInt8Array.m b/matlab/src/matlab/+arrow/+array/UInt8Array.m index 3d007376bc89a..901a003161220 100644 --- a/matlab/src/matlab/+arrow/+array/UInt8Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt8Array.m @@ -16,7 +16,7 @@ classdef UInt8Array < arrow.array.NumericArray % arrow.array.UInt8Array - properties (Access=protected) + properties (Hidden, GetAccess=public, SetAccess=private) NullSubstitutionValue = uint8(0) end diff --git a/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m index c0bedaf2faf39..d3a751ca46731 100644 --- a/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m +++ b/matlab/src/matlab/+arrow/+internal/+test/+tabular/createAllSupportedArrayTypes.m @@ -23,6 +23,10 @@ opts.NumRows(1, 1) {mustBeFinite, mustBeNonnegative} = 3; end + % Seed the random number 
generator to ensure + % reproducible results in tests. + rng(1); + import arrow.type.ID import arrow.array.* @@ -59,6 +63,13 @@ matlabData{ii} = randomDatetimes(opts.NumRows); cmd = compose("%s.fromMATLAB(matlabData{ii})", name); arrowArrays{ii} = eval(cmd); + elseif name == "arrow.array.StructArray" + dates = randomDatetimes(opts.NumRows); + strings = randomStrings(opts.NumRows); + timestampArray = arrow.array(dates); + stringArray = arrow.array(strings); + arrowArrays{ii} = StructArray.fromArrays(timestampArray, stringArray); + matlabData{ii} = table(dates, strings, VariableNames=["Field1", "Field2"]); else error("arrow:test:SupportedArrayCase", ... "Missing if-branch for array class " + name); diff --git a/matlab/src/matlab/+arrow/+internal/+validate/parseValid.m b/matlab/src/matlab/+arrow/+internal/+validate/parseValid.m new file mode 100644 index 0000000000000..3281e24ec1963 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+validate/parseValid.m @@ -0,0 +1,46 @@ +%PARSEVALID Utility function for parsing the Valid name-value pair. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function validElements = parseValid(opts, numElements) + if ~isfield(opts, "Valid") + % If Valid is not a field in opts, return an empty logical array. + validElements = logical.empty(0, 1); + return; + end + + valid = opts.Valid; + if islogical(valid) + validElements = reshape(valid, [], 1); + if ~isscalar(validElements) + % Verify the logical vector has the correct number of elements + validateattributes(validElements, "logical", {'numel', numElements}); + elseif validElements == false + validElements = false(numElements, 1); + else % validElements == true + % Return an empty logical to represent all elements are valid. + validElements = logical.empty(0, 1); + end + else + % valid is a list of indices. Verify the indices are numeric, + % integers, and within the range [1, numElements] + validateattributes(valid, "numeric", {'integer', '>', 0, '<=', numElements}); + % Create a logical vector that contains true values at the indices + % specified by opts.Valid. + validElements = false([numElements 1]); + validElements(valid) = true; + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+internal/+validate/parseValidElements.m b/matlab/src/matlab/+arrow/+internal/+validate/parseValidElements.m index 4081f4092740b..8a43dbb4d78e1 100644 --- a/matlab/src/matlab/+arrow/+internal/+validate/parseValidElements.m +++ b/matlab/src/matlab/+arrow/+internal/+validate/parseValidElements.m @@ -21,7 +21,7 @@ % precedence over InferNulls. 
if isfield(opts, "Valid") - validElements = parseValid(numel(data), opts.Valid); + validElements = arrow.internal.validate.parseValid(opts, numel(data)); else validElements = parseInferNulls(data, opts.InferNulls); end @@ -33,29 +33,6 @@ end end -function validElements = parseValid(numElements, valid) - if islogical(valid) - validElements = reshape(valid, [], 1); - if ~isscalar(validElements) - % Verify the logical vector has the correct number of elements - validateattributes(validElements, "logical", {'numel', numElements}); - elseif validElements == false - validElements = false(numElements, 1); - else % validElements == true - % Return an empty logical to represent all elements are valid. - validElements = logical.empty(0, 1); - end - else - % valid is a list of indices. Verify the indices are numeric, - % integers, and within the range 1 < indices < numElements. - validateattributes(valid, "numeric", {'integer', '>', 0, '<=', numElements}); - % Create a logical vector that contains true values at the indices - % specified by opts.Valid. 
- validElements = false([numElements 1]); - validElements(valid) = true; - end -end - function validElements = parseInferNulls(data, inferNulls) if inferNulls && ~(isinteger(data) || islogical(data)) % Only call ismissing on data types that have a "missing" value, diff --git a/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m index a8ed98f8ae468..0f8b7b3a2a663 100644 --- a/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m +++ b/matlab/src/matlab/+arrow/+type/+traits/StructTraits.m @@ -16,21 +16,18 @@ classdef StructTraits < arrow.type.traits.TypeTraits properties (Constant) - % TODO: When arrow.array.StructArray is implemented, set these - % properties appropriately - ArrayConstructor = missing - ArrayClassName = missing - ArrayProxyClassName = missing + ArrayConstructor = @arrow.array.StructArray + ArrayClassName = "arrow.array.StructArray" + ArrayProxyClassName = "arrow.array.proxy.StructArray" + + % TODO: Implement fromMATLAB ArrayStaticConstructor = missing TypeConstructor = @arrow.type.StructType TypeClassName = "arrow.type.StructType" TypeProxyClassName = "arrow.type.proxy.StructType" - - % TODO: When arrow.array.StructArray is implemented, set these - % properties appropriately - MatlabConstructor = missing - MatlabClassName = missing + MatlabConstructor = @table + MatlabClassName = "table" end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/StructType.m b/matlab/src/matlab/+arrow/+type/StructType.m index 6c1318f6376f3..331ac75a2ee16 100644 --- a/matlab/src/matlab/+arrow/+type/StructType.m +++ b/matlab/src/matlab/+arrow/+type/StructType.m @@ -33,14 +33,28 @@ end methods (Hidden) - % TODO: Consider using a mixin approach to add this behavior. For - % example, ChunkedArray's toMATLAB method could check if its - % Type inherits from a mixin called "Preallocateable" (or something - % more descriptive). If so, we can call preallocateMATLABArray - % in the toMATLAB method. 
- function preallocateMATLABArray(~) - error("arrow:type:UnsupportedFunction", ... - "preallocateMATLABArray is not supported for StructType"); - end + function data = preallocateMATLABArray(obj, numElements) + import arrow.tabular.internal.* + + fields = obj.Fields; + + % Construct the VariableNames and VariableDimensionNames + fieldNames = [fields.Name]; + validVariableNames = makeValidVariableNames(fieldNames); + validDimensionNames = makeValidDimensionNames(validVariableNames); + + % Recursively call preallocateMATLABArray to handle + % preallocation of nested types + variableData = cell(1, numel(fields)); + for ii = 1:numel(fields) + type = fields(ii).Type; + variableData{ii} = preallocateMATLABArray(type, numElements); + end + + % Return a table with the appropriate schema and dimensions + data = table(variableData{:}, ... + VariableNames=validVariableNames, ... + DimensionNames=validDimensionNames); + end end end \ No newline at end of file diff --git a/matlab/test/arrow/array/tStructArray.m b/matlab/test/arrow/array/tStructArray.m new file mode 100644 index 0000000000000..639df65befbf5 --- /dev/null +++ b/matlab/test/arrow/array/tStructArray.m @@ -0,0 +1,277 @@ +%TSTRUCTARRAY Unit tests for arrow.array.StructArray + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. + +classdef tStructArray < matlab.unittest.TestCase + + properties + Float64Array = arrow.array([1 NaN 3 4 5]); + StringArray = arrow.array(["A" "B" "C" "D" missing]); + end + + methods (Test) + function Basic(tc) + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + tc.verifyInstanceOf(array, "arrow.array.StructArray"); + end + + function FieldNames(tc) + % Verify the FieldNames property is set to the expected value. + import arrow.array.StructArray + + % Default field names used + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + tc.verifyEqual(array.FieldNames, ["Field1", "Field2"]); + + % Field names provided + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["A", "B"]); + tc.verifyEqual(array.FieldNames, ["A", "B"]); + + % Duplicate field names provided + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["C", "C"]); + tc.verifyEqual(array.FieldNames, ["C", "C"]); + end + + function FieldNamesError(tc) + % Verify the FieldNames nv-pair errors when expected. + import arrow.array.StructArray + + % Wrong type provided + fcn = @() StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames={table table}); + tc.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + + % Wrong number of field names provided + fcn = @() StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames="A"); + tc.verifyError(fcn, "arrow:tabular:WrongNumberColumnNames"); + + % Missing string provided + fcn = @() StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["A" missing]); + tc.verifyError(fcn, "MATLAB:validators:mustBeNonmissing"); + end + + function FieldNamesNoSetter(tc) + % Verify the FieldNames property is read-only. 
+ import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + fcn = @() setfield(array, "FieldNames", ["A", "B"]); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function NumFields(tc) + % Verify the NumFields property is set to the expected value. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + tc.verifyEqual(array.NumFields, int32(2)); + end + + function NumFieldsNoSetter(tc) + % Verify the NumFields property is read-only. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + fcn = @() setfield(array, "NumFields", 10); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function Valid(tc) + % Verify the Valid property is set to the expected value. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + expectedValid = true([5 1]); + tc.verifyEqual(array.Valid, expectedValid); + + % Supply the Valid nv-pair + valid = [true true false true false]; + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, Valid=valid); + tc.verifyEqual(array.Valid, valid'); + end + + function ValidNVPairError(tc) + % Verify the Valid nv-pair errors when expected. + import arrow.array.StructArray + + % Provided an invalid index + fcn = @() StructArray.fromArrays(tc.Float64Array, tc.StringArray, Valid=10); + tc.verifyError(fcn, "MATLAB:notLessEqual"); + + % Provided a logical vector with more elements than the array + % length + fcn = @() StructArray.fromArrays(tc.Float64Array, tc.StringArray, Valid=false([7 1])); + tc.verifyError(fcn, "MATLAB:incorrectNumel"); + end + + function ValidNoSetter(tc) + % Verify the Valid property is read-only. 
+ import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + fcn = @() setfield(array, "Valid", false); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function Length(tc) + % Verify the Length property is set to the expected value. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + tc.verifyEqual(array.Length, int64(5)); + end + + function LengthNoSetter(tc) + % Verify the Length property is read-only. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + fcn = @() setfield(array, "Length", 1); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function Type(tc) + % Verify the Type property is set to the expected value. + import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + field1 = arrow.field("X", arrow.float64()); + field2 = arrow.field("Y", arrow.string()); + expectedType = arrow.struct(field1, field2); + tc.verifyEqual(array.Type, expectedType); + end + + function TypeNoSetter(tc) + % Verify the Type property is read-only. 
+ import arrow.array.StructArray + + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + fcn = @() setfield(array, "Type", tc.Float64Array.Type); + tc.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + + function FieldByIndex(tc) + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + + % Extract 1st field + field1 = array.field(1); + tc.verifyEqual(field1, tc.Float64Array); + + % Extract 2nd field + field2 = array.field(2); + tc.verifyEqual(field2, tc.StringArray); + end + + function FieldByIndexError(tc) + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + + % Supply a nonscalar vector + fcn = @() array.field([1 2]); + tc.verifyError(fcn, "arrow:badsubscript:NonScalar"); + + % Supply a noninteger + fcn = @() array.field(1.1); + tc.verifyError(fcn, "arrow:badsubscript:NonInteger"); + end + + function FieldByName(tc) + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + + % Extract 1st field + field1 = array.field("Field1"); + tc.verifyEqual(field1, tc.Float64Array); + + % Extract 2nd field + field2 = array.field("Field2"); + tc.verifyEqual(field2, tc.StringArray); + end + + function FieldByNameError(tc) + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray); + + % Supply a nonscalar string array + fcn = @() array.field(["Field1" "Field2"]); + tc.verifyError(fcn, "arrow:badsubscript:NonScalar"); + + % Supply a nonexistent field name + fcn = @() array.field("B"); + tc.verifyError(fcn, "arrow:tabular:schema:AmbiguousFieldName"); + end + + function toMATLAB(tc) + % Verify toMATLAB returns the expected MATLAB table + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + expectedTable = table(toMATLAB(tc.Float64Array), toMATLAB(tc.StringArray), VariableNames=["X", "Y"]); + actualTable = 
toMATLAB(array); + tc.verifyEqual(actualTable, expectedTable); + + % Verify table elements that correspond to "null" values + % in the StructArray are set to the type-specific null values. + valid = [1 2 5]; + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"], Valid=valid); + float64NullValue = tc.Float64Array.NullSubstitutionValue; + stringNullValue = tc.StringArray.NullSubstitutionValue; + expectedTable([3 4], :) = repmat({float64NullValue stringNullValue}, [2 1]); + actualTable = toMATLAB(array); + tc.verifyEqual(actualTable, expectedTable); + end + + function table(tc) + % Verify table returns the expected MATLAB table + import arrow.array.StructArray + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + expectedTable = table(toMATLAB(tc.Float64Array), toMATLAB(tc.StringArray), VariableNames=["X", "Y"]); + actualTable = table(array); + tc.verifyEqual(actualTable, expectedTable); + + % Verify table elements that correspond to "null" values + % in the StructArray are set to the type-specific null values. + valid = [1 2 5]; + array = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"], Valid=valid); + float64NullValue = tc.Float64Array.NullSubstitutionValue; + stringNullValue = tc.StringArray.NullSubstitutionValue; + expectedTable([3 4], :) = repmat({float64NullValue stringNullValue}, [2 1]); + actualTable = table(array); + tc.verifyEqual(actualTable, expectedTable); + end + + function IsEqualTrue(tc) + % Verify isequal returns true when expected. + import arrow.array.StructArray + array1 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + array2 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + tc.verifyTrue(isequal(array1, array2)); + end + + function IsEqualFalse(tc) + % Verify isequal returns false when expected.
+ import arrow.array.StructArray + array1 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["X", "Y"]); + array2 = StructArray.fromArrays(tc.StringArray, tc.Float64Array, FieldNames=["X", "Y"]); + array3 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, FieldNames=["A", "B"]); + % StructArrays have the same FieldNames but the Fields have different types. + tc.verifyFalse(isequal(array1, array2)); + % Fields of the StructArrays have the same types but the StructArrays have different FieldNames. + tc.verifyFalse(isequal(array1, array3)); + end + + end +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tStructTraits.m b/matlab/test/arrow/type/traits/tStructTraits.m index 6a97b1e1852d6..07833aca162b5 100644 --- a/matlab/test/arrow/type/traits/tStructTraits.m +++ b/matlab/test/arrow/type/traits/tStructTraits.m @@ -17,15 +17,15 @@ properties TraitsConstructor = @arrow.type.traits.StructTraits - ArrayConstructor = missing - ArrayClassName = missing - ArrayProxyClassName = missing + ArrayConstructor = @arrow.array.StructArray + ArrayClassName = "arrow.array.StructArray" + ArrayProxyClassName = "arrow.array.proxy.StructArray" ArrayStaticConstructor = missing TypeConstructor = @arrow.type.StructType TypeClassName = "arrow.type.StructType" TypeProxyClassName = "arrow.type.proxy.StructType" - MatlabConstructor = missing - MatlabClassName = missing + MatlabConstructor = @table + MatlabClassName = "table" end end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 294612dda370f..149a688b27e15 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -47,6 +47,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc" 
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/time32_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/time64_array.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/struct_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/chunked_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/wrap.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc"