diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index 6f5b8b12f2d2d..35dc496bddb00 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/matlab/array/proxy/array.h" +#include "arrow/util/utf8.h" +#include "arrow/matlab/array/proxy/array.h" #include "arrow/matlab/bit/unpack.h" +#include "arrow/matlab/error/error.h" namespace arrow::matlab::array::proxy { @@ -36,9 +38,9 @@ namespace arrow::matlab::array::proxy { void Array::toString(libmexclass::proxy::method::Context& context) { ::matlab::data::ArrayFactory factory; - - // TODO: handle non-ascii characters - auto str_mda = factory.createScalar(array->ToString()); + const auto str_utf8 = array->ToString(); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto str_utf16, arrow::util::UTF8StringToUTF16(str_utf8), context, error::UNICODE_CONVERSION_ERROR_ID); + auto str_mda = factory.createScalar(str_utf16); context.outputs[0] = str_mda; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index 62c6d9dc26830..43e7aec622d55 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -27,22 +27,12 @@ #include "arrow/matlab/array/proxy/array.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" +#include "arrow/matlab/bit/unpack.h" #include "libmexclass/proxy/Proxy.h" namespace arrow::matlab::array::proxy { -namespace { -const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray& valid_elements) { - if (valid_elements.getNumberOfElements() > 0) { - const auto valid_elements_iterator(valid_elements.cbegin()); - return reinterpret_cast(valid_elements_iterator.operator->()); - } else { - return nullptr; - } -} -} // anonymous namespace - template class NumericArray : public arrow::matlab::array::proxy::Array { public: @@ -70,7 +60,7 @@ class NumericArray : public arrow::matlab::array::proxy::Array { if (make_deep_copy) { // Get the unpacked validity bitmap (if it exists) - auto unpacked_validity_bitmap = getUnpackedValidityBitmap(valid_mda); + auto unpacked_validity_bitmap = bit::extract_ptr(valid_mda); BuilderType builder; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc new file mode 100644 index 0000000000000..51f39d72fca6c --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/array/proxy/string_array.h" + +#include "arrow/array/builder_binary.h" + +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/bit/pack.h" +#include "arrow/matlab/bit/unpack.h" +#include "arrow/util/utf8.h" + +namespace arrow::matlab::array::proxy { + + libmexclass::proxy::MakeResult StringArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + + mda::StructArray opts = constructor_arguments[0]; + const mda::StringArray array_mda = opts[0]["MatlabArray"]; + const mda::TypedArray unpacked_validity_bitmap_mda = opts[0]["Valid"]; + + // Convert UTF-16 encoded MATLAB string values to UTF-8 encoded Arrow string values. + const auto array_length = array_mda.getNumberOfElements(); + std::vector strings; + strings.reserve(array_length); + for (const auto& str : array_mda) { + if (!str) { + // Substitute MATLAB string(missing) values with the empty string value ("") + strings.emplace_back(""); + } else { + MATLAB_ASSIGN_OR_ERROR(auto str_utf8, arrow::util::UTF16StringToUTF8(*str), error::UNICODE_CONVERSION_ERROR_ID); + strings.push_back(std::move(str_utf8)); + } + } + + auto unpacked_validity_bitmap_ptr = bit::extract_ptr(unpacked_validity_bitmap_mda); + + // Build up an Arrow StringArray from a vector of UTF-8 encoded strings. + arrow::StringBuilder builder; + MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(strings, unpacked_validity_bitmap_ptr), error::STRING_BUILDER_APPEND_FAILED); + MATLAB_ASSIGN_OR_ERROR(auto array, builder.Finish(), error::STRING_BUILDER_FINISH_FAILED); + + return std::make_shared(array); + } + + void StringArray::toMATLAB(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + // Convert UTF-8 encoded Arrow string values to UTF-16 encoded MATLAB string values. + auto array_length = static_cast(array->length()); + std::vector strings; + strings.reserve(array_length); + for (size_t i = 0; i < array_length; ++i) { + auto string_array = std::static_pointer_cast(array); + auto str_utf8 = string_array->GetView(i); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto str_utf16, arrow::util::UTF8StringToUTF16(str_utf8), context, error::UNICODE_CONVERSION_ERROR_ID); + const mda::MATLABString matlab_string = mda::MATLABString(std::move(str_utf16)); + strings.push_back(matlab_string); + } + + // Create a MATLAB String array from a vector of UTF-16 encoded strings. + mda::ArrayFactory factory; + auto array_mda = factory.createArray({array_length, 1}, strings.begin(), strings.end()); + context.outputs[0] = array_mda; + } + +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h new file mode 100644 index 0000000000000..de0c4625928e4 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/matlab/array/proxy/array.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::array::proxy { + + class StringArray : public arrow::matlab::array::proxy::Array { + public: + StringArray(const std::shared_ptr string_array) + : arrow::matlab::array::proxy::Array() { + array = string_array; + } + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void toMATLAB(libmexclass::proxy::method::Context& context) override; + }; + +} diff --git a/matlab/src/cpp/arrow/matlab/bit/unpack.cc b/matlab/src/cpp/arrow/matlab/bit/unpack.cc index f6c1644909a3c..7135d593cf752 100644 --- a/matlab/src/cpp/arrow/matlab/bit/unpack.cc +++ b/matlab/src/cpp/arrow/matlab/bit/unpack.cc @@ -38,4 +38,13 @@ namespace arrow::matlab::bit { return unpacked_matlab_logical_Array; } + + const uint8_t* extract_ptr(const ::matlab::data::TypedArray& unpacked_validity_bitmap) { + if (unpacked_validity_bitmap.getNumberOfElements() > 0) { + const auto unpacked_validity_bitmap_iterator(unpacked_validity_bitmap.cbegin()); + return reinterpret_cast(unpacked_validity_bitmap_iterator.operator->()); + } else { + return nullptr; + } + } } diff --git a/matlab/src/cpp/arrow/matlab/bit/unpack.h b/matlab/src/cpp/arrow/matlab/bit/unpack.h index 2d7294d9d55ea..b6debb85f837b 100644 --- a/matlab/src/cpp/arrow/matlab/bit/unpack.h +++ b/matlab/src/cpp/arrow/matlab/bit/unpack.h @@ -23,4 +23,5 @@ namespace arrow::matlab::bit { ::matlab::data::TypedArray unpack(const std::shared_ptr& packed_buffer, int64_t length); + const uint8_t* extract_ptr(const ::matlab::data::TypedArray& unpacked_validity_bitmap); } diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index 598db363f3a53..b1b7b75b8c84a 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -168,6 +168,7 @@ namespace arrow::matlab::error { static const char* SCHEMA_BUILDER_FINISH_ERROR_ID = "arrow:matlab:tabular:proxy:SchemaBuilderAddFields"; static const char* SCHEMA_BUILDER_ADD_FIELDS_ERROR_ID = "arrow:matlab:tabular:proxy:SchemaBuilderFinish"; static const char* UNICODE_CONVERSION_ERROR_ID = "arrow:matlab:unicode:UnicodeConversion"; + static const char* STRING_BUILDER_APPEND_FAILED = "arrow:matlab:array:string:StringBuilderAppendFailed"; + static const char* STRING_BUILDER_FINISH_FAILED = "arrow:matlab:array:string:StringBuilderFinishFailed"; static const char* UKNOWN_TIME_UNIT_ERROR_ID = "arrow:matlab:UnknownTimeUnit"; - } diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 94ee1ca89245f..41f1357bcedc5 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -17,8 +17,9 @@ #include "arrow/matlab/array/proxy/boolean_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" -#include "arrow/matlab/tabular/proxy/record_batch.h" +#include "arrow/matlab/array/proxy/string_array.h" #include "arrow/matlab/array/proxy/timestamp_array.h" +#include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" #include "factory.h" @@ -37,6 +38,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); + REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray); REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; diff --git a/matlab/src/matlab/+arrow/+array/StringArray.m b/matlab/src/matlab/+arrow/+array/StringArray.m new file mode 100644 index 0000000000000..9ef3f0252586f --- /dev/null +++ b/matlab/src/matlab/+arrow/+array/StringArray.m @@ -0,0 +1,54 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef StringArray < arrow.array.Array +% arrow.array.StringArray + + properties (Hidden, SetAccess=private) + NullSubstitionValue = string(missing); + end + + properties(SetAccess=private, GetAccess=public) + Type = arrow.type.StringType + end + + methods + function obj = StringArray(data, opts) + arguments + data + opts.InferNulls(1,1) logical = true + opts.Valid + end + % Support constructing a StringArray from a cell array of strings (i.e. cellstr), + % or a string array, but not a char array. + if ~ischar(data) + data = convertCharsToStrings(data); + end + arrow.args.validateTypeAndShape(data, "string"); + validElements = arrow.args.parseValidElements(data, opts); + opts = struct(MatlabArray=data, Valid=validElements); + obj@arrow.array.Array("Name", "arrow.array.proxy.StringArray", "ConstructorArguments", {opts}); + end + + function data = string(obj) + data = obj.toMATLAB(); + end + + function matlabArray = toMATLAB(obj) + matlabArray = obj.Proxy.toMATLAB(); + matlabArray(~obj.Valid) = obj.NullSubstitionValue; + end + end +end diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m index 5e5ab1d1d7c94..a7feb0c0a3bd7 100644 --- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m +++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m @@ -121,6 +121,8 @@ arrowArray = arrow.array.Int64Array(matlabArray); case "logical" arrowArray = arrow.array.BooleanArray(matlabArray); + case "string" + arrowArray = arrow.array.StringArray(matlabArray); case "datetime" arrowArray = arrow.array.TimestampArray(matlabArray); otherwise diff --git a/matlab/src/matlab/+arrow/+type/ID.m b/matlab/src/matlab/+arrow/+type/ID.m index 0450fe8aea453..2e320603d039c 100644 --- a/matlab/src/matlab/+arrow/+type/ID.m +++ b/matlab/src/matlab/+arrow/+type/ID.m @@ -28,7 +28,7 @@ % Float16 (10) not yet supported Float32 (11) Float64 (12) - % String (13) + String (13) % Binary (14) % FixedSizeBinary (15) % Date32 (16) diff --git a/matlab/src/matlab/+arrow/+type/StringType.m b/matlab/src/matlab/+arrow/+type/StringType.m new file mode 100644 index 0000000000000..66a15dd0ea3e2 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/StringType.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef StringType < arrow.type.Type +%STRINGTYPE Type class for string data. + + properties(SetAccess = protected) + ID = arrow.type.ID.String + end + + properties(Constant) + NumFields = 0 + NumBuffers = 3 + end + +end + diff --git a/matlab/test/arrow/array/tStringArray.m b/matlab/test/arrow/array/tStringArray.m new file mode 100644 index 0000000000000..000a57b27bcc2 --- /dev/null +++ b/matlab/test/arrow/array/tStringArray.m @@ -0,0 +1,231 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tStringArray < matlab.unittest.TestCase +% Test class for arrow.array.StringArray + + properties + ArrowArrayClassName(1, 1) string = "arrow.array.StringArray" + ArrowArrayConstructor = @arrow.array.StringArray + MatlabArrayFcn = @string + MatlabConversionFcn = @string + NullSubstitutionValue = string(missing) + ArrowType = arrow.type.StringType + end + + methods(TestClassSetup) + function verifyOnMatlabPath(tc) + % Verify the arrow array class is on the MATLAB Search Path. + tc.assertTrue(~isempty(which(tc.ArrowArrayClassName)), ... + """" + tc.ArrowArrayClassName + """must be on the MATLAB path. " + ... + "Use ""addpath"" to add folders to the MATLAB path."); + end + end + + methods(Test) + function BasicTest(tc) + A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(["A", "B", "C"])); + className = string(class(A)); + tc.verifyEqual(className, tc.ArrowArrayClassName); + end + + function ToMATLAB(tc) + % Create array from a scalar + A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn("A")); + data = toMATLAB(A1); + tc.verifyEqual(data, tc.MatlabArrayFcn("A")); + + % Create array from a vector + A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(["A", "B", "C"])); + data = toMATLAB(A2); + tc.verifyEqual(data, tc.MatlabArrayFcn(["A", "B", "C"]')); + + % Create a StringArray from an empty 0x0 string vector + A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 0))); + data = toMATLAB(A3); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + + % Create a StringArray from an empty 0x1 string vector + A4= tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 1))); + data = toMATLAB(A4); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + + % Create a StringArray from an empty 1x0 string vector + A5= tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 1))); + data = toMATLAB(A5); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + end + + function MatlabConversion(tc) + % Tests the type-specific conversion method (i.e. string) + + % Create array from a scalar + A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn("A")); + data = tc.MatlabConversionFcn(A1); + tc.verifyEqual(data, tc.MatlabArrayFcn("A")); + + % Create array from a vector + A2 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(["A", "B", "C"])); + data = tc.MatlabConversionFcn(A2); + tc.verifyEqual(data, tc.MatlabArrayFcn(["A", "B", "C"]')); + + % Create a StringArray from an empty 0x0 string vector + A3 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 0))); + data = tc.MatlabConversionFcn(A3); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + + % Create a StringArray from an empty 0x1 string vector + A4= tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 1))); + data = tc.MatlabConversionFcn(A4); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + + % Create a StringArray from an empty 1x0 string vector + A5= tc.ArrowArrayConstructor(tc.MatlabArrayFcn(string.empty(0, 1))); + data = tc.MatlabConversionFcn(A5); + tc.verifyEqual(data, tc.MatlabArrayFcn(reshape([], 0, 1))); + end + + function LogicalValidNVPair(tc) + % Verify the expected elements are treated as null when Valid + % is provided as a logical array + data = tc.MatlabArrayFcn(["A", "B", "C"]'); + arrowArray = tc.ArrowArrayConstructor(data, Valid=[false true true]); + + expectedData = data; + expectedData(1) = tc.NullSubstitutionValue; + tc.verifyEqual(tc.MatlabConversionFcn(arrowArray), expectedData); + tc.verifyEqual(toMATLAB(arrowArray), expectedData); + tc.verifyEqual(arrowArray.Valid, [false; true; true]); + end + + function NumericValidNVPair(tc) + % Verify the expected elements are treated as null when Valid + % is provided as a array of indices + data = tc.MatlabArrayFcn(["A", "B", "C"]'); + arrowArray = tc.ArrowArrayConstructor(data, Valid=[1, 2]); + + expectedData = data; + expectedData(3) = tc.NullSubstitutionValue; + tc.verifyEqual(tc.MatlabConversionFcn(arrowArray), expectedData); + tc.verifyEqual(toMATLAB(arrowArray), expectedData); + tc.verifyEqual(arrowArray.Valid, [true; true; false]); + + + % Make sure the optimization where the valid-bitmap is stored as + % a nullptr works as expected. + expectedData = data; + arrowArray = tc.ArrowArrayConstructor(data, Valid=[1, 2, 3]); + tc.verifyEqual(tc.MatlabConversionFcn(arrowArray), expectedData); + tc.verifyEqual(toMATLAB(arrowArray), expectedData); + tc.verifyEqual(arrowArray.Valid, [true; true; true]); + end + + function ErrorIfNonVector(tc) + data = tc.MatlabArrayFcn(["A", "B", "A", "B", "A", "B", "A", "B", "A"]); + data = reshape(data, 3, 1, 3); + fcn = @() tc.ArrowArrayConstructor(tc.MatlabArrayFcn(data)); + tc.verifyError(fcn, "MATLAB:expectedVector"); + end + + function ErrorIfEmptyArrayIsNotTwoDimensional(tc) + data = tc.MatlabArrayFcn(reshape(string.empty(0, 0), [1 0 0])); + fcn = @() tc.ArrowArrayConstructor(data); + tc.verifyError(fcn, "MATLAB:expected2D"); + end + + function TestArrowType(tc) + % Verify the array has the expected arrow.type.Type object + data = tc.MatlabArrayFcn(["A", "B"]); + arrowArray = tc.ArrowArrayConstructor(data); + tc.verifyEqual(arrowArray.Type, tc.ArrowType); + end + + function Unicode(tc) + % Verify that Unicode characters are preserved during round-trip + % conversion. + smiley = "😀"; + tree = "🌲"; + mango = "🥭"; + + matlabArray = tc.MatlabArrayFcn([smiley; tree; mango]); + arrowArray = tc.ArrowArrayConstructor(matlabArray); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, matlabArray); + end + + function Missing(tc) + % Verify that string(missing) values get mapped to the empty + % string value when InferNulls=false. + matlabArray = tc.MatlabArrayFcn(["A"; string(missing); string(missing)]); + arrowArray = tc.ArrowArrayConstructor(matlabArray, InferNulls=false); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, ["A"; ""; ""]); + end + + function CellStr(tc) + % Verify that a StringArray can be constructed from + % a cell array of character vectors (i.e. cellstr). + + % Row vector + matlabArray = {'A', 'B', 'C'}; + arrowArray = tc.ArrowArrayConstructor(matlabArray); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, string(matlabArray')); + + % Column vector + matlabArray = {'A'; 'B'; 'C'}; + arrowArray = tc.ArrowArrayConstructor(matlabArray); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, string(matlabArray)); + + % One element cellstr + matlabArray = {''}; + arrowArray = tc.ArrowArrayConstructor(matlabArray); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, string(matlabArray)); + + % Empty cell + matlabArray = {}; + arrowArray = tc.ArrowArrayConstructor(matlabArray); + matlabArrayConverted = toMATLAB(arrowArray); + tc.verifyEqual(matlabArrayConverted, string.empty(0, 1)); + end + + function ErrorIfChar(tc) + % Verify that an error is thrown when a char array + % is passed to the StringArray constructor. + + % Row vector + matlabArray = 'abc'; + tc.verifyError(@() tc.ArrowArrayConstructor(matlabArray), "MATLAB:invalidType"); + + % Column vector + matlabArray = ['a';'b';'c']; + tc.verifyError(@() tc.ArrowArrayConstructor(matlabArray), "MATLAB:invalidType"); + + % Empty char (0x0) + matlabArray = ''; + tc.verifyError(@() tc.ArrowArrayConstructor(matlabArray), "MATLAB:invalidType"); + + % Empty char (0x1) + matlabArray = char.empty(0, 1); + tc.verifyError(@() tc.ArrowArrayConstructor(matlabArray), "MATLAB:invalidType"); + + % Empty char (1x0) + matlabArray = char.empty(1, 0); + tc.verifyError(@() tc.ArrowArrayConstructor(matlabArray), "MATLAB:invalidType"); + end + end +end diff --git a/matlab/test/arrow/tabular/tRecordBatch.m b/matlab/test/arrow/tabular/tRecordBatch.m index d0b1df9621eea..89175c43dad7a 100644 --- a/matlab/test/arrow/tabular/tRecordBatch.m +++ b/matlab/test/arrow/tabular/tRecordBatch.m @@ -38,6 +38,7 @@ function SupportedTypes(tc) logical([1, 0, 1]'), ... single ([1, 2, 3]'), ... double ([1, 2, 3]'), ... + string (["A", "B", "C"]'), ... datetime(2023, 6, 28) + days(0:2)'); arrowRecordBatch = arrow.tabular.RecordBatch(TOriginal); TConverted = arrowRecordBatch.toMATLAB(); diff --git a/matlab/test/arrow/type/tStringType.m b/matlab/test/arrow/type/tStringType.m new file mode 100644 index 0000000000000..f3cf101ac6185 --- /dev/null +++ b/matlab/test/arrow/type/tStringType.m @@ -0,0 +1,41 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tStringType < matlab.unittest.TestCase +%TSTRINGTYPE Test class for arrow.type.StringType + + methods (Test) + + function Basic(tc) + type = arrow.type.StringType; + className = string(class(type)); + tc.verifyEqual(className, "arrow.type.StringType"); + tc.verifyEqual(type.ID, arrow.type.ID.String); + end + + function NumBuffers(tc) + type = arrow.type.StringType; + tc.verifyEqual(type.NumBuffers, 3); + end + + function NumFields(tc) + type = arrow.type.StringType; + tc.verifyEqual(type.NumFields, 0); + end + + end + +end + diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index f56321ea73132..27a64a19a91ef 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -42,8 +42,9 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/c set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/string_array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/pack.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc")