Skip to content

Commit

Permalink
apacheGH-36363: [MATLAB] Create proxy classes for the DataType class …
Browse files Browse the repository at this point in the history
…hierarchy (apache#36419)

### Rationale for this change

In the original pull request in which we added the MATLAB `arrow.type.<Type>Type` classes (e.g. `arrow.type.Float32Type`), we did not implement these classes as proxies. At the time, we weren't sure whether it would be advantageous to implement the type classes as proxies, but we now realize it will be for composite data structures, i.e. `Schema`, `StructArray`, and `ListArray`.

### What changes are included in this PR?

1. All classes within the `arrow.type.Type` class hierarchy are implemented as proxies. 

### Are these changes tested?

Yes, we had existing tests for these classes. 

### Are there any user-facing changes?

No.

### Future Directions

1. In a follow-up pull request, we plan on integrating the proxy type classes and the array classes so that they share the same underlying C++ `arrow::DataType` object. We thought doing so in this change would be too much code churn.

### Notes

Thank you @kevingurney for the help!

* Closes: apache#36363

Lead-authored-by: Sarah Gilmore <sgilmore@mathworks.com>
Co-authored-by: sgilmore10 <74676073+sgilmore10@users.noreply.github.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Signed-off-by: Kevin Gurney <kgurney@mathworks.com>
  • Loading branch information
sgilmore10 and kou authored Jul 12, 2023
1 parent 085a0ba commit 6baf6a7
Show file tree
Hide file tree
Showing 51 changed files with 590 additions and 174 deletions.
9 changes: 5 additions & 4 deletions matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,14 @@ namespace arrow::matlab::array::proxy {
const mda::TypedArray<mda::MATLABString> units_mda = opts[0]["TimeUnit"];

// extract the time zone string
const std::u16string& u16_timezone = timezone_mda[0];
MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(u16_timezone),
const std::u16string& utf16_timezone = timezone_mda[0];
MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(utf16_timezone),
error::UNICODE_CONVERSION_ERROR_ID);

// extract the time unit
MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(units_mda[0]),
error::UKNOWN_TIME_UNIT_ERROR_ID)
const std::u16string& utf16_unit = units_mda[0];
MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(utf16_unit),
error::UKNOWN_TIME_UNIT_ERROR_ID);

// create the timestamp_type
auto data_type = arrow::timestamp(time_unit, timezone);
Expand Down
17 changes: 17 additions & 0 deletions matlab/src/cpp/arrow/matlab/proxy/factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#include "arrow/matlab/array/proxy/timestamp_array.h"
#include "arrow/matlab/tabular/proxy/record_batch.h"
#include "arrow/matlab/error/error.h"
#include "arrow/matlab/type/proxy/primitive_ctype.h"
#include "arrow/matlab/type/proxy/string_type.h"
#include "arrow/matlab/type/proxy/timestamp_type.h"

#include "factory.h"

Expand All @@ -41,6 +44,20 @@ libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name,
REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray);
REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray);
REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch);
REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType<float>);
REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType<double>);
REGISTER_PROXY(arrow.type.proxy.UInt8Type , arrow::matlab::type::proxy::PrimitiveCType<uint8_t>);
REGISTER_PROXY(arrow.type.proxy.UInt16Type , arrow::matlab::type::proxy::PrimitiveCType<uint16_t>);
REGISTER_PROXY(arrow.type.proxy.UInt32Type , arrow::matlab::type::proxy::PrimitiveCType<uint32_t>);
REGISTER_PROXY(arrow.type.proxy.UInt64Type , arrow::matlab::type::proxy::PrimitiveCType<uint64_t>);
REGISTER_PROXY(arrow.type.proxy.Int8Type , arrow::matlab::type::proxy::PrimitiveCType<int8_t>);
REGISTER_PROXY(arrow.type.proxy.Int16Type , arrow::matlab::type::proxy::PrimitiveCType<int16_t>);
REGISTER_PROXY(arrow.type.proxy.Int32Type , arrow::matlab::type::proxy::PrimitiveCType<int32_t>);
REGISTER_PROXY(arrow.type.proxy.Int64Type , arrow::matlab::type::proxy::PrimitiveCType<int64_t>);
REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType<bool>);
REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType);
REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType);

return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
};

Expand Down
34 changes: 34 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.


#include "arrow/matlab/type/proxy/fixed_width_type.h"

namespace arrow::matlab::type::proxy {

// Hands the given Arrow fixed-width type to the Type base proxy and
// registers the bitWidth method through REGISTER_METHOD.
FixedWidthType::FixedWidthType(std::shared_ptr<arrow::FixedWidthType> type)
    : Type(std::move(type)) {
    REGISTER_METHOD(FixedWidthType, bitWidth);
}

// Proxy method: stores the result of data_type->bit_width() as a scalar
// MATLAB array in the first output slot of the method context.
void FixedWidthType::bitWidth(libmexclass::proxy::method::Context& context) {
    ::matlab::data::ArrayFactory array_factory;

    const auto width = data_type->bit_width();
    context.outputs[0] = array_factory.createScalar(width);
}
}
34 changes: 34 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once

#include "arrow/matlab/type/proxy/type.h"

namespace arrow::matlab::type::proxy {

// Proxy class for Arrow fixed-width data types. Derives from the Type proxy
// and adds a bitWidth proxy method.
class FixedWidthType : public arrow::matlab::type::proxy::Type {
    public:
        // Constructs the proxy from a shared pointer to an arrow::FixedWidthType.
        FixedWidthType(std::shared_ptr<arrow::FixedWidthType> type);

        virtual ~FixedWidthType() = default;

    protected:
        // Proxy method operating on the supplied libmexclass method context.
        void bitWidth(libmexclass::proxy::method::Context& context);
};

}
55 changes: 55 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/matlab/type/proxy/fixed_width_type.h"
#include "arrow/type_traits.h"

#include <type_traits>


namespace arrow::matlab::type::proxy {

template <typename CType>
using arrow_type_t = typename arrow::CTypeTraits<CType>::ArrowType;

template <typename CType>
using is_primitive = arrow::is_primitive_ctype<arrow_type_t<CType>>;

template<typename CType>
using enable_if_primitive = std::enable_if_t<is_primitive<CType>::value, bool>;

// Proxy for the Arrow data type that corresponds to the C type CType.
// The defaulted non-type template parameter limits instantiation to C types
// for which is_primitive<CType>::value is true.
template <typename CType, enable_if_primitive<CType> = true>
class PrimitiveCType : public arrow::matlab::type::proxy::FixedWidthType {

    using ArrowDataType = arrow_type_t<CType>;

    public:
        // Passes the Arrow type on to the FixedWidthType base proxy.
        PrimitiveCType(std::shared_ptr<ArrowDataType> primitive_type)
            : arrow::matlab::type::proxy::FixedWidthType(std::move(primitive_type)) {
        }

        ~PrimitiveCType() = default;

        // Creates the proxy from arrow::CTypeTraits<CType>::type_singleton().
        // constructor_arguments is not read.
        static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
            auto singleton = std::static_pointer_cast<ArrowDataType>(arrow::CTypeTraits<CType>::type_singleton());
            return std::make_shared<PrimitiveCType>(std::move(singleton));
        }
};

}

28 changes: 28 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/matlab/type/proxy/string_type.h"

namespace arrow::matlab::type::proxy {

// Hands the given Arrow string type to the Type base proxy.
StringType::StringType(std::shared_ptr<arrow::StringType> string_type)
    : Type(std::move(string_type)) {
}

// Creates a StringType proxy around the type returned by arrow::utf8().
// constructor_arguments is not read.
libmexclass::proxy::MakeResult StringType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
    auto utf8_type = arrow::utf8();
    return std::make_shared<StringType>(std::static_pointer_cast<arrow::StringType>(std::move(utf8_type)));
}
}
35 changes: 35 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/string_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/matlab/type/proxy/type.h"

namespace arrow::matlab::type::proxy {

class StringType : public arrow::matlab::type::proxy::Type {

public:
StringType(std::shared_ptr<arrow::StringType> string_type);

~StringType() {}

static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
};

}

80 changes: 80 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/matlab/type/proxy/timestamp_type.h"
#include "arrow/matlab/type/time_unit.h"
#include "arrow/matlab/error/error.h"
#include "arrow/util/utf8.h"

namespace arrow::matlab::type::proxy {

// Hands the given Arrow timestamp type to the FixedWidthType base proxy and
// registers the timeUnit and timeZone methods through REGISTER_METHOD.
TimestampType::TimestampType(std::shared_ptr<arrow::TimestampType> timestamp_type)
    : FixedWidthType(std::move(timestamp_type)) {
    REGISTER_METHOD(TimestampType, timeUnit);
    REGISTER_METHOD(TimestampType, timeZone);
}

// Creates a TimestampType proxy from the struct passed as the first
// constructor argument. The struct's "TimeZone" and "TimeUnit" fields are
// read as string arrays; the first element of each is used.
//
// Returns an error with UNICODE_CONVERSION_ERROR_ID when the time zone cannot
// be converted from UTF-16 to UTF-8, and with UKNOWN_TIME_UNIT_ERROR_ID when
// timeUnitFromString rejects the time unit string.
libmexclass::proxy::MakeResult TimestampType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
    namespace mda = ::matlab::data;

    mda::StructArray options = constructor_arguments[0];

    // Pull both option fields out of the struct before converting either one.
    const mda::StringArray zone_array = options[0]["TimeZone"];
    const mda::StringArray unit_array = options[0]["TimeUnit"];

    // Time zone: UTF-16 (MATLAB) -> UTF-8 (Arrow).
    const std::u16string& zone_utf16 = zone_array[0];
    MATLAB_ASSIGN_OR_ERROR(const auto zone_utf8,
                           arrow::util::UTF16StringToUTF8(zone_utf16),
                           error::UNICODE_CONVERSION_ERROR_ID);

    // Time unit: UTF-16 string -> time unit value accepted by arrow::timestamp.
    const std::u16string& unit_utf16 = unit_array[0];
    MATLAB_ASSIGN_OR_ERROR(const auto unit,
                           arrow::matlab::type::timeUnitFromString(unit_utf16),
                           error::UKNOWN_TIME_UNIT_ERROR_ID);

    auto arrow_type = std::static_pointer_cast<arrow::TimestampType>(arrow::timestamp(unit, zone_utf8));
    return std::make_shared<arrow::matlab::type::proxy::TimestampType>(std::move(arrow_type));
}

// Proxy method: converts the wrapped timestamp type's time zone from UTF-8 to
// UTF-16 and stores it as a scalar MATLAB array in the first output slot.
// A failed conversion is reported through the context with
// UNICODE_CONVERSION_ERROR_ID.
void TimestampType::timeZone(libmexclass::proxy::method::Context& context) {
    namespace mda = ::matlab::data;

    const auto arrow_timestamp = std::static_pointer_cast<arrow::TimestampType>(data_type);
    const auto& zone_utf8 = arrow_timestamp->timezone();
    MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto zone_utf16,
                                        arrow::util::UTF8StringToUTF16(zone_utf8),
                                        context, error::UNICODE_CONVERSION_ERROR_ID);

    mda::ArrayFactory array_factory;
    context.outputs[0] = array_factory.createScalar(zone_utf16);
}

// Proxy method: stores the wrapped timestamp type's unit, cast to int16_t,
// as a scalar MATLAB array in the first output slot.
void TimestampType::timeUnit(libmexclass::proxy::method::Context& context) {
    namespace mda = ::matlab::data;

    const auto arrow_timestamp = std::static_pointer_cast<arrow::TimestampType>(data_type);
    const auto unit_value = static_cast<int16_t>(arrow_timestamp->unit());

    mda::ArrayFactory array_factory;
    context.outputs[0] = array_factory.createScalar(unit_value);
}
}
42 changes: 42 additions & 0 deletions matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/matlab/type/proxy/fixed_width_type.h"
#include "arrow/type_traits.h"

namespace arrow::matlab::type::proxy {

class TimestampType : public arrow::matlab::type::proxy::FixedWidthType {

public:
TimestampType(std::shared_ptr<arrow::TimestampType> timestamp_type);

~TimestampType() {}

static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);

protected:

void timeZone(libmexclass::proxy::method::Context& context);

void timeUnit(libmexclass::proxy::method::Context& context);
};

}

Loading

0 comments on commit 6baf6a7

Please sign in to comment.