diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 7c2437f6edfb5..119d11d9a399a 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -103,7 +103,7 @@ jobs: shell: bash run: | gem install test-unit - pip install cython setuptools six pytest jira + pip install "cython<3" setuptools six pytest jira - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/issue_bot.yml b/.github/workflows/issue_bot.yml index ffd56e440f430..7a62f2149662e 100644 --- a/.github/workflows/issue_bot.yml +++ b/.github/workflows/issue_bot.yml @@ -21,6 +21,7 @@ on: issues: types: - opened + - edited permissions: contents: read @@ -56,9 +57,9 @@ jobs: if (component_labels.length == 0) throw new Error('No components found!'); - await github.rest.issues.addLabels({ + await github.rest.issues.setLabels({ "owner": context.repo.owner, "repo": context.repo.repo, "issue_number": context.payload.issue.number, "labels": component_labels, - }); \ No newline at end of file + }); diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2c5f2965107b8..0000000000000 --- a/.travis.yml +++ /dev/null @@ -1,166 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -dist: focal - -language: minimal - -cache: - directories: - - $TRAVIS_BUILD_DIR/.docker - -addons: - apt: - packages: - - python3-pip - -services: - - docker - -# Note that the global "env" setting isn't inherited automatically by -# matrix entries with their own "env", so we have to insert it explicitly. -env: &global_env - ARROW_ENABLE_TIMING_TESTS: "OFF" - COMPOSE_DOCKER_CLI_BUILD: 1 - DOCKER_BUILDKIT: 0 - DOCKER_VOLUME_PREFIX: $TRAVIS_BUILD_DIR/.docker/ - -jobs: - include: - - name: "C++ on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "CPP" - DOCKER_IMAGE_ID: ubuntu-cpp - # Can't enable ARROW_MIMALLOC because of failures in memory pool tests. - # Can't enable ARROW_S3 because compiler is killed while compiling - # aws-sdk-cpp. - DOCKER_RUN_ARGS: >- - " - -e ARROW_FLIGHT=ON - -e ARROW_GCS=OFF - -e ARROW_MIMALLOC=OFF - -e ARROW_ORC=OFF - -e ARROW_PARQUET=OFF - -e ARROW_S3=OFF - -e ARROW_SUBSTRAIT=OFF - -e CMAKE_BUILD_PARALLEL_LEVEL=2 - -e CMAKE_UNITY_BUILD=ON - -e PARQUET_BUILD_EXAMPLES=OFF - -e PARQUET_BUILD_EXECUTABLES=OFF - -e Protobuf_SOURCE=BUNDLED - -e gRPC_SOURCE=BUNDLED - " - # The LLVM's APT repository causes download error for s390x binary - # We should use the LLVM provided by the default APT repository - CLANG_TOOLS: "10" - LLVM: "10" - UBUNTU: "20.04" - - - name: "Go on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "GO" - DOCKER_IMAGE_ID: debian-go - - - name: "Java on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "JAVA" - DOCKER_IMAGE_ID: debian-java - JDK: 11 - - - name: "Python on s390x" - os: linux - arch: s390x - env: - <<: *global_env - ARCH: s390x - ARROW_CI_MODULES: "PYTHON" - DOCKER_IMAGE_ID: ubuntu-python - # Can't enable ARROW_MIMALLOC because of failures in memory pool tests. - # Can't enable ARROW_S3 because compiler is killed while compiling - # aws-sdk-cpp. 
- DOCKER_RUN_ARGS: >- - " - -e ARROW_FLIGHT=ON - -e ARROW_GCS=OFF - -e ARROW_MIMALLOC=OFF - -e ARROW_ORC=OFF - -e ARROW_PARQUET=OFF - -e ARROW_PYTHON=ON - -e ARROW_S3=OFF - -e CMAKE_BUILD_PARALLEL_LEVEL=2 - -e CMAKE_UNITY_BUILD=ON - -e PARQUET_BUILD_EXAMPLES=OFF - -e PARQUET_BUILD_EXECUTABLES=OFF - -e Protobuf_SOURCE=BUNDLED - -e gRPC_SOURCE=BUNDLED - " - # The LLVM's APT repository causes download error for s390x binary - # We should use the LLVM provided by the default APT repository - CLANG_TOOLS: "10" - LLVM: "10" - UBUNTU: "20.04" - - allow_failures: - - name: "Java on s390x" - - name: "C++ on s390x" - - name: "Python on s390x" - -before_install: - - eval "$(python ci/detect-changes.py)" - - | - arrow_ci_affected=no - for arrow_ci_module in ${ARROW_CI_MODULES}; do - arrow_ci_affected_variable=ARROW_CI_${arrow_ci_module}_AFFECTED - if [ "$(eval "echo \$${arrow_ci_affected_variable}")" = "1" ]; then - arrow_ci_affected=yes - fi - done - if [ "${arrow_ci_affected}" = "no" ]; then - travis_terminate 0 - fi - -install: - - sudo -H pip3 install --upgrade pip - - sudo -H pip3 install 'docker-compose>=1.27.0' - - sudo -H pip3 install -e dev/archery[docker] - -script: - - export ARCHERY_DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - - | - archery docker run \ - ${DOCKER_RUN_ARGS} \ - --volume ${PWD}/build:/build \ - ${DOCKER_IMAGE_ID} - -after_success: - - | - if [ "${TRAVIS_EVENT_TYPE}" = "push" -a \ - "${TRAVIS_REPO_SLUG}" = "apache/arrow" ]; then - archery docker push ${DOCKER_IMAGE_ID} || : - fi diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 04f985c94bb2c..4ae5c3614a1dc 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -18,7 +18,7 @@ # don't add pandas here, because it is not a mandatory test dependency boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture cffi -cython +cython<3 cloudpickle fsspec hypothesis diff --git a/ci/detect-changes.py b/ci/detect-changes.py deleted 
file mode 100644 index 7669639ecd3a9..0000000000000 --- a/ci/detect-changes.py +++ /dev/null @@ -1,362 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from __future__ import print_function - -import functools -import os -import pprint -import re -import sys -import subprocess - - -perr = functools.partial(print, file=sys.stderr) - - -def dump_env_vars(prefix, pattern=None): - if pattern is not None: - match = lambda s: re.search(pattern, s) - else: - match = lambda s: True - for name in sorted(os.environ): - if name.startswith(prefix) and match(name): - perr("- {0}: {1!r}".format(name, os.environ[name])) - - -def run_cmd(cmdline): - proc = subprocess.Popen(cmdline, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = proc.communicate() - if proc.returncode != 0: - raise RuntimeError("Command {cmdline} failed with code {returncode}, " - "stderr was:\n{stderr}\n" - .format(cmdline=cmdline, returncode=proc.returncode, - stderr=err.decode())) - return out - - -def get_commit_description(commit): - """ - Return the textual description (title + body) of the given git commit. 
- """ - out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B", - commit]) - return out.decode('utf-8', 'ignore') - - -def list_affected_files(commit_range): - """ - Return a list of files changed by the given git commit range. - """ - perr("Getting affected files from", repr(commit_range)) - out = run_cmd(["git", "diff", "--name-only", commit_range]) - return list(filter(None, (s.strip() for s in out.decode().splitlines()))) - - -def get_travis_head_commit(): - return os.environ['TRAVIS_COMMIT'] - - -def get_travis_commit_range(): - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain - # unrelated changes. Instead, use the same strategy as on AppVeyor - # below. - run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])]) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - return "{0}..HEAD".format(merge_base) - else: - cr = os.environ['TRAVIS_COMMIT_RANGE'] - # See - # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 - return cr.replace('...', '..') - - -def get_travis_commit_description(): - # Prefer this to get_commit_description(get_travis_head_commit()), - # as rebasing or other repository events may make TRAVIS_COMMIT invalid - # at the time we inspect it - return os.environ['TRAVIS_COMMIT_MESSAGE'] - - -def list_travis_affected_files(): - """ - Return a list of files affected in the current Travis build. 
- """ - commit_range = get_travis_commit_range() - try: - return list_affected_files(commit_range) - except RuntimeError: - # TRAVIS_COMMIT_RANGE can contain invalid revisions when - # building a branch (not a PR) after rebasing: - # https://github.com/travis-ci/travis-ci/issues/2668 - if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': - raise - # If it's a rebase, it's probably enough to use the last commit only - commit_range = '{0}^..'.format(get_travis_head_commit()) - return list_affected_files(commit_range) - - -def list_appveyor_affected_files(): - """ - Return a list of files affected in the current AppVeyor build. - This only works for PR builds. - """ - # Re-fetch PR base branch (e.g. origin/master), pointing FETCH_HEAD to it - run_cmd(["git", "fetch", "-q", "origin", - "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])]) - # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head) - merge_base = run_cmd(["git", "merge-base", - "HEAD", "FETCH_HEAD"]).decode().strip() - # Compute changes files between base changeset and HEAD - return list_affected_files("{0}..HEAD".format(merge_base)) - - -def list_github_actions_affected_files(): - """ - Return a list of files affected in the current GitHub Actions build. - """ - # GitHub Actions checkout `refs/remotes/pull/$PR/merge` where `HEAD` points - # to the merge commit while `HEAD^` points to the commit before. Hence, - # `..HEAD^` points to all commit between the default branch and the PR. 
- return list_affected_files("HEAD^..") - - -LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python', - 'r', 'ruby', 'csharp'] - -ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev'] - - -AFFECTED_DEPENDENCIES = { - 'java': ['integration', 'python'], - 'js': ['integration'], - 'ci': ALL_TOPICS, - 'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'], - 'format': LANGUAGE_TOPICS, - 'go': ['integration'], - '.travis.yml': ALL_TOPICS, - 'appveyor.yml': ALL_TOPICS, - # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in - # practice it's going to be CI - '.github': ALL_TOPICS, - 'c_glib': ['ruby'] -} - -COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js', - 'csharp', 'go', 'docs', 'python', 'dev'} - - -def get_affected_topics(affected_files): - """ - Return a dict of topics affected by the given files. - Each dict value is True if affected, False otherwise. - """ - affected = dict.fromkeys(ALL_TOPICS, False) - - for path in affected_files: - parts = [] - head = path - while head: - head, tail = os.path.split(head) - parts.append(tail) - parts.reverse() - assert parts - p = parts[0] - fn = parts[-1] - if fn.startswith('README'): - continue - - if p in COMPONENTS: - affected[p] = True - - _path_already_affected = {} - - def _affect_dependencies(component): - if component in _path_already_affected: - # For circular dependencies, terminate - return - for topic in AFFECTED_DEPENDENCIES.get(component, ()): - affected[topic] = True - _affect_dependencies(topic) - _path_already_affected[topic] = True - - _affect_dependencies(p) - - return affected - - -def make_env_for_topics(affected): - return {'ARROW_CI_{0}_AFFECTED'.format(k.upper()): '1' if v else '0' - for k, v in affected.items()} - - -def get_unix_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. 
- """ - return "; ".join(("export {0}='{1}'".format(k, v) - for k, v in env.items())) - - -def get_windows_shell_eval(env): - """ - Return a shell-evalable string to setup some environment variables. - """ - return "\n".join(('set "{0}={1}"'.format(k, v) - for k, v in env.items())) - - -def run_from_travis(): - perr("Environment variables (excerpt):") - dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)') - if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and - os.environ['TRAVIS_BRANCH'] in ['master', 'main'] and - os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): - # Never skip anything on default-branch builds in the official repo - affected = dict.fromkeys(ALL_TOPICS, True) - else: - desc = get_travis_commit_description() - if '[skip travis]' in desc: - # Skip everything - affected = dict.fromkeys(ALL_TOPICS, False) - elif '[force ci]' in desc or '[force travis]' in desc: - # Test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - # Test affected topics - affected_files = list_travis_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def run_from_appveyor(): - perr("Environment variables (excerpt):") - dump_env_vars('APPVEYOR_', '(PULL|REPO)') - if not os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'): - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - affected_files = list_appveyor_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_windows_shell_eval(make_env_for_topics(affected)) - - -def run_from_github(): - perr("Environment variables (excerpt):") - dump_env_vars('GITHUB_', 
'(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)') - if os.environ['GITHUB_EVENT_NAME'] != 'pull_request': - # Not a PR build, test everything - affected = dict.fromkeys(ALL_TOPICS, True) - else: - affected_files = list_github_actions_affected_files() - perr("Affected files:", affected_files) - affected = get_affected_topics(affected_files) - assert set(affected) <= set(ALL_TOPICS), affected - - perr("Affected topics:") - perr(pprint.pformat(affected)) - return get_unix_shell_eval(make_env_for_topics(affected)) - - -def test_get_affected_topics(): - affected_topics = get_affected_topics(['cpp/CMakeLists.txt']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': False, - 'go': False, - 'java': False, - 'js': False, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': False, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['format/Schema.fbs']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': True, - 'integration': True, - 'dev': False - } - - affected_topics = get_affected_topics(['.github/workflows']) - assert affected_topics == { - 'c_glib': True, - 'cpp': True, - 'docs': True, - 'go': True, - 'java': True, - 'js': True, - 'python': True, - 'r': True, - 'ruby': True, - 'csharp': True, - 'integration': True, - 'dev': True, - } - - -if __name__ == "__main__": - # This script should have its output evaluated by a shell, - # e.g. 
"eval `python ci/detect-changes.py`" - if os.environ.get('TRAVIS'): - try: - print(run_from_travis()) - except Exception: - # Make sure the enclosing eval will return an error - print("exit 1") - raise - elif os.environ.get('APPVEYOR'): - try: - print(run_from_appveyor()) - except Exception: - print("exit 1") - raise - elif os.environ.get('GITHUB_WORKFLOW'): - try: - print(run_from_github()) - except Exception: - print("exit 1") - raise - else: - sys.exit("Script must be run under Travis-CI, AppVeyor or GitHub Actions") diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 19f30717ca2e2..c59766c4a665c 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -47,10 +47,6 @@ RUN apt-get update -y && \ libxml2-dev \ libgit2-dev \ libssl-dev \ - # install clang to mirror what was done on Travis - clang \ - clang-format \ - clang-tidy \ # R CMD CHECK --as-cran needs pdflatex to build the package manual texlive-latex-base \ # Need locales so we can set UTF-8 diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index fd682d0e2a62a..f0f893c419616 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -83,7 +83,7 @@ pushd ${build_dir} cmake \ -Dabsl_SOURCE=${absl_SOURCE:-} \ - -DARROW_ACERO=${ARROW_ACERO:-ON} \ + -DARROW_ACERO=${ARROW_ACERO:-OFF} \ -DARROW_AZURE=${ARROW_AZURE:-OFF} \ -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ diff --git a/ci/scripts/integration_dask.sh b/ci/scripts/integration_dask.sh index eeaba715b6ae7..d1e2ecdc847f2 100755 --- a/ci/scripts/integration_dask.sh +++ b/ci/scripts/integration_dask.sh @@ -33,6 +33,9 @@ python -c "import dask.dataframe" pytest -v --pyargs dask.dataframe.tests.test_dataframe pytest -v --pyargs dask.dataframe.io.tests.test_orc -pytest -v --pyargs dask.dataframe.io.tests.test_parquet +# skip failing parquet tests +# test_pandas_timestamp_overflow_pyarrow is 
skipped because of GH-33321. +pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ + -k "not test_pandas_timestamp_overflow_pyarrow" # this file contains parquet tests that use S3 filesystem pytest -v --pyargs dask.bytes.tests.test_s3 diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6488ac13cbe77..635bc1684e6f2 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -138,6 +138,9 @@ if(ARROW_PACKAGE_PREFIX) if(NOT ENV{Boost_ROOT}) set(ENV{Boost_ROOT} ${ARROW_PACKAGE_PREFIX}) endif() + if(NOT DEFINED OPENSSL_ROOT_DIR) + set(OPENSSL_ROOT_DIR ${ARROW_PACKAGE_PREFIX}) + endif() endif() # For each dependency, set dependency source to global default, if unset @@ -913,6 +916,7 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=${CMAKE_EXPORT_NO_PACKAGE_REGISTRY} -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=${CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY} -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}) # Enable s/ccache if set by parent. @@ -3998,7 +4002,6 @@ if(ARROW_WITH_GRPC) if(GRPC_VENDORED) # Remove "v" from "vX.Y.Z" string(SUBSTRING ${ARROW_GRPC_BUILD_VERSION} 1 -1 ARROW_GRPC_VERSION) - set(GRPCPP_PP_INCLUDE TRUE) # Examples need to link to static Arrow if we're using static gRPC set(ARROW_GRPC_USE_SHARED OFF) else() @@ -4007,18 +4010,6 @@ if(ARROW_WITH_GRPC) else() set(ARROW_GRPC_VERSION ${gRPC_VERSION}) endif() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
- get_target_property(GRPC_INCLUDE_DIR gRPC::grpc++ INTERFACE_INCLUDE_DIRECTORIES) - if(GRPC_INCLUDE_DIR MATCHES "^\\$<" - OR # generator expression - EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") - set(GRPCPP_PP_INCLUDE TRUE) - elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") - endif() if(ARROW_USE_ASAN) # Disable ASAN in system gRPC. add_library(gRPC::grpc_asan_suppressed INTERFACE IMPORTED) diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 98e5918ebbf55..b7f5d878e5881 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -524,7 +524,7 @@ class KeyHasher { size_t index_; std::vector indices_; std::vector metadata_; - const RecordBatch* batch_; + std::atomic batch_; std::vector hashes_; LightContext ctx_; std::vector column_arrays_; @@ -819,7 +819,6 @@ class InputState { have_active_batch &= !queue_.TryPop(); if (have_active_batch) { DCHECK_GT(queue_.UnsyncFront()->num_rows(), 0); // empty batches disallowed - key_hasher_->Invalidate(); // batch changed - invalidate key hasher's cache memo_.UpdateTime(GetTime(queue_.UnsyncFront().get(), 0)); // time changed } } @@ -897,7 +896,8 @@ class InputState { Status Push(const std::shared_ptr& rb) { if (rb->num_rows() > 0) { - queue_.Push(rb); // only after above updates - push batch for processing + key_hasher_->Invalidate(); // batch changed - invalidate key hasher's cache + queue_.Push(rb); // only now push batch for processing } else { ++batches_processed_; // don't enqueue empty batches, just record as processed } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 083a85eb346c5..1db06a762544b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ 
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) { } } - // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() + // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in + // decimal_test.cc } TEST(Cast, DecimalToString) { diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 2eab7ae8afaf2..a7bb2d88c291b 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -27,6 +27,7 @@ #include "arrow/array/dict_internal.h" #include "arrow/array/util.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/cast.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" @@ -762,6 +763,38 @@ const FunctionDoc dictionary_encode_doc( ("Return a dictionary-encoded version of the input array."), {"array"}, "DictionaryEncodeOptions"); +// ---------------------------------------------------------------------- +// This function does not use any hashing utilities +// but is kept in this file to be near dictionary_encode +// Dictionary decode implementation + +const FunctionDoc dictionary_decode_doc{ + "Decodes a DictionaryArray to an Array", + ("Return a plain-encoded version of the array input\n" + "This function does nothing if the input is not a dictionary."), + {"dictionary_array"}}; + +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result ExecuteImpl(const std::vector& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return args[0]; + } + + if (args[0].is_array() || args[0].is_chunked_array()) { + DictionaryType* dict_type = checked_cast(args[0].type().get()); + 
CastOptions cast_options = CastOptions::Safe(dict_type->value_type()); + return CallFunction("cast", args, &cast_options, ctx); + } else { + return Status::TypeError("Expected an Array or a Chunked Array"); + } + } +}; } // namespace void RegisterVectorHash(FunctionRegistry* registry) { @@ -819,6 +852,10 @@ void RegisterVectorHash(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::move(dict_encode))); } +void RegisterDictionaryDecode(FunctionRegistry* registry) { + DCHECK_OK(registry->AddFunction(std::make_shared())); +} + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index a4b484a2069ea..7a54f78a03736 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -275,6 +275,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Register core kernels RegisterScalarCast(registry.get()); + RegisterDictionaryDecode(registry.get()); RegisterVectorHash(registry.get()); RegisterVectorSelection(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index b4239701d9573..cdc9f804e72f1 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -28,6 +28,7 @@ namespace internal { void RegisterScalarArithmetic(FunctionRegistry* registry); void RegisterScalarBoolean(FunctionRegistry* registry); void RegisterScalarCast(FunctionRegistry* registry); +void RegisterDictionaryDecode(FunctionRegistry* registry); void RegisterScalarComparison(FunctionRegistry* registry); void RegisterScalarIfElse(FunctionRegistry* registry); void RegisterScalarNested(FunctionRegistry* registry); diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index 78f90ea37f7af..f69f60a5af434 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -56,7 +56,9 @@ void TempVectorStack::release(int id, uint32_t 
num_bytes) { --num_vectors_; } -inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { +namespace bit_util { + +inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { // This will not be correct on big-endian architectures. #if !ARROW_LITTLE_ENDIAN ARROW_DCHECK(false); @@ -73,7 +75,7 @@ inline uint64_t bit_util::SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes } } -inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { +inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value) { // This will not be correct on big-endian architectures. #if !ARROW_LITTLE_ENDIAN ARROW_DCHECK(false); @@ -88,8 +90,8 @@ inline void bit_util::SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_ } } -inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index, - int* num_indexes, uint16_t* indexes) { +inline void bits_to_indexes_helper(uint64_t word, uint16_t base_index, int* num_indexes, + uint16_t* indexes) { int n = *num_indexes; while (word) { indexes[n++] = base_index + static_cast(CountTrailingZeros(word)); @@ -98,9 +100,8 @@ inline void bit_util::bits_to_indexes_helper(uint64_t word, uint16_t base_index, *num_indexes = n; } -inline void bit_util::bits_filter_indexes_helper(uint64_t word, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes) { +inline void bits_filter_indexes_helper(uint64_t word, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes) { int n = *num_indexes; while (word) { indexes[n++] = input_indexes[CountTrailingZeros(word)]; @@ -110,21 +111,21 @@ inline void bit_util::bits_filter_indexes_helper(uint64_t word, } template -void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, uint16_t base_index) { +void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, + 
const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, + uint16_t base_index = 0) { // 64 bits at a time constexpr int unroll = 64; int tail = num_bits % unroll; #if defined(ARROW_HAVE_AVX2) if (hardware_flags & arrow::internal::CpuInfo::AVX2) { if (filter_input_indexes) { - bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes, - num_indexes, indexes); + avx2::bits_filter_indexes_avx2(bit_to_search, num_bits - tail, bits, input_indexes, + num_indexes, indexes); } else { - bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, indexes, - base_index); + avx2::bits_to_indexes_avx2(bit_to_search, num_bits - tail, bits, num_indexes, + indexes, base_index); } } else { #endif @@ -160,9 +161,9 @@ void bit_util::bits_to_indexes_internal(int64_t hardware_flags, const int num_bi } } -void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits, - const uint8_t* bits, int* num_indexes, uint16_t* indexes, - int bit_offset) { +void bits_to_indexes(int bit_to_search, int64_t hardware_flags, int num_bits, + const uint8_t* bits, int* num_indexes, uint16_t* indexes, + int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; *num_indexes = 0; @@ -193,10 +194,9 @@ void bit_util::bits_to_indexes(int bit_to_search, int64_t hardware_flags, int nu *num_indexes += num_indexes_new; } -void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset) { +void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -226,10 +226,9 @@ void bit_util::bits_filter_indexes(int bit_to_search, int64_t hardware_flags, } } -void 
bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset) { +void bits_split_indexes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + int* num_indexes_bit0, uint16_t* indexes_bit0, + uint16_t* indexes_bit1, int bit_offset) { bits_to_indexes(0, hardware_flags, num_bits, bits, num_indexes_bit0, indexes_bit0, bit_offset); int num_indexes_bit1; @@ -237,8 +236,8 @@ void bit_util::bits_split_indexes(int64_t hardware_flags, const int num_bits, bit_offset); } -void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset) { +void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + uint8_t* bytes, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -258,7 +257,7 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits, if (hardware_flags & arrow::internal::CpuInfo::AVX2) { // The function call below processes whole 32 bit chunks together. num_processed = num_bits - (num_bits % 32); - bits_to_bytes_avx2(num_processed, bits, bytes); + avx2::bits_to_bytes_avx2(num_processed, bits, bytes); } #endif // Processing 8 bits at a time @@ -290,8 +289,8 @@ void bit_util::bits_to_bytes(int64_t hardware_flags, const int num_bits, } } -void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset) { +void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* bytes, + uint8_t* bits, int bit_offset) { bits += bit_offset / 8; bit_offset %= 8; if (bit_offset != 0) { @@ -314,7 +313,7 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, if (hardware_flags & arrow::internal::CpuInfo::AVX2) { // The function call below processes whole 32 bit chunks together. 
num_processed = num_bits - (num_bits % 32); - bytes_to_bits_avx2(num_processed, bytes, bits); + avx2::bytes_to_bits_avx2(num_processed, bytes, bits); } #endif // Process 8 bits at a time @@ -338,11 +337,11 @@ void bit_util::bytes_to_bits(int64_t hardware_flags, const int num_bits, } } -bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes) { +bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes) { #if defined(ARROW_HAVE_AVX2) if (hardware_flags & arrow::internal::CpuInfo::AVX2) { - return are_all_bytes_zero_avx2(bytes, num_bytes); + return avx2::are_all_bytes_zero_avx2(bytes, num_bytes); } #endif uint64_t result_or = 0; @@ -358,6 +357,7 @@ bool bit_util::are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, return result_or == 0; } +} // namespace bit_util } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 6e1bb79674cba..489139eab87f2 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -139,69 +139,55 @@ class TempVectorHolder { uint32_t num_elements_; }; -class ARROW_EXPORT bit_util { - public: - static void bits_to_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); +namespace bit_util { - static void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, +ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); - // Input and output indexes may be pointing to the same data (in-place filtering). 
- static void bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset = 0); +ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes, int bit_offset = 0); - // Bit 1 is replaced with byte 0xFF. - static void bits_to_bytes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); +// Input and output indexes may be pointing to the same data (in-place filtering). +ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, uint16_t* indexes_bit1, + int bit_offset = 0); - // Return highest bit of each byte. - static void bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); +// Bit 1 is replaced with byte 0xFF. +ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); - static bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes); +// Return highest bit of each byte. 
+ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); - private: - inline static uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes); - inline static void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t value); - inline static void bits_to_indexes_helper(uint64_t word, uint16_t base_index, - int* num_indexes, uint16_t* indexes); - inline static void bits_filter_indexes_helper(uint64_t word, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); - template - static void bits_to_indexes_internal(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes, - uint16_t base_index = 0); +ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes); #if defined(ARROW_HAVE_AVX2) - static void bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index = 0); - static void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); - template - static void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - uint16_t base_index = 0); - template - static void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + +namespace avx2 { +ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, int* num_indexes, uint16_t* indexes); - static void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes); - static void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits); - static bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +ARROW_EXPORT void 
bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes, uint16_t base_index = 0); +ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, + uint8_t* bytes); +ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, + uint8_t* bits); +ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +} // namespace avx2 + #endif -}; +} // namespace bit_util } // namespace util namespace compute { diff --git a/cpp/src/arrow/compute/util_avx2.cc b/cpp/src/arrow/compute/util_avx2.cc index 7c2a378254562..89ec6aa97a608 100644 --- a/cpp/src/arrow/compute/util_avx2.cc +++ b/cpp/src/arrow/compute/util_avx2.cc @@ -16,30 +16,18 @@ // under the License. #include +#include -#include "arrow/acero/util.h" #include "arrow/util/bit_util.h" - -namespace arrow { -namespace util { +#include "arrow/util/logging.h" #if defined(ARROW_HAVE_AVX2) -void bit_util::bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index) { - if (bit_to_search == 0) { - bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index); - } else { - ARROW_DCHECK(bit_to_search == 1); - bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index); - } -} +namespace arrow::util::avx2 { template -void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - uint16_t base_index) { +void bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, int* num_indexes, + uint16_t* indexes, uint16_t base_index = 0) { // 64 bits at a time constexpr int unroll = 64; @@ -82,21 +70,20 @@ void bit_util::bits_to_indexes_imp_avx2(const int num_bits, const uint8_t* bits, } } -void bit_util::bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, 
- uint16_t* indexes) { +void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes, uint16_t base_index) { if (bit_to_search == 0) { - bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes); + bits_to_indexes_imp_avx2<0>(num_bits, bits, num_indexes, indexes, base_index); } else { - bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes); + ARROW_DCHECK(bit_to_search == 1); + bits_to_indexes_imp_avx2<1>(num_bits, bits, num_indexes, indexes, base_index); } } template -void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, - int* out_num_indexes, uint16_t* indexes) { +void bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* out_num_indexes, + uint16_t* indexes) { // 64 bits at a time constexpr int unroll = 64; @@ -167,8 +154,17 @@ void bit_util::bits_filter_indexes_imp_avx2(const int num_bits, const uint8_t* b *out_num_indexes = num_indexes; } -void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, - uint8_t* bytes) { +void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes) { + if (bit_to_search == 0) { + bits_filter_indexes_imp_avx2<0>(num_bits, bits, input_indexes, num_indexes, indexes); + } else { + bits_filter_indexes_imp_avx2<1>(num_bits, bits, input_indexes, num_indexes, indexes); + } +} + +void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes) { constexpr int unroll = 32; constexpr uint64_t kEachByteIs1 = 0x0101010101010101ULL; @@ -188,8 +184,7 @@ void bit_util::bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, } } -void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, - uint8_t* bits) { +void bytes_to_bits_avx2(const int num_bits, const 
uint8_t* bytes, uint8_t* bits) { constexpr int unroll = 32; // Processing 32 bits at a time for (int i = 0; i < num_bits / unroll; ++i) { @@ -198,7 +193,7 @@ void bit_util::bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, } } -bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) { +bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) { __m256i result_or = _mm256_setzero_si256(); uint32_t i; for (i = 0; i < num_bytes / 32; ++i) { @@ -216,7 +211,6 @@ bool bit_util::are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes) return result_or32 == 0; } -#endif // ARROW_HAVE_AVX2 +} // namespace arrow::util::avx2 -} // namespace util -} // namespace arrow +#endif // ARROW_HAVE_AVX2 diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 1db230b16e9c2..39936fbd7b5b2 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -82,7 +82,7 @@ class ARROW_DS_EXPORT FragmentSelection { /// \brief Instructions for scanning a particular fragment /// -/// The fragment scan request is dervied from ScanV2Options. The main +/// The fragment scan request is derived from ScanV2Options. The main /// difference is that the scan options are based on the dataset schema /// while the fragment request is based on the fragment schema. 
struct ARROW_DS_EXPORT FragmentScanRequest { diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index d56dc81e356bd..1e7ea9bb002bb 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -46,11 +46,7 @@ #error "gRPC headers should not be in public API" #endif -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif // Include before test_util.h (boost), contains Windows fixes #include "arrow/flight/platform.h" diff --git a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h index 1085a946966c8..5005fc6b16eb4 100644 --- a/cpp/src/arrow/flight/transport/grpc/customize_grpc.h +++ b/cpp/src/arrow/flight/transport/grpc/customize_grpc.h @@ -31,17 +31,8 @@ #pragma warning(disable : 4267) #endif -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif - -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #ifdef _MSC_VER #pragma warning(pop) diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc index a1d0e3266b4e6..89f088638320e 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc @@ -25,15 +25,10 @@ #include #include -#include "arrow/util/config.h" -#ifdef GRPCPP_PP_INCLUDE #include #if defined(GRPC_NAMESPACE_FOR_TLS_CREDENTIALS_OPTIONS) #include #endif -#else -#include -#endif #include diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc index dcf9c3f8c9f4b..2c7a1d5e99234 100644 --- a/cpp/src/arrow/flight/transport/grpc/grpc_server.cc +++ b/cpp/src/arrow/flight/transport/grpc/grpc_server.cc @@ -25,12 +25,7 @@ #include #include -#include "arrow/util/config.h" -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #include "arrow/buffer.h" #include "arrow/flight/serialization_internal.h" diff --git 
a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc index 8514ca361df33..cff111d64df91 100644 --- a/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc +++ b/cpp/src/arrow/flight/transport/grpc/serialization_internal.cc @@ -36,13 +36,8 @@ #include #include -#ifdef GRPCPP_PP_INCLUDE #include #include -#else -#include -#include -#endif #if defined(_MSC_VER) #pragma warning(pop) diff --git a/cpp/src/arrow/flight/transport/grpc/util_internal.cc b/cpp/src/arrow/flight/transport/grpc/util_internal.cc index f9bf26058ad58..f431fc30ec87a 100644 --- a/cpp/src/arrow/flight/transport/grpc/util_internal.cc +++ b/cpp/src/arrow/flight/transport/grpc/util_internal.cc @@ -22,11 +22,7 @@ #include #include -#ifdef GRPCPP_PP_INCLUDE #include -#else -#include -#endif #include "arrow/flight/transport.h" #include "arrow/flight/types.h" diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index f2fd39d6f37ad..0835ab9074a48 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const { return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision]; } +void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole, + BasicDecimal256* fraction) const { + DCHECK_GE(scale, 0); + DCHECK_LE(scale, 76); + + BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]); + auto s = Divide(multiplier, whole, fraction); + DCHECK_EQ(s, DecimalStatus::kSuccess); +} + const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index b263bb234a795..d8a91ea76b390 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public 
GenericBasicDecimal - static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) { Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); x += static_cast(decimal.low_bits()); x *= LargePowerOfTen(-scale); return x; } + + /// An approximate conversion from Decimal128 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaBits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. + template + static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + if (scale <= 0 || (decimal.high_bits() == 0 && + decimal.low_bits() <= RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split decimal into whole and fractional parts to avoid precision loss + BasicDecimal128 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + + return whole + fraction; + } }; } // namespace @@ -967,7 +994,7 @@ struct Decimal256RealConversion } template - static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) { DCHECK_GE(decimal, 0); Real x = 0; const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); @@ -978,6 +1005,33 @@ struct Decimal256RealConversion x *= 
LargePowerOfTen(-scale); return x; } + + /// An approximate conversion from Decimal256 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaBits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. + template + static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + const auto parts_le = decimal.little_endian_array(); + if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 && + parts_le[0] < RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split the decimal into whole and fractional parts to avoid precision loss + BasicDecimal256 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + return whole + fraction; + } }; } // namespace diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 041aac4ef860d..51a7229ab6678 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -451,6 +451,8 @@ struct RealTraits { static constexpr int kMantissaBits = 24; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 8; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template <> @@ -464,6 +466,8 @@ struct RealTraits { static 
constexpr int kMantissaBits = 53; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 16; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 1401750ce76d6..6376a9545a0f8 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex << "Decimal value: " << decimal_value << " Scale: " << scale; } +template +void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale, + Real expected) { + Decimal dec(decimal_value); + auto result = dec.template ToReal(scale); + ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) || + result == std::nextafter(expected, expected - 1)) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + +template +void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale, + Real epsilon, Real expected) { + Decimal dec(decimal_value); + ASSERT_TRUE(std::abs(dec.template ToReal(scale) - expected) <= epsilon) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + template void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale, float expected) { @@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test { } } } +}; - // Test precision of conversions to float values - void TestPrecision() { - // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); - CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); - // 2**64 + 2**41 (exactly representable in a float) - CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); - CheckDecimalToReal("-18446746272732807168", 0, 
-1.8446746e+19f); - } +TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); +TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } + +// Custom test for Decimal::ToReal +template +class TestDecimalToRealFloat : public TestDecimalToReal> {}; +TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes); - // Test conversions with a range of scales - void TestLargeValues(int32_t max_scale) { - // Note that exact comparisons would succeed on some platforms (Linux, macOS). - // Nevertheless, power-of-ten factors are not all exactly representable - // in binary floating point. - for (int32_t scale = -max_scale; scale <= max_scale; scale++) { +TYPED_TEST(TestDecimalToRealFloat, LargeValues) { + auto max_scale = TypeParam::kMaxScale; + // Note that exact comparisons would succeed on some platforms (Linux, macOS). + // Nevertheless, power-of-ten factors are not all exactly representable + // in binary floating point. + for (int32_t scale = -max_scale; scale <= max_scale; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - CheckDecimalToRealApprox("1", scale, Pow10(-scale)); - } - for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { + CheckDecimalToRealApprox("1", scale, this->Pow10(-scale)); + } + for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - const Real factor = static_cast(123); - CheckDecimalToRealApprox("123", scale, factor * Pow10(-scale)); - } + const auto factor = static_cast(123); + CheckDecimalToRealApprox("123", scale, factor * this->Pow10(-scale)); } -}; - -TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); - -TYPED_TEST(TestDecimalToReal, 
TestSuccess) { this->TestSuccess(); } +} -// Custom test for Decimal128::ToReal -class TestDecimal128ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); } -TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); } -// Custom test for Decimal256::ToReal -class TestDecimal256ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); } -TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); } +TYPED_TEST(TestDecimalToRealFloat, Precision) { + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); + CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); + // 2**64 + 2**41 (exactly representable in a float) + CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); + CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); + + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); // pad with trailing zeros + CheckDecimalToReal(seven, scale, 7.0f); + CheckDecimalToReal("-" + seven, scale, -7.0f); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0f); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0f); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinOneULP("-9999999.999999", 6, + -9999999.999999f); + + // Large fractions are within 2^-23 + constexpr float epsilon = 1.1920928955078125e-07f; // 2^-23 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 
112334829348925.99070703983306884765625f); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345f); +} // ToReal tests are disabled on MinGW because of precision issues in results #ifndef __MINGW32__ -// Custom test for Decimal128::ToReal +// Custom test for Decimal::ToReal template class TestDecimalToRealDouble : public TestDecimalToReal> { }; @@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { 9.999999999999998e+47); CheckDecimalToReal("-99999999999999978859343891977453174784", -10, -9.999999999999998e+47); + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); + CheckDecimalToReal(seven, scale, 7.0); + CheckDecimalToReal("-" + seven, scale, -7.0); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9); + CheckDecimalToRealWithinOneULP("9999999.999999999999999", 15, + 9999999.999999999999999); + CheckDecimalToRealWithinOneULP("-9999999.999999999999999", 15, + -9999999.999999999999999); + + // Large fractions are within 2^-52 + constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345); } #endif // __MINGW32__ diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index e6aad7cee2a3e..eb2e2d8fed88f 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -401,11 
+401,14 @@ endif() add_parquet_test(file_deserialize_test SOURCES file_deserialize_test.cc test_util.cc) add_parquet_test(schema_test) -add_parquet_benchmark(bloom_filter_benchmark) +add_parquet_benchmark(bloom_filter_benchmark SOURCES bloom_filter_benchmark.cc + benchmark_util.cc) add_parquet_benchmark(column_reader_benchmark) add_parquet_benchmark(column_io_benchmark) add_parquet_benchmark(encoding_benchmark) add_parquet_benchmark(level_conversion_benchmark) +add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc + benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") if(ARROW_WITH_BROTLI) diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 7c608e4424753..f11101eb24298 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -50,9 +50,7 @@ using parquet::schema::PrimitiveNode; using ::testing::ElementsAre; -namespace parquet { - -namespace arrow { +namespace parquet::arrow { const auto BOOL = ::arrow::boolean(); const auto UINT8 = ::arrow::uint8(); @@ -1776,5 +1774,4 @@ TEST_F(TestLevels, ListErrors) { } } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/arrow_statistics_test.cc b/cpp/src/parquet/arrow/arrow_statistics_test.cc index 604f163a66f84..ad4496933ef4c 100644 --- a/cpp/src/parquet/arrow/arrow_statistics_test.cc +++ b/cpp/src/parquet/arrow/arrow_statistics_test.cc @@ -36,8 +36,7 @@ using arrow::Table; using arrow::io::BufferReader; -namespace parquet { -namespace arrow { +namespace parquet::arrow { struct StatisticsTestParam { std::shared_ptr<::arrow::Table> table; @@ -157,5 +156,4 @@ INSTANTIATE_TEST_SUITE_P( /*expected_min=*/"z", /*expected_max=*/"z"})); -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc index 
2aeee6e500f5d..919c97f4323b6 100644 --- a/cpp/src/parquet/arrow/path_internal.cc +++ b/cpp/src/parquet/arrow/path_internal.cc @@ -108,8 +108,7 @@ #include "parquet/properties.h" -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { @@ -901,5 +900,4 @@ Status MultipathLevelBuilder::Write(const Array& array, bool array_field_nullabl return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/path_internal_test.cc b/cpp/src/parquet/arrow/path_internal_test.cc index 4645807007478..fb9c404247f3b 100644 --- a/cpp/src/parquet/arrow/path_internal_test.cc +++ b/cpp/src/parquet/arrow/path_internal_test.cc @@ -29,8 +29,7 @@ #include "parquet/properties.h" -namespace parquet { -namespace arrow { +namespace parquet::arrow { using ::arrow::default_memory_pool; using ::arrow::field; @@ -644,5 +643,4 @@ TEST_F(MultipathLevelBuilderTest, TestPrimitiveNonNullable) { EXPECT_THAT(results_[0].post_list_elements[0].end, Eq(4)); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 40fbdcbb562b1..855fb5a5a4882 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -76,8 +76,7 @@ using parquet::internal::RecordReader; namespace bit_util = arrow::bit_util; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { ::arrow::Result> ChunksToSingle(const ChunkedArray& chunked) { @@ -1414,5 +1413,4 @@ Status FuzzReader(const uint8_t* data, int64_t size) { } // namespace internal -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/reader_internal.cc b/cpp/src/parquet/arrow/reader_internal.cc index a294b712a7ce3..7034f5839e1ea 100644 --- a/cpp/src/parquet/arrow/reader_internal.cc +++ b/cpp/src/parquet/arrow/reader_internal.cc @@ -94,8 +94,7 @@ using ParquetType = parquet::Type; 
namespace bit_util = arrow::bit_util; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { template @@ -856,5 +855,4 @@ Status TransferColumnData(RecordReader* reader, const std::shared_ptr& va return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/reconstruct_internal_test.cc b/cpp/src/parquet/arrow/reconstruct_internal_test.cc index 8a69f8266f1e6..4e1f421498e85 100644 --- a/cpp/src/parquet/arrow/reconstruct_internal_test.cc +++ b/cpp/src/parquet/arrow/reconstruct_internal_test.cc @@ -65,8 +65,7 @@ using testing::Eq; using testing::NotNull; using testing::SizeIs; -namespace parquet { -namespace arrow { +namespace parquet::arrow { using parquet::schema::GroupNode; using parquet::schema::NodePtr; @@ -1637,5 +1636,4 @@ TEST_F(TestReconstructColumn, ListList6) { // TODO legacy-list-in-struct etc.? -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index c5d5e0743a7f1..f713548d05a70 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -63,9 +63,7 @@ using parquet::LogicalType; using parquet::internal::LevelInfo; -namespace parquet { - -namespace arrow { +namespace parquet::arrow { // ---------------------------------------------------------------------- // Parquet to Arrow schema conversion @@ -1106,5 +1104,4 @@ Status SchemaManifest::Make(const SchemaDescriptor* schema, return Status::OK(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc index 064bf4f55cc7e..da0427cb31000 100644 --- a/cpp/src/parquet/arrow/schema_internal.cc +++ b/cpp/src/parquet/arrow/schema_internal.cc @@ -23,9 +23,7 @@ using ArrowType = ::arrow::DataType; using ArrowTypeId = ::arrow::Type; using ParquetType = parquet::Type; 
-namespace parquet { - -namespace arrow { +namespace parquet::arrow { using ::arrow::Result; using ::arrow::Status; @@ -218,5 +216,4 @@ Result> GetArrowType( primitive.type_length(), int96_arrow_time_unit); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h index fb837c3ee6cab..55292ac35ab9c 100644 --- a/cpp/src/parquet/arrow/schema_internal.h +++ b/cpp/src/parquet/arrow/schema_internal.h @@ -24,8 +24,7 @@ namespace arrow { class DataType; } -namespace parquet { -namespace arrow { +namespace parquet::arrow { using ::arrow::Result; @@ -47,5 +46,4 @@ Result> GetArrowType( const schema::PrimitiveNode& primitive, ::arrow::TimeUnit::type int96_arrow_time_unit = ::arrow::TimeUnit::NANO); -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 6d22f318f6b97..0c67e8d6bb3d4 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -73,8 +73,7 @@ using parquet::ParquetFileWriter; using parquet::ParquetVersion; using parquet::schema::GroupNode; -namespace parquet { -namespace arrow { +namespace parquet::arrow { namespace { @@ -600,5 +599,4 @@ Status WriteTable(const ::arrow::Table& table, ::arrow::MemoryPool* pool, return writer->Close(); } -} // namespace arrow -} // namespace parquet +} // namespace parquet::arrow diff --git a/cpp/src/parquet/benchmark_util.cc b/cpp/src/parquet/benchmark_util.cc new file mode 100644 index 0000000000000..6220336e1c39e --- /dev/null +++ b/cpp/src/parquet/benchmark_util.cc @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "parquet/benchmark_util.h" + +#include + +namespace parquet::benchmark { + +namespace { + +void GenerateRandomString(uint32_t length, uint32_t seed, std::vector* heap) { + // Character set used to generate random string + const std::string charset = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + std::default_random_engine gen(seed); + std::uniform_int_distribution dist(0, static_cast(charset.size() - 1)); + + for (uint32_t i = 0; i < length; i++) { + heap->emplace_back(charset[dist(gen)]); + } +} + +template +void GenerateBenchmarkDataIntegerImpl(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t) { + static_assert(std::is_integral_v); + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i] = d(gen); + } +} + +template +void GenerateBenchmarkDataFloatImpl(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t) { + static_assert(std::is_floating_point_v); + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_real_distribution d(std::numeric_limits::lowest(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i] = d(gen); + } +} + +} // namespace + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, int32_t* data, + 
std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataIntegerImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, int64_t* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataIntegerImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, float* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataFloatImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, double* data, + std::vector* heap, uint32_t data_string_length) { + GenerateBenchmarkDataFloatImpl(size, seed, data, heap, data_string_length); +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, Int96* data, + std::vector* heap, uint32_t) { + heap->clear(); + std::default_random_engine gen(seed); + std::uniform_int_distribution d(std::numeric_limits::min(), + std::numeric_limits::max()); + for (uint32_t i = 0; i < size; ++i) { + data[i].value[0] = d(gen); + data[i].value[1] = d(gen); + data[i].value[2] = d(gen); + } +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, FLBA* data, + std::vector* heap, uint32_t data_string_length) { + heap->clear(); + GenerateRandomString(data_string_length * size, seed, heap); + for (uint32_t i = 0; i < size; ++i) { + data[i].ptr = heap->data() + i * data_string_length; + } +} + +template <> +void GenerateBenchmarkData(uint32_t size, uint32_t seed, ByteArray* data, + std::vector* heap, uint32_t data_string_length) { + heap->clear(); + GenerateRandomString(data_string_length * size, seed, heap); + for (uint32_t i = 0; i < size; ++i) { + data[i].ptr = heap->data() + i * data_string_length; + data[i].len = data_string_length; + } +} + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/benchmark_util.h b/cpp/src/parquet/benchmark_util.h new 
file mode 100644 index 0000000000000..7996f7f85e898 --- /dev/null +++ b/cpp/src/parquet/benchmark_util.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "parquet/types.h" + +namespace parquet::benchmark { + +template +void GenerateBenchmarkData(uint32_t size, uint32_t seed, T* data, + std::vector* heap, uint32_t data_string_length); + +#define _GENERATE_BENCHMARK_DATA_DECL(KLASS) \ + template <> \ + void GenerateBenchmarkData(uint32_t size, uint32_t seed, KLASS* data, \ + std::vector* heap, uint32_t data_string_length); + +_GENERATE_BENCHMARK_DATA_DECL(int32_t) +_GENERATE_BENCHMARK_DATA_DECL(int64_t) +_GENERATE_BENCHMARK_DATA_DECL(float) +_GENERATE_BENCHMARK_DATA_DECL(double) +_GENERATE_BENCHMARK_DATA_DECL(ByteArray) +_GENERATE_BENCHMARK_DATA_DECL(FLBA) +_GENERATE_BENCHMARK_DATA_DECL(Int96) + +#undef _GENERATE_BENCHMARK_DATA_DECL + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/bloom_filter_benchmark.cc b/cpp/src/parquet/bloom_filter_benchmark.cc index fa934b1d5290a..13c731d975b2c 100644 --- a/cpp/src/parquet/bloom_filter_benchmark.cc +++ b/cpp/src/parquet/bloom_filter_benchmark.cc @@ -18,13 +18,13 @@ #include 
"benchmark/benchmark.h" #include "arrow/util/logging.h" +#include "parquet/benchmark_util.h" #include "parquet/bloom_filter.h" #include "parquet/properties.h" #include -namespace parquet { -namespace benchmark { +namespace parquet::benchmark { constexpr static uint32_t kNumBloomFilterInserts = 16 * 1024; // The sample string length for FLBA and ByteArray benchmarks @@ -40,63 +40,11 @@ std::unique_ptr CreateBloomFilter(uint32_t num_values) { return bloom_filter; } -void GenerateRandomString(uint32_t length, uint32_t seed, std::vector* heap) { - // Character set used to generate random string - const std::string charset = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - - std::default_random_engine gen(seed); - std::uniform_int_distribution dist(0, static_cast(charset.size() - 1)); - - for (uint32_t i = 0; i < length; i++) { - heap->push_back(charset[dist(gen)]); - } -} - -template -void GenerateBenchmarkData(uint32_t size, uint32_t seed, T* data, - [[maybe_unused]] std::vector* heap = nullptr) { - if constexpr (std::is_integral_v) { - std::default_random_engine gen(seed); - std::uniform_int_distribution d(std::numeric_limits::min(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i] = d(gen); - } - } else if constexpr (std::is_floating_point_v) { - std::default_random_engine gen(seed); - std::uniform_real_distribution d(std::numeric_limits::lowest(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i] = d(gen); - } - } else if constexpr (std::is_same_v) { - GenerateRandomString(kDataStringLength * size, seed, heap); - for (uint32_t i = 0; i < size; ++i) { - data[i].ptr = heap->data() + i * kDataStringLength; - } - } else if constexpr (std::is_same_v) { - GenerateRandomString(kDataStringLength * size, seed, heap); - for (uint32_t i = 0; i < size; ++i) { - data[i].ptr = heap->data() + i * kDataStringLength; - data[i].len = kDataStringLength; - } - } else if constexpr (std::is_same_v) { - 
std::default_random_engine gen(seed); - std::uniform_int_distribution d(std::numeric_limits::min(), - std::numeric_limits::max()); - for (uint32_t i = 0; i < size; ++i) { - data[i].value[0] = d(gen); - data[i].value[1] = d(gen); - data[i].value[2] = d(gen); - } - } -} - std::vector GetHashValues(uint32_t num_values, uint32_t seed) { // Generate sample data values std::vector values(num_values); - GenerateBenchmarkData(num_values, seed, values.data()); + std::vector heap; + GenerateBenchmarkData(num_values, seed, values.data(), &heap, kDataStringLength); // Create a temp filter to compute hash values auto filter = CreateBloomFilter(/*num_values=*/8); std::vector hashes(num_values); @@ -109,7 +57,8 @@ static void BM_ComputeHash(::benchmark::State& state) { using T = typename DType::c_type; std::vector values(kNumBloomFilterInserts); std::vector heap; - GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap); + GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap, + kDataStringLength); auto filter = CreateBloomFilter(kNumBloomFilterInserts); for (auto _ : state) { uint64_t total = 0; @@ -136,7 +85,8 @@ static void BM_BatchComputeHash(::benchmark::State& state) { using T = typename DType::c_type; std::vector values(kNumBloomFilterInserts); std::vector heap; - GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap); + GenerateBenchmarkData(kNumBloomFilterInserts, /*seed=*/0, values.data(), &heap, + kDataStringLength); auto filter = CreateBloomFilter(kNumBloomFilterInserts); std::vector hashes(kNumBloomFilterInserts); for (auto _ : state) { @@ -231,5 +181,4 @@ BENCHMARK(BM_BatchInsertHash); BENCHMARK(BM_FindExistingHash); BENCHMARK(BM_FindNonExistingHash); -} // namespace benchmark -} // namespace parquet +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/bloom_filter_reader_test.cc b/cpp/src/parquet/bloom_filter_reader_test.cc index 64dd0d9b9d190..e297ab7045120 100644 --- 
a/cpp/src/parquet/bloom_filter_reader_test.cc +++ b/cpp/src/parquet/bloom_filter_reader_test.cc @@ -22,8 +22,7 @@ #include "parquet/file_reader.h" #include "parquet/test_util.h" -namespace parquet { -namespace test { +namespace parquet::test { TEST(BloomFilterReader, ReadBloomFilter) { std::string dir_string(parquet::test::get_data_dir()); @@ -70,5 +69,4 @@ TEST(BloomFilterReader, FileNotHaveBloomFilter) { ASSERT_EQ(nullptr, bloom_filter); } -} // namespace test -} // namespace parquet +} // namespace parquet::test diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index 6285c4c12539d..7a910e4220831 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -48,9 +48,7 @@ using arrow::internal::checked_cast; namespace bit_util = arrow::bit_util; -namespace parquet { - -namespace test { +namespace parquet::test { TEST(VectorBooleanTest, TestEncodeBoolDecode) { // PARQUET-454 @@ -1910,5 +1908,4 @@ TEST(DeltaLengthByteArrayEncodingAdHoc, ArrowDirectPut) { CheckDecode(encoded, ::arrow::ArrayFromJSON(::arrow::large_binary(), values)); } -} // namespace test -} // namespace parquet +} // namespace parquet::test diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index 67e3d8c5f297e..ebb7c3c7b37fb 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -26,8 +26,7 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { void CryptoFactory::RegisterKmsClientFactory( std::shared_ptr kms_client_factory) { @@ -192,5 +191,4 @@ void CryptoFactory::RotateMasterKeys( double_wrapping, cache_lifetime_seconds); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/crypto_factory.h 
b/cpp/src/parquet/encryption/crypto_factory.h index 14015a95c85e5..291cccf30f8e3 100644 --- a/cpp/src/parquet/encryption/crypto_factory.h +++ b/cpp/src/parquet/encryption/crypto_factory.h @@ -25,8 +25,7 @@ #include "parquet/encryption/kms_client_factory.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { static constexpr ParquetCipher::type kDefaultEncryptionAlgorithm = ParquetCipher::AES_GCM_V1; @@ -152,5 +151,4 @@ class PARQUET_EXPORT CryptoFactory { KeyToolkit key_toolkit_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal.cc b/cpp/src/parquet/encryption/encryption_internal.cc index 1c4d3d8dc4897..6e66efeff6326 100644 --- a/cpp/src/parquet/encryption/encryption_internal.cc +++ b/cpp/src/parquet/encryption/encryption_internal.cc @@ -31,8 +31,7 @@ using parquet::ParquetException; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr int kGcmMode = 0; constexpr int kCtrMode = 1; @@ -649,5 +648,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal.h b/cpp/src/parquet/encryption/encryption_internal.h index 24093c68be531..4ed5b5cf61243 100644 --- a/cpp/src/parquet/encryption/encryption_internal.h +++ b/cpp/src/parquet/encryption/encryption_internal.h @@ -26,8 +26,7 @@ using parquet::ParquetCipher; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr int kGcmTagLength = 16; constexpr int kNonceLength = 12; @@ -129,5 +128,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD); // Wraps OpenSSL RAND_bytes function void RandBytes(unsigned char* buf, int num); -} // namespace encryption -} // 
namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc b/cpp/src/parquet/encryption/encryption_internal_nossl.cc index bb203f0fd877d..0241923474de9 100644 --- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc +++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc @@ -18,8 +18,7 @@ #include "parquet/encryption/encryption_internal.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { void ThrowOpenSSLRequiredException() { throw ParquetException( @@ -115,5 +114,4 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, std::string* AAD) { void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_material_store.h b/cpp/src/parquet/encryption/file_key_material_store.h index 862e8d9761b0d..83f028a4bc1e9 100644 --- a/cpp/src/parquet/encryption/file_key_material_store.h +++ b/cpp/src/parquet/encryption/file_key_material_store.h @@ -24,8 +24,7 @@ #include "arrow/filesystem/filesystem.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// Stores encryption key material outside the Parquet file, for example in a separate /// small file in the same folder. 
This is important for “key rotation”, when MEKs have to @@ -55,5 +54,4 @@ class PARQUET_EXPORT FileKeyMaterialStore { virtual ~FileKeyMaterialStore() {} }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index a2e2d2df5b284..50cc6eee539f7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -22,8 +22,7 @@ #include "parquet/encryption/file_key_unwrapper.h" #include "parquet/encryption/key_metadata.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { FileKeyUnwrapper::FileKeyUnwrapper( KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config, @@ -136,5 +135,4 @@ std::shared_ptr FileKeyUnwrapper::GetKmsClientFromConfigOrKeyMaterial cache_entry_lifetime_seconds_); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index 3400641ed91c5..71b245788a713 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -27,8 +27,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This class will retrieve the key from "key metadata", following these steps: // 1. Parse "key metadata" (see structure in KeyMetadata class). 
@@ -78,5 +77,4 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr<::arrow::fs::FileSystem> file_system_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index 4f0f1d219acba..704651ebaa8b3 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -22,8 +22,7 @@ #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, const KmsConnectionConfig& kms_connection_config, @@ -124,5 +123,4 @@ KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey( return KeyEncryptionKey(kek_bytes, kek_id, encoded_wrapped_kek); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_key_wrapper.h b/cpp/src/parquet/encryption/file_key_wrapper.h index 95ad6ec4829bf..26b9719de64db 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.h +++ b/cpp/src/parquet/encryption/file_key_wrapper.h @@ -29,8 +29,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This class will generate "key metadata" from "data encryption key" and "master key", // following these steps: @@ -82,5 +81,4 @@ class PARQUET_EXPORT FileKeyWrapper { uint16_t key_counter_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_system_key_material_store.cc b/cpp/src/parquet/encryption/file_system_key_material_store.cc index 494a75e4cc307..2d898c1d3970f 100644 --- a/cpp/src/parquet/encryption/file_system_key_material_store.cc +++ 
b/cpp/src/parquet/encryption/file_system_key_material_store.cc @@ -27,8 +27,7 @@ #include "parquet/encryption/key_material.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char FileSystemKeyMaterialStore::kKeyMaterialFilePrefix[]; constexpr const char FileSystemKeyMaterialStore::kTempFilePrefix[]; @@ -139,5 +138,4 @@ void FileSystemKeyMaterialStore::MoveMaterialTo( } } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/file_system_key_material_store.h b/cpp/src/parquet/encryption/file_system_key_material_store.h index 6fbdd55e9413c..896a53202f589 100644 --- a/cpp/src/parquet/encryption/file_system_key_material_store.h +++ b/cpp/src/parquet/encryption/file_system_key_material_store.h @@ -25,8 +25,7 @@ #include "parquet/encryption/file_key_material_store.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// A FileKeyMaterialStore that stores key material in a file system file in the same /// folder as the Parquet file. @@ -87,5 +86,4 @@ class PARQUET_EXPORT FileSystemKeyMaterialStore : public FileKeyMaterialStore { std::unordered_map key_material_map_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_encryption_key.h b/cpp/src/parquet/encryption/key_encryption_key.h index 153bb4b5e2885..62263ee3cd506 100644 --- a/cpp/src/parquet/encryption/key_encryption_key.h +++ b/cpp/src/parquet/encryption/key_encryption_key.h @@ -22,8 +22,7 @@ #include "arrow/util/base64.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // In the double wrapping mode, each "data encryption key" (DEK) is encrypted with a “key // encryption key” (KEK), that in turn is encrypted with a "master encryption key" (MEK). 
@@ -55,5 +54,4 @@ class KeyEncryptionKey { std::string encoded_wrapped_kek_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_management_test.cc b/cpp/src/parquet/encryption/key_management_test.cc index 5eebde0c29584..f733c43ee1e79 100644 --- a/cpp/src/parquet/encryption/key_management_test.cc +++ b/cpp/src/parquet/encryption/key_management_test.cc @@ -37,9 +37,7 @@ #include "parquet/file_reader.h" #include "parquet/test_util.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { class TestEncryptionKeyManagement : public ::testing::Test { protected: @@ -387,6 +385,4 @@ TEST_F(TestEncryptionKeyManagement, ReadParquetMRExternalKeyMaterialFile) { } } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/key_material.cc b/cpp/src/parquet/encryption/key_material.cc index 372279c33a5bd..1cebf5900f316 100644 --- a/cpp/src/parquet/encryption/key_material.cc +++ b/cpp/src/parquet/encryption/key_material.cc @@ -25,8 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KeyMaterial::kKeyMaterialTypeField[]; constexpr const char KeyMaterial::kKeyMaterialType1[]; @@ -155,5 +154,4 @@ std::string KeyMaterial::SerializeToJson( return json_writer.Serialize(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_material.h b/cpp/src/parquet/encryption/key_material.h index f20d23ea35d3b..3e7e862c996d3 100644 --- a/cpp/src/parquet/encryption/key_material.h +++ b/cpp/src/parquet/encryption/key_material.h @@ -29,8 +29,7 @@ class ObjectParser; } // namespace json } // namespace arrow -namespace parquet { -namespace encryption { 
+namespace parquet::encryption { // KeyMaterial class represents the "key material", keeping the information that allows // readers to recover an encryption key (see description of the KeyMetadata class). The @@ -127,5 +126,4 @@ class PARQUET_EXPORT KeyMaterial { std::string encoded_wrapped_dek_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata.cc b/cpp/src/parquet/encryption/key_metadata.cc index 624626c890cc5..e23a67b6b86ee 100644 --- a/cpp/src/parquet/encryption/key_metadata.cc +++ b/cpp/src/parquet/encryption/key_metadata.cc @@ -24,8 +24,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KeyMetadata::kKeyMaterialInternalStorageField[]; constexpr const char KeyMetadata::kKeyReferenceField[]; @@ -85,5 +84,4 @@ std::string KeyMetadata::CreateSerializedForExternalMaterial( return json_writer.Serialize(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata.h b/cpp/src/parquet/encryption/key_metadata.h index b6dc349f19bdf..6fe8ac7ccb9db 100644 --- a/cpp/src/parquet/encryption/key_metadata.h +++ b/cpp/src/parquet/encryption/key_metadata.h @@ -24,8 +24,7 @@ #include "parquet/exception.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // Parquet encryption specification defines "key metadata" as an arbitrary byte array, // generated by file writers for each encryption key, and passed to the low level API for @@ -89,5 +88,4 @@ class PARQUET_EXPORT KeyMetadata { ::std::variant key_material_or_reference_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_metadata_test.cc 
b/cpp/src/parquet/encryption/key_metadata_test.cc index 3f891ef26db83..f9409edf2a8d9 100644 --- a/cpp/src/parquet/encryption/key_metadata_test.cc +++ b/cpp/src/parquet/encryption/key_metadata_test.cc @@ -22,9 +22,7 @@ #include "parquet/encryption/key_material.h" #include "parquet/encryption/key_metadata.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { TEST(KeyMetadataTest, InternalMaterialStorage) { bool is_footer_key = true; @@ -72,6 +70,4 @@ TEST(KeyMetadataTest, ExternalMaterialStorage) { ASSERT_EQ(key_metadata.key_reference(), key_reference); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/key_toolkit.cc b/cpp/src/parquet/encryption/key_toolkit.cc index 0b8543b458289..cb488d3fa23a0 100644 --- a/cpp/src/parquet/encryption/key_toolkit.cc +++ b/cpp/src/parquet/encryption/key_toolkit.cc @@ -27,8 +27,7 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { std::shared_ptr KeyToolkit::GetKmsClient( const KmsConnectionConfig& kms_connection_config, double cache_entry_lifetime_ms) { @@ -119,5 +118,4 @@ void KeyToolkit::RemoveCacheEntriesForAllTokens() { kek_read_cache_per_token().Clear(); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index d65f5d8a2d0f9..f63ade4c8c93f 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -26,8 +26,7 @@ #include "parquet/encryption/two_level_cache_with_expiration.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { static constexpr uint64_t kCacheCleanPeriodForKeyRotation = 60 * 60; // 1 hour 
@@ -104,5 +103,4 @@ class PARQUET_EXPORT KeyWithMasterId { const std::string master_id_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 6e0e4e6c65e1e..bdd65d8de3919 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -20,9 +20,7 @@ #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/key_toolkit_internal.h" -namespace parquet { -namespace encryption { -namespace internal { +namespace parquet::encryption::internal { // Acceptable key lengths in number of bits, used to validate the data key lengths // configured by users and the master key lengths fetched from KMS server. @@ -77,6 +75,4 @@ bool ValidateKeyLength(int32_t key_length_bits) { return found_key_length != std::end(kAcceptableDataKeyLengths); } -} // namespace internal -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::internal diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.h b/cpp/src/parquet/encryption/key_toolkit_internal.h index bcc60bdad68e2..8474a91fc1aba 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.h +++ b/cpp/src/parquet/encryption/key_toolkit_internal.h @@ -21,9 +21,7 @@ #include "parquet/platform.h" -namespace parquet { -namespace encryption { -namespace internal { +namespace parquet::encryption::internal { /// Encrypts "key" with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT @@ -38,6 +36,4 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, PARQUET_EXPORT bool ValidateKeyLength(int32_t key_length_bits); -} // namespace internal -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::internal diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 
dba9d67dfe13d..198ceb9bf4b11 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -26,9 +26,7 @@ #include "parquet/encryption/test_encryption_util.h" #include "parquet/encryption/test_in_memory_kms.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { class KeyWrappingTest : public ::testing::Test { protected: @@ -113,6 +111,4 @@ TEST_F(KeyWrappingTest, ExternalMaterialStorage) { this->WrapThenUnwrap(false, false, false); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/kms_client.cc b/cpp/src/parquet/encryption/kms_client.cc index b9c720272c479..fee03dd3db656 100644 --- a/cpp/src/parquet/encryption/kms_client.cc +++ b/cpp/src/parquet/encryption/kms_client.cc @@ -17,8 +17,7 @@ #include "parquet/encryption/kms_client.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char KmsClient::kKmsInstanceIdDefault[]; constexpr const char KmsClient::kKmsInstanceUrlDefault[]; @@ -40,5 +39,4 @@ void KmsConnectionConfig::SetDefaultIfEmpty() { } } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 5ffa604ffd198..a55fd552eed5f 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -26,8 +26,7 @@ #include "parquet/exception.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// This class wraps the key access token of a KMS server. 
If your token changes over /// time, you should keep the reference to the KeyAccessToken object and call Refresh() @@ -91,5 +90,4 @@ class PARQUET_EXPORT KmsClient { virtual ~KmsClient() {} }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/kms_client_factory.h b/cpp/src/parquet/encryption/kms_client_factory.h index eac8dfc5d06e2..7a7c77c7eebbf 100644 --- a/cpp/src/parquet/encryption/kms_client_factory.h +++ b/cpp/src/parquet/encryption/kms_client_factory.h @@ -20,8 +20,7 @@ #include "parquet/encryption/kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { class PARQUET_EXPORT KmsClientFactory { public: @@ -36,5 +35,4 @@ class PARQUET_EXPORT KmsClientFactory { bool wrap_locally_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 1b89dc57d0e52..23e28bb8e61be 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -25,8 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; -namespace parquet { -namespace encryption { +namespace parquet::encryption { constexpr const char LocalWrapKmsClient::kLocalWrapNoKeyVersion[]; @@ -112,5 +111,4 @@ std::string LocalWrapKmsClient::GetKeyFromServer(const std::string& key_identifi return master_key; } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 65cf8f42c7964..3c90d82960525 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -25,8 +25,7 @@ #include "parquet/encryption/kms_client.h" 
#include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { /// This class supports local wrapping mode, master keys will be fetched from the KMS /// server and used to encrypt other keys (data encryption keys or key encryption keys). @@ -92,5 +91,4 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { ::arrow::util::ConcurrentMap master_key_cache_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 0eb5cba201a24..895cf6c63431e 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -22,9 +22,7 @@ #include "parquet/encryption/encryption.h" #include "parquet/encryption/test_encryption_util.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { TEST(TestColumnEncryptionProperties, ColumnEncryptedWithOwnKey) { std::string column_path_1 = "column_1"; @@ -271,6 +269,4 @@ TEST(TestDecryptionProperties, UsingExplicitFooterAndColumnKeys) { ASSERT_EQ(kColumnEncryptionKey2, props->column_key(column_path_2)); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 0bdb67ee9eadc..10de7198ac5ff 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -81,9 +81,7 @@ */ -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using parquet::test::ParquetTestException; @@ -272,6 +270,4 @@ INSTANTIATE_TEST_SUITE_P( 5, "encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted"), std::make_tuple(6, "encrypt_columns_and_footer_ctr.parquet.encrypted"))); -} // 
namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc index 26ed15ae031e1..694ed3cf42d9e 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.cc +++ b/cpp/src/parquet/encryption/test_encryption_util.cc @@ -37,9 +37,7 @@ using parquet::Type; using parquet::schema::GroupNode; using parquet::schema::PrimitiveNode; -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { std::string data_file(const char* file) { std::string dir_string(parquet::test::get_data_dir()); @@ -511,6 +509,4 @@ void FileDecryptor::CheckFile(parquet::ParquetFileReader* file_reader, } } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index c2190709aff96..19c230ee5ff99 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -37,8 +37,7 @@ namespace parquet { class ParquetFileReader; -namespace encryption { -namespace test { +namespace encryption::test { using ::arrow::internal::TemporaryDir; @@ -122,6 +121,5 @@ class FileDecryptor { FileDecryptionProperties* file_decryption_properties); }; -} // namespace test -} // namespace encryption +} // namespace encryption::test } // namespace parquet diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 5389196b6fa39..e1339ab48b5d6 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -21,8 +21,7 @@ #include "parquet/encryption/test_in_memory_kms.h" #include "parquet/exception.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { 
std::unordered_map TestOnlyLocalWrapInMemoryKms::master_key_map_; @@ -95,5 +94,4 @@ std::string TestOnlyInServerWrapKms::GetMasterKeyFromServer( return wrapping_master_key_map_.at(master_key_identifier); } -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index bf887191d1efc..c5fdc797b8ca7 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -25,8 +25,7 @@ #include "parquet/encryption/local_wrap_kms_client.h" #include "parquet/platform.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { // This is a mock class, built for testing only. Don't use it as an example of // LocalWrapKmsClient implementation. @@ -92,5 +91,4 @@ class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory { } }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/two_level_cache_with_expiration.h b/cpp/src/parquet/encryption/two_level_cache_with_expiration.h index fbd06dc7d20e0..76c2b82770000 100644 --- a/cpp/src/parquet/encryption/two_level_cache_with_expiration.h +++ b/cpp/src/parquet/encryption/two_level_cache_with_expiration.h @@ -23,8 +23,7 @@ #include "arrow/util/concurrent_map.h" #include "arrow/util/mutex.h" -namespace parquet { -namespace encryption { +namespace parquet::encryption { using ::arrow::util::ConcurrentMap; @@ -155,5 +154,4 @@ class TwoLevelCacheWithExpiration { ::arrow::util::Mutex mutex_; }; -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc b/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc index f375a5c5b315c..d8f2c6255145f 100644 --- a/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc +++ 
b/cpp/src/parquet/encryption/two_level_cache_with_expiration_test.cc @@ -25,9 +25,7 @@ #include "parquet/encryption/two_level_cache_with_expiration.h" -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using ::arrow::SleepFor; @@ -172,6 +170,4 @@ TEST_F(TwoLevelCacheWithExpirationTest, MultiThread) { clean_thread.join(); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc index 580c95fdfd2aa..e262003db3e6a 100644 --- a/cpp/src/parquet/encryption/write_configurations_test.cc +++ b/cpp/src/parquet/encryption/write_configurations_test.cc @@ -60,9 +60,7 @@ * keys. Use the alternative (AES_GCM_CTR_V1) algorithm. */ -namespace parquet { -namespace encryption { -namespace test { +namespace parquet::encryption::test { using FileClass = ::arrow::io::FileOutputStream; @@ -231,6 +229,4 @@ void TestEncryptionConfiguration::SetUpTestCase() { temp_dir = temp_data_dir().ValueOrDie(); } -} // namespace test -} // namespace encryption -} // namespace parquet +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/level_comparison.cc b/cpp/src/parquet/level_comparison.cc index c9ad6b76c7280..f3188e987d081 100644 --- a/cpp/src/parquet/level_comparison.cc +++ b/cpp/src/parquet/level_comparison.cc @@ -25,8 +25,7 @@ #include "arrow/util/dispatch.h" -namespace parquet { -namespace internal { +namespace parquet::internal { #if defined(ARROW_HAVE_RUNTIME_AVX2) MinMax FindMinMaxAvx2(const int16_t* levels, int64_t num_levels); @@ -78,5 +77,4 @@ MinMax FindMinMax(const int16_t* levels, int64_t num_levels) { return dispatch.func(levels, num_levels); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_comparison.h b/cpp/src/parquet/level_comparison.h index 
38e7ef8e2ec3f..3ae442dd46e57 100644 --- a/cpp/src/parquet/level_comparison.h +++ b/cpp/src/parquet/level_comparison.h @@ -21,8 +21,7 @@ #include "parquet/platform.h" -namespace parquet { -namespace internal { +namespace parquet::internal { /// Builds a bitmap where each set bit indicates the corresponding level is greater /// than rhs. @@ -36,5 +35,4 @@ struct MinMax { MinMax FindMinMax(const int16_t* levels, int64_t num_levels); -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_comparison_inc.h b/cpp/src/parquet/level_comparison_inc.h index 055f81ffae898..cfee506654331 100644 --- a/cpp/src/parquet/level_comparison_inc.h +++ b/cpp/src/parquet/level_comparison_inc.h @@ -24,9 +24,7 @@ #ifndef PARQUET_IMPL_NAMESPACE #error "PARQUET_IMPL_NAMESPACE must be defined" #endif -namespace parquet { -namespace internal { -namespace PARQUET_IMPL_NAMESPACE { +namespace parquet::internal::PARQUET_IMPL_NAMESPACE { /// Builds a bitmap by applying predicate to the level vector provided. /// /// \param[in] levels Rep or def level array. 
@@ -60,6 +58,4 @@ inline uint64_t GreaterThanBitmapImpl(const int16_t* levels, int64_t num_levels, return LevelsToBitmap(levels, num_levels, [rhs](int16_t value) { return value > rhs; }); } -} // namespace PARQUET_IMPL_NAMESPACE -} // namespace internal -} // namespace parquet +} // namespace parquet::internal::PARQUET_IMPL_NAMESPACE diff --git a/cpp/src/parquet/level_conversion.cc b/cpp/src/parquet/level_conversion.cc index 2e5bcacea55d6..1271afd866d14 100644 --- a/cpp/src/parquet/level_conversion.cc +++ b/cpp/src/parquet/level_conversion.cc @@ -31,8 +31,7 @@ #include "parquet/level_conversion_inc.h" #undef PARQUET_IMPL_NAMESPACE -namespace parquet { -namespace internal { +namespace parquet::internal { namespace { using ::arrow::internal::CpuInfo; @@ -179,5 +178,4 @@ void DefRepLevelsToBitmap(const int16_t* def_levels, const int16_t* rep_levels, output, /*offsets=*/nullptr); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion.h b/cpp/src/parquet/level_conversion.h index 480d82ed0d81a..3f56b2de36a78 100644 --- a/cpp/src/parquet/level_conversion.h +++ b/cpp/src/parquet/level_conversion.h @@ -23,8 +23,7 @@ #include "parquet/platform.h" #include "parquet/schema.h" -namespace parquet { -namespace internal { +namespace parquet::internal { struct PARQUET_EXPORT LevelInfo { LevelInfo() @@ -196,5 +195,4 @@ void PARQUET_EXPORT DefRepLevelsToBitmap(const int16_t* def_levels, // (i.e. it isn't hidden by runtime dispatch). 
uint64_t PARQUET_EXPORT TestOnlyExtractBitsSoftware(uint64_t bitmap, uint64_t selection); -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion_benchmark.cc b/cpp/src/parquet/level_conversion_benchmark.cc index f9e91c4820f68..f3a4f8095e3a1 100644 --- a/cpp/src/parquet/level_conversion_benchmark.cc +++ b/cpp/src/parquet/level_conversion_benchmark.cc @@ -29,7 +29,7 @@ constexpr int16_t kMissingDefLevel = 0; // Definition Level indicating the values has an entry in the leaf element. constexpr int16_t kPresentDefLevel = 2; -// A repition level that indicates a repeated element. +// A repetition level that indicates a repeated element. constexpr int16_t kHasRepeatedElements = 1; std::vector RunDefinitionLevelsToBitmap(const std::vector& def_levels, diff --git a/cpp/src/parquet/level_conversion_bmi2.cc b/cpp/src/parquet/level_conversion_bmi2.cc index 274d54e503c81..a39d1fd1eb461 100644 --- a/cpp/src/parquet/level_conversion_bmi2.cc +++ b/cpp/src/parquet/level_conversion_bmi2.cc @@ -20,8 +20,7 @@ #include "parquet/level_conversion_inc.h" #undef PARQUET_IMPL_NAMESPACE -namespace parquet { -namespace internal { +namespace parquet::internal { void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels, int64_t num_def_levels, LevelInfo level_info, ValidityBitmapInputOutput* output) { @@ -29,5 +28,4 @@ void DefLevelsToBitmapBmi2WithRepeatedParent(const int16_t* def_levels, level_info, output); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h index 710d2f6237913..0bcdbccb34a73 100644 --- a/cpp/src/parquet/level_conversion_inc.h +++ b/cpp/src/parquet/level_conversion_inc.h @@ -29,13 +29,10 @@ #include "arrow/util/simd.h" #include "parquet/exception.h" #include "parquet/level_comparison.h" - -namespace parquet { -namespace internal { #ifndef PARQUET_IMPL_NAMESPACE 
#error "PARQUET_IMPL_NAMESPACE must be defined" #endif -namespace PARQUET_IMPL_NAMESPACE { +namespace parquet::internal::PARQUET_IMPL_NAMESPACE { // clang-format off /* Python code to generate lookup table: @@ -352,6 +349,4 @@ void DefLevelsToBitmapSimd(const int16_t* def_levels, int64_t num_def_levels, writer.Finish(); } -} // namespace PARQUET_IMPL_NAMESPACE -} // namespace internal -} // namespace parquet +} // namespace parquet::internal::PARQUET_IMPL_NAMESPACE diff --git a/cpp/src/parquet/level_conversion_test.cc b/cpp/src/parquet/level_conversion_test.cc index bfce74ae3a868..b12680089b839 100644 --- a/cpp/src/parquet/level_conversion_test.cc +++ b/cpp/src/parquet/level_conversion_test.cc @@ -31,8 +31,7 @@ #include "arrow/util/bitmap.h" #include "arrow/util/ubsan.h" -namespace parquet { -namespace internal { +namespace parquet::internal { using ::arrow::internal::Bitmap; using ::testing::ElementsAreArray; @@ -357,5 +356,4 @@ TEST(TestOnlyExtractBitsSoftware, BasicTest) { check(0xFECBDA9876543210ULL, 0xF00FF00FF00FF00FULL, 0xFBD87430ULL); } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/page_index_benchmark.cc b/cpp/src/parquet/page_index_benchmark.cc new file mode 100644 index 0000000000000..5631034105056 --- /dev/null +++ b/cpp/src/parquet/page_index_benchmark.cc @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "benchmark/benchmark.h" + +#include "parquet/benchmark_util.h" +#include "parquet/metadata.h" +#include "parquet/page_index.h" +#include "parquet/schema.h" +#include "parquet/test_util.h" +#include "parquet/thrift_internal.h" + +namespace parquet::benchmark { + +void PageIndexSetArgs(::benchmark::internal::Benchmark* bench) { + bench->ArgNames({"num_pages"}); + bench->Range(8, 1024); +} + +void BM_ReadOffsetIndex(::benchmark::State& state) { + auto builder = OffsetIndexBuilder::Make(); + const int num_pages = static_cast(state.range(0)); + constexpr int64_t page_size = 1024; + constexpr int64_t first_row_index = 10000; + for (int i = 0; i < num_pages; ++i) { + builder->AddPage(page_size * i, page_size, first_row_index * i); + } + constexpr int64_t final_position = 4096; + builder->Finish(final_position); + auto sink = CreateOutputStream(); + builder->WriteTo(sink.get()); + auto buffer = sink->Finish().ValueOrDie(); + ReaderProperties properties; + for (auto _ : state) { + auto offset_index = OffsetIndex::Make( + buffer->data() + 0, static_cast(buffer->size()), properties); + ::benchmark::DoNotOptimize(offset_index); + } + state.SetBytesProcessed(state.iterations() * buffer->size()); + state.SetItemsProcessed(state.iterations() * num_pages); +} + +BENCHMARK(BM_ReadOffsetIndex)->Apply(PageIndexSetArgs); + +// The sample string length for FLBA and ByteArray benchmarks +constexpr static uint32_t kDataStringLength = 8; + +template +void BM_ReadColumnIndex(::benchmark::State& state) { + schema::NodePtr type = 
::parquet::schema::PrimitiveNode::Make( + "b", Repetition::OPTIONAL, DType::type_num, ConvertedType::NONE, 8); + auto descr_ptr = + std::make_unique(type, /*def_level=*/1, /*rep_level=*/0); + auto descr = descr_ptr.get(); + + const int num_pages = static_cast(state.range(0)); + auto builder = ColumnIndexBuilder::Make(descr); + + const size_t values_per_page = 100; + for (int i = 0; i < num_pages; ++i) { + auto stats = MakeStatistics(descr); + std::vector heap; + std::vector values; + values.resize(values_per_page); + GenerateBenchmarkData(values_per_page, /*seed=*/0, values.data(), &heap, + kDataStringLength); + stats->Update(values.data(), values_per_page, /*null_count=*/0); + builder->AddPage(stats->Encode()); + } + + builder->Finish(); + auto sink = CreateOutputStream(); + builder->WriteTo(sink.get()); + auto buffer = sink->Finish().ValueOrDie(); + ReaderProperties properties; + for (auto _ : state) { + auto column_index = ColumnIndex::Make(*descr, buffer->data() + 0, + static_cast(buffer->size()), properties); + ::benchmark::DoNotOptimize(column_index); + } + state.SetBytesProcessed(state.iterations() * buffer->size()); + state.SetItemsProcessed(state.iterations() * num_pages); +} + +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, Int64Type)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, DoubleType)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, FLBAType)->Apply(PageIndexSetArgs); +BENCHMARK_TEMPLATE(BM_ReadColumnIndex, ByteArrayType)->Apply(PageIndexSetArgs); + +} // namespace parquet::benchmark diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index dfb4b5d0fbf4a..b0aafa037ead1 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -556,7 +556,7 @@ static inline int MakePages(const ColumnDescriptor* d, int num_pages, int levels } else { num_values = num_levels; } - // Create repitition levels + // Create repetition levels if (max_rep_level > 0 && num_levels != 0) { 
rep_levels.resize(num_levels); // Using a different seed so that def_levels and rep_levels are different. diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index f35384b8df1ef..e81e9de0a1efa 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -30,13 +30,11 @@ #include "parquet/type_fwd.h" #include "parquet/windows_fixup.h" // for OPTIONAL -namespace arrow { -namespace util { +namespace arrow::util { class Codec; -} // namespace util -} // namespace arrow +} // namespace arrow::util namespace parquet { diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs index 56e0468f9415c..c748eed915d89 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayStreamExporter.cs @@ -165,7 +165,7 @@ sealed unsafe class ExportedArrayStream : IDisposable public static void Free(void** ptr) { - GCHandle gch = GCHandle.FromIntPtr((IntPtr)ptr); + GCHandle gch = GCHandle.FromIntPtr((IntPtr)(*ptr)); if (!gch.IsAllocated) { return; diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 90b2e9b034eea..0f36a5ba9025c 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -78,7 +78,24 @@ def get_json(url, headers=None): response = requests.get(url, headers=headers) if response.status_code != 200: raise ValueError(response.json()) - return response.json() + # GitHub returns a link header with the next, previous, last + # page if there is pagination on the response. 
See: + # https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api#using-link-headers + next_responses = None + if "link" in response.headers: + links = response.headers['link'].split(', ') + for link in links: + if 'rel="next"' in link: + # Format: '; rel="next"' + next_url = link.split(";")[0][1:-1] + next_responses = get_json(next_url, headers) + responses = response.json() + if next_responses: + if isinstance(responses, list): + responses.extend(next_responses) + else: + raise ValueError('GitHub response was paginated and is not a list') + return responses def run_cmd(cmd): diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb index c0df6a32175eb..4586649d0c0bc 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow-static.rb @@ -25,7 +25,7 @@ class ApacheArrowStatic < Formula # Uncomment and update to test on a release candidate # mirror "https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-8.0.0-rc1/apache-arrow-8.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" - head "https://github.com/apache/arrow.git" + head "https://github.com/apache/arrow.git", branch: "main" bottle do sha256 cellar: :any, arm64_big_sur: "ef89d21a110b89840cc6148add685d407e75bd633bc8f79625eb33d00e3694b4" diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index c09436d777ae9..b47d0edfe0dd7 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -21,7 +21,7 @@ class ApacheArrow < Formula homepage "https://arrow.apache.org/" url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-12.0.1.9000/apache-arrow-12.0.1.9000.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" - head "https://github.com/apache/arrow.git" 
+ head "https://github.com/apache/arrow.git", branch: "main" bottle do cellar :any @@ -35,6 +35,7 @@ class ApacheArrow < Formula depends_on "aws-sdk-cpp" depends_on "brotli" depends_on "lz4" + depends_on "openssl@1.1" depends_on "snappy" depends_on "thrift" depends_on "zstd" diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja index 1fdfc08be03e6..475494af18ce6 100644 --- a/dev/tasks/macros.jinja +++ b/dev/tasks/macros.jinja @@ -142,7 +142,7 @@ on: run: | PATH=$(echo $(ruby -r rubygems -e 'puts Gem.user_dir') | sed "s/C:\//\/c\//")/bin:$PATH gem install --user-install gemfury - fury \ + fury push \ --api-token=${CROSSBOW_GEMFURY_TOKEN} \ --as=${CROSSBOW_GEMFURY_ORG} \ {{ pattern }} @@ -238,13 +238,14 @@ on: cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools/ # Pin the git commit in the formula to match - cd tools + pushd tools if [ "{{ is_fork }}" == "true" ]; then - sed -i.bak -E -e 's/apache\/arrow.git"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + sed -i.bak -E -e 's/apache\/arrow.git", branch: "main"$/{{ arrow.github_repo.split("/") | join("\/") }}.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb else - sed -i.bak -E -e 's/arrow.git"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb + sed -i.bak -E -e 's/arrow.git", branch: "main"$/arrow.git", :revision => "'"{{ arrow.head }}"'"/' apache-arrow*.rb fi rm -f apache-arrow*.rb.bak + popd {% endmacro %} {%- macro github_change_r_pkg_version(is_fork, version) -%} diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 06fd73ffd0d98..e8490735926c1 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -26,6 +26,29 @@ Arrow C++ at runtime. Many of these variables are inspected only once per process (for example, when the Arrow C++ DLL is loaded), so you cannot assume that changing their value later will have an effect. +.. 
envvar:: ACERO_ALIGNMENT_HANDLING + + Arrow C++'s Acero module performs computation on streams of data. This + computation may involve a form of "type punning" that is technically + undefined behavior if the underlying array is not properly aligned. On + most modern CPUs this is not an issue, but some older CPUs may crash or + suffer poor performance. For this reason it is recommended that all + incoming array buffers are properly aligned, but some data sources + such as :ref:`Flight ` may produce unaligned buffers. + + The value of this environment variable controls what will happen when + Acero detects an unaligned buffer: + + - ``warn``: a warning is emitted + - ``ignore``: nothing, alignment checking is disabled + - ``reallocate``: the buffer is reallocated to a properly aligned address + - ``error``: the operation fails with an error + + The default behavior is ``warn``. On modern hardware it is usually safe + to change this to ``ignore``. Changing to ``reallocate`` is the safest + option but this will have a significant performance impact as the buffer + will need to be copied. + .. envvar:: ARROW_DEBUG_MEMORY_POOL Enable rudimentary memory checks to guard against buffer overflows. diff --git a/docs/source/developers/continuous_integration/crossbow.rst b/docs/source/developers/continuous_integration/crossbow.rst index 663fc17c0a028..6308f077ac9a6 100644 --- a/docs/source/developers/continuous_integration/crossbow.rst +++ b/docs/source/developers/continuous_integration/crossbow.rst @@ -47,7 +47,7 @@ Executors Individual jobs are executed on public CI services, currently: - Linux: GitHub Actions, Travis CI, Azure Pipelines -- macOS: GitHub Actions, Travis CI, Azure Pipelines +- macOS: GitHub Actions, Azure Pipelines - Windows: GitHub Actions, Azure Pipelines Queue @@ -59,7 +59,7 @@ queue for the tasks. Anyone can host a ``queue`` repository (usually named ``/crossbow``). 
A job is a git commit on a particular git branch, containing the required -configuration files to run the requested builds (like ``.travis.yml``, +configuration files to run the requested builds (like ``.travis.yml``, ``azure-pipelines.yml``, or ``crossbow.yml`` for `GitHub Actions`_ ). Scheduler @@ -118,7 +118,7 @@ to step 3: ``https://travis-ci.com///settings`` - Confirm the `auto cancellation`_ feature is turned off for branch builds. This should be the default setting. - + 7. Install Python (minimum supported version is 3.8): | Miniconda is preferred, see installation instructions: diff --git a/docs/source/developers/continuous_integration/overview.rst b/docs/source/developers/continuous_integration/overview.rst index 70323c9e48927..1d82e845a3360 100644 --- a/docs/source/developers/continuous_integration/overview.rst +++ b/docs/source/developers/continuous_integration/overview.rst @@ -26,7 +26,6 @@ Some files central to Arrow CI are: - ``docker-compose.yml`` - here we define docker services which can be configured using either enviroment variables, or the default values for these variables. - ``.env`` - here we define default values to configure the services in ``docker-compose.yml`` -- ``.travis.yml`` - here we define workflows which run on Travis - ``appveyor.yml`` - here we define workflows that run on Appveyor We use :ref:`Docker` in order to have portable and reproducible Linux builds, as well as running Windows builds in Windows containers. We use :ref:`Archery` and :ref:`Crossbow` to help co-ordinate the various CI tasks. 
@@ -60,7 +59,6 @@ The ``.yml`` files in ``.github/worflows`` are workflows which are run on GitHub There are two other files which define action-triggered builds: -- ``.travis.yml`` - runs on all commits and is used to test on architectures such as ARM and S390x - ``appveyor.yml`` - runs on commits related to Python or C++ Extended builds diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index cb0d713f50d0c..066400b33ffb5 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -497,8 +497,8 @@ Be sure to go through on the following checklist: Our CI systems give us some coverage for the things that CRAN checks, but there are a couple of final tests we should do to confirm that the release binaries will work and that everything runs on the same infrastructure that - CRAN has, which is difficult/impossible to emulate fully on Travis or with - Docker. For a precise list of checks, see the + CRAN has, which is difficult/impossible to emulate fully with Docker. For a + precise list of checks, see the `packaging checklist `_. 
Once all checks are clean, we submit to CRAN, which has a web form for diff --git a/docs/source/status.rst b/docs/source/status.rst index 6c55b4bd3e01a..5c8895b114ae3 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -96,7 +96,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Run-End Encoded | | | ✓ | | | | | | +| Run-End Encoded | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ Notes: diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 7b2f10ea66723..dae9f5fefe242 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -283,7 +283,7 @@ func (exp *schemaExporter) export(field arrow.Field) { func allocateArrowSchemaArr(n int) (out []CArrowSchema) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowSchema * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowSchema)) s.Len = n s.Cap = n @@ -292,7 +292,7 @@ func allocateArrowSchemaArr(n int) (out []CArrowSchema) { func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))))) s.Len = n s.Cap = n @@ -301,7 +301,7 @@ func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { func allocateArrowArrayArr(n int) (out []CArrowArray) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.sizeof_struct_ArrowArray * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.sizeof_struct_ArrowArray)) s.Len = n s.Cap = n @@ -310,7 +310,7 @@ func allocateArrowArrayArr(n int) 
(out []CArrowArray) { func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*CArrowArray)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*CArrowArray)(nil))))) s.Len = n s.Cap = n @@ -319,7 +319,7 @@ func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { func allocateBufferPtrArr(n int) (out []*C.void) { s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = uintptr(C.malloc(C.size_t(unsafe.Sizeof((*C.void)(nil))) * C.size_t(n))) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof((*C.void)(nil))))) s.Len = n s.Cap = n diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c index b85e1e8310f94..7aed597942b51 100644 --- a/go/arrow/cdata/cdata_fulltest.c +++ b/go/arrow/cdata/cdata_fulltest.c @@ -404,6 +404,7 @@ void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) int test_exported_stream(struct ArrowArrayStream* stream) { while (1) { struct ArrowArray array; + memset(&array, 0, sizeof(array)); // Garbage - implementation should not try to call it, though! array.release = (void*)0xDEADBEEF; int rc = stream->get_next(stream, &array); @@ -447,3 +448,35 @@ void test_stream_schema_fallible(struct ArrowArrayStream* stream) { stream->private_data = &kFallibleStream; stream->release = FallibleRelease; } + +int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) { + struct ArrowSchema schema; + // Try to confuse the Go GC by putting what looks like a Go pointer here. 
+#ifdef _WIN32 + // Thread-safe on Windows with the multithread CRT +#define DORAND rand() +#else +#define DORAND rand_r(&seed) +#endif + schema.name = (char*)(0xc000000000L + (DORAND % 0x2000)); + schema.format = (char*)(0xc000000000L + (DORAND % 0x2000)); + int rc = stream->get_schema(stream, &schema); + if (rc != 0) return rc; + schema.release(&schema); + + while (1) { + struct ArrowArray array; + array.release = (void*)(0xc000000000L + (DORAND % 0x2000)); + array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000)); + int rc = stream->get_next(stream, &array); + if (rc != 0) return rc; + + if (array.release == NULL) { + stream->release(stream); + break; + } + array.release(&array); + } + return 0; +#undef DORAND +} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index f336dec3707da..0c4bbae3d5526 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -29,6 +29,7 @@ import ( "io" "runtime" "runtime/cgo" + "sync" "testing" "time" "unsafe" @@ -768,6 +769,34 @@ func TestExportRecordReaderStream(t *testing.T) { assert.EqualValues(t, len(reclist), i) } +func TestExportRecordReaderStreamLifetime(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + schema := arrow.NewSchema([]arrow.Field{ + {Name: "strings", Type: arrow.BinaryTypes.String, Nullable: false}, + }, nil) + + bldr := array.NewBuilder(mem, &arrow.StringType{}) + defer bldr.Release() + + arr := bldr.NewArray() + defer arr.Release() + + rec := array.NewRecord(schema, []arrow.Array{arr}, 0) + defer rec.Release() + + rdr, _ := array.NewRecordReader(schema, []arrow.Record{rec}) + defer rdr.Release() + + out := createTestStreamObj() + ExportRecordReader(rdr, out) + + // C Stream is holding on to memory + assert.NotEqual(t, 0, mem.CurrentAlloc()) + releaseStream(out) +} + func TestEmptyListExport(t *testing.T) { bldr := array.NewBuilder(memory.DefaultAllocator, 
arrow.LargeListOf(arrow.PrimitiveTypes.Int32)) defer bldr.Release() @@ -940,3 +969,28 @@ func TestRecordReaderImportError(t *testing.T) { } assert.Contains(t, err.Error(), "Expected error message") } + +func TestConfuseGoGc(t *testing.T) { + // Regression test for https://github.com/apache/arrow-adbc/issues/729 + reclist := arrdata.Records["primitives"] + + var wg sync.WaitGroup + concurrency := 32 + wg.Add(concurrency) + + // XXX: this test is a bit expensive + for i := 0; i < concurrency; i++ { + go func() { + for i := 0; i < 256; i++ { + rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist) + assert.NoError(t, err) + runtime.GC() + assert.NoError(t, confuseGoGc(rdr)) + runtime.GC() + } + wg.Done() + }() + } + + wg.Wait() +} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index fb6122964168b..c731c730c6bcd 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -21,11 +21,16 @@ package cdata // #include // #include +// #include // #include "arrow/c/abi.h" // #include "arrow/c/helpers.h" // // void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); -// struct ArrowArray* get_test_arr() { return (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); } +// struct ArrowArray* get_test_arr() { +// struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); +// memset(array, 0, sizeof(*array)); +// return array; +// } // struct ArrowArrayStream* get_test_stream() { // struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); // memset(out, 0, sizeof(struct ArrowArrayStream)); @@ -56,11 +61,13 @@ package cdata // struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n); // int test_exported_stream(struct ArrowArrayStream* stream); // void test_stream_schema_fallible(struct ArrowArrayStream* stream); +// int confuse_go_gc(struct 
ArrowArrayStream* stream, unsigned int seed); import "C" import ( "errors" "fmt" "io" + "math/rand" "unsafe" "github.com/apache/arrow/go/v13/arrow" @@ -271,15 +278,17 @@ func createCArr(arr arrow.Array) *CArrowArray { carr.null_count = C.int64_t(arr.NullN()) carr.offset = C.int64_t(arr.Data().Offset()) buffers := arr.Data().Buffers() - cbuf := []unsafe.Pointer{} - for _, b := range buffers { + cbufs := allocateBufferPtrArr(len(buffers)) + for i, b := range buffers { if b != nil { - cbuf = append(cbuf, C.CBytes(b.Bytes())) + cbufs[i] = (*C.void)(C.CBytes(b.Bytes())) + } else { + cbufs[i] = nil } } - carr.n_buffers = C.int64_t(len(cbuf)) - if len(cbuf) > 0 { - carr.buffers = &cbuf[0] + carr.n_buffers = C.int64_t(len(cbufs)) + if len(cbufs) > 0 { + carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) } carr.release = (*[0]byte)(C.release_test_arr) @@ -350,3 +359,14 @@ func fallibleSchemaTest() error { } return nil } + +func confuseGoGc(reader array.RecordReader) error { + out := C.get_test_stream() + ExportRecordReader(reader, out) + rc := C.confuse_go_gc(out, C.uint(rand.Int())) + C.free(unsafe.Pointer(out)) + if rc == 0 { + return nil + } + return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) +} diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go index 2bbd45e58af01..118dec2c38b96 100644 --- a/go/arrow/cdata/exports.go +++ b/go/arrow/cdata/exports.go @@ -28,11 +28,14 @@ import ( // #include // #include "arrow/c/helpers.h" // -// typedef const char cchar_t; -// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); -// extern const char* streamGetError(struct ArrowArrayStream*); -// extern void streamRelease(struct ArrowArrayStream*); +// typedef const char cchar_t; +// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); +// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); +// 
extern const char* streamGetError(struct ArrowArrayStream*); +// extern void streamRelease(struct ArrowArrayStream*); +// // XXX(https://github.com/apache/arrow-adbc/issues/729) +// int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out); +// int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out); // import "C" @@ -154,10 +157,11 @@ func streamRelease(handle *CArrowArrayStream) { } func exportStream(rdr array.RecordReader, out *CArrowArrayStream) { - out.get_schema = (*[0]byte)(C.streamGetSchema) - out.get_next = (*[0]byte)(C.streamGetNext) + out.get_schema = (*[0]byte)(C.streamGetSchemaTrampoline) + out.get_next = (*[0]byte)(C.streamGetNextTrampoline) out.get_last_error = (*[0]byte)(C.streamGetError) out.release = (*[0]byte)(C.streamRelease) + rdr.Retain() h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil}) out.private_data = createHandle(h) } diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index 64b8176ad221a..50404878005b9 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -198,6 +198,11 @@ func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio // the populating of the struct. Any memory allocated will be allocated using malloc // which means that it is invisible to the Go Garbage Collector and must be freed manually // using the callback on the CArrowSchema object. +// +// WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! 
func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()} exportField(dummy, out) @@ -220,6 +225,11 @@ func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { // The release function on the populated CArrowArray will properly decrease the reference counts, // and release the memory if the record has already been released. But since this must be explicitly // done, make sure it is released so that you do not create a memory leak. +// +// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) { children := make([]arrow.ArrayData, rb.NumCols()) for i := range rb.Columns() { @@ -243,6 +253,11 @@ func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrow // being used by the arrow.Array passed in, in order to share with zero-copy across the C // Data Interface. See the documentation for ExportArrowRecordBatch for details on how to ensure // you do not leak memory and prevent unwanted, undefined or strange behaviors. +// +// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector +// may error at runtime, due to CGO rules ("the current implementation may sometimes +// cause a runtime error if the contents of the C memory appear to be a Go pointer"). +// You have been warned! 
func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportArray(arr, out, outSchema) } @@ -252,6 +267,11 @@ func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema // CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release // callback, as such it is unnecesary to call Release on the passed in reader unless it has // previously been retained. +// +// WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage +// collector may error at runtime, due to CGO rules ("the current implementation may +// sometimes cause a runtime error if the contents of the C memory appear to be a Go +// pointer"). You have been warned! func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) { exportStream(reader, out) } diff --git a/go/arrow/cdata/trampoline.c b/go/arrow/cdata/trampoline.c new file mode 100644 index 0000000000000..01db13fab4845 --- /dev/null +++ b/go/arrow/cdata/trampoline.c @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "arrow/c/abi.h" + +int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); +int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); + +int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) { + // XXX(https://github.com/apache/arrow-adbc/issues/729) + memset(out, 0, sizeof(*out)); + return streamGetSchema(stream, out); +} + +int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) { + // XXX(https://github.com/apache/arrow-adbc/issues/729) + memset(out, 0, sizeof(*out)); + return streamGetNext(stream, out); +} diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go index 6b201cadcee26..fa5a51cb5b8d5 100644 --- a/go/parquet/file/file_reader_test.go +++ b/go/parquet/file/file_reader_test.go @@ -333,3 +333,55 @@ func TestIncompleteMetadata(t *testing.T) { _, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) assert.Error(t, err) } + +func TestDeltaLengthByteArrayPackingWithNulls(t *testing.T) { + // produce file with DeltaLengthByteArray Encoding with mostly null values but one actual value. 
+ root, _ := schema.NewGroupNode("schema", parquet.Repetitions.Repeated, schema.FieldList{ + schema.NewByteArrayNode("byte_array_col", parquet.Repetitions.Optional, -1), + }, -1) + props := parquet.NewWriterProperties(parquet.WithVersion(parquet.V2_LATEST), + parquet.WithEncoding(parquet.Encodings.DeltaLengthByteArray), parquet.WithDictionaryDefault(false)) + sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) + + writer := file.NewParquetWriter(sink, root, file.WithWriterProps(props)) + rgw := writer.AppendRowGroup() + ccw, err := rgw.NextColumn() + assert.NoError(t, err) + const elements = 500 + data := make([]parquet.ByteArray, elements) + data[0] = parquet.ByteArray{1, 2, 3, 4, 5, 6, 7, 8} + + defLvls := make([]int16, elements) + repLvls := make([]int16, elements) + defLvls[0] = 1 + + _, err = ccw.(*file.ByteArrayColumnChunkWriter).WriteBatch(data, defLvls, repLvls) + assert.NoError(t, err) + assert.NoError(t, ccw.Close()) + assert.NoError(t, rgw.Close()) + assert.NoError(t, writer.Close()) + buf := sink.Finish() + defer buf.Release() + + // read file back in + reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + assert.NoError(t, err) + defer reader.Close() + ccr, err := reader.RowGroup(0).Column(0) + assert.NoError(t, err) + const batchSize = 500 + + for ccr.HasNext() { + readData := make([]parquet.ByteArray, batchSize) + readdevLvls := make([]int16, batchSize) + readrepLvls := make([]int16, batchSize) + cr := ccr.(*file.ByteArrayColumnChunkReader) + + total, read, err := cr.ReadBatch(batchSize, readData, readdevLvls, readrepLvls) + assert.NoError(t, err) + assert.Equal(t, int64(batchSize), total) + assert.Equal(t, 1, read) + assert.Equal(t, data[0], readData[0]) + assert.NotNil(t, readData[0]) + } +} diff --git a/go/parquet/file/file_writer.go b/go/parquet/file/file_writer.go index c931377323e2b..cd0445f4180f1 100644 --- a/go/parquet/file/file_writer.go +++ b/go/parquet/file/file_writer.go @@ -18,6 +18,7 @@ package file import ( 
"encoding/binary" + "fmt" "io" "github.com/apache/arrow/go/v13/parquet" @@ -155,7 +156,7 @@ func (fw *Writer) startFile() { // Close closes any open row group writer and writes the file footer. Subsequent // calls to close will have no effect. -func (fw *Writer) Close() error { +func (fw *Writer) Close() (err error) { if fw.open { // if any functions here panic, we set open to be false so // that this doesn't get called again @@ -165,11 +166,20 @@ func (fw *Writer) Close() error { fw.rowGroupWriter.Close() } fw.rowGroupWriter = nil - defer fw.sink.Close() + defer func() { + ierr := fw.sink.Close() + if err != nil { + if ierr != nil { + err = fmt.Errorf("error on close:%w, %s", err, ierr) + } + return + } + + err = ierr + }() fileEncryptProps := fw.props.FileEncryptionProperties() if fileEncryptProps == nil { // non encrypted file - var err error if fw.FileMetadata, err = fw.metadata.Finish(); err != nil { return err } diff --git a/go/parquet/file/file_writer_test.go b/go/parquet/file/file_writer_test.go index bba0d2be28d98..2cbdb910724ad 100644 --- a/go/parquet/file/file_writer_test.go +++ b/go/parquet/file/file_writer_test.go @@ -18,6 +18,7 @@ package file_test import ( "bytes" + "fmt" "reflect" "testing" @@ -395,3 +396,25 @@ func TestSerialize(t *testing.T) { }) } } + +type errCloseWriter struct { + sink *encoding.BufferWriter +} + +func (c *errCloseWriter) Write(p []byte) (n int, err error) { + return c.sink.Write(p) +} +func (c *errCloseWriter) Close() error { + return fmt.Errorf("error during close") +} +func (c *errCloseWriter) Bytes() []byte { + return c.sink.Bytes() +} + +func TestCloseError(t *testing.T) { + fields := schema.FieldList{schema.NewInt32Node("col", parquet.Repetitions.Required, 1)} + sc, _ := schema.NewGroupNode("schema", parquet.Repetitions.Required, fields, 0) + sink := &errCloseWriter{sink: encoding.NewBufferWriter(0, memory.DefaultAllocator)} + writer := file.NewParquetWriter(sink, sc) + assert.Error(t, writer.Close()) +} diff --git 
a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index 2ebe6ad98354c..ab542eabb2d3d 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -156,7 +156,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.MinInt(len(out), d.nvals) + max := shared_utils.MinInt(len(out), int(d.totalValues)) if max == 0 { return 0, nil } @@ -315,7 +315,7 @@ const ( // Consists of a header followed by blocks of delta encoded values binary packed. // // Format -// [header] [block 1] [block 2] ... [block N] +// [header] [block 1] [block 2] ... [block N] // // Header // [block size] [number of mini blocks per block] [total value count] [first value] diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go new file mode 100644 index 0000000000000..1c008505252fb --- /dev/null +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "fmt" + "github.com/apache/arrow/go/v13/arrow/memory" + "github.com/apache/arrow/go/v13/parquet" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestDeltaByteArrayDecoder_SetData(t *testing.T) { + tests := []struct { + name string + nvalues int + data []byte + wantErr assert.ErrorAssertionFunc + }{ + { + name: "null only page", + nvalues: 126609, + data: []byte{128, 1, 4, 0, 0}, + wantErr: assert.NoError, + }, + } + for _, tt := range tests { + d := NewDecoder(parquet.Types.ByteArray, parquet.Encodings.DeltaLengthByteArray, nil, memory.DefaultAllocator) + t.Run(tt.name, func(t *testing.T) { + tt.wantErr(t, d.SetData(tt.nvalues, tt.data), fmt.Sprintf("SetData(%v, %v)", tt.nvalues, tt.data)) + }) + } +} diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index c11ded1b8f352..d719dcf829cbd 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -117,7 +117,7 @@ func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { if err := dec.SetData(nvalues, data); err != nil { return err } - d.lengths = make([]int32, nvalues) + d.lengths = make([]int32, dec.totalValues) dec.Decode(d.lengths) return d.decoder.SetData(nvalues, data[int(dec.bytesRead()):]) diff --git a/js/src/builder.ts b/js/src/builder.ts index 6f84154935f7b..90fe3ddcc9477 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -72,7 +72,7 @@ export interface BuilderOptions { * * @example * ```ts - * import { Builder, Utf8 } from 'apache-arrow'; + * import { makeBuilder, Utf8 } from 'apache-arrow'; * * const utf8Builder = makeBuilder({ * type: new Utf8(), diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index 
35dc496bddb00..c2d0330b5f78e 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -20,16 +20,21 @@ #include "arrow/matlab/array/proxy/array.h" #include "arrow/matlab/bit/unpack.h" #include "arrow/matlab/error/error.h" +#include "arrow/type_traits.h" +#include "arrow/visit_array_inline.h" + +#include "libmexclass/proxy/ProxyManager.h" namespace arrow::matlab::array::proxy { - Array::Array() { + Array::Array(std::shared_ptr array) : array{std::move(array)} { // Register Proxy methods. REGISTER_METHOD(Array, toString); REGISTER_METHOD(Array, toMATLAB); REGISTER_METHOD(Array, length); REGISTER_METHOD(Array, valid); + REGISTER_METHOD(Array, type); } std::shared_ptr Array::getArray() { @@ -69,4 +74,18 @@ namespace arrow::matlab::array::proxy { auto valid_elements_mda = bit::unpack(validity_bitmap, array_length); context.outputs[0] = valid_elements_mda; } -} + + void Array::type(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + mda::ArrayFactory factory; + + auto type_proxy = typeProxy(); + auto type_id = type_proxy->unwrap()->id(); + auto proxy_id = libmexclass::proxy::ProxyManager::manageProxy(type_proxy); + + context.outputs[0] = factory.createScalar(proxy_id); + context.outputs[1] = factory.createScalar(static_cast(type_id)); + + } +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index 94fad759759ca..55d48c26eff6f 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -18,6 +18,7 @@ #pragma once #include "arrow/array.h" +#include "arrow/matlab/type/proxy/type.h" #include "libmexclass/proxy/Proxy.h" @@ -25,7 +26,7 @@ namespace arrow::matlab::array::proxy { class Array : public libmexclass::proxy::Proxy { public: - Array(); + Array(std::shared_ptr array); virtual ~Array() {} @@ -39,8 +40,12 @@ class Array : public 
libmexclass::proxy::Proxy { void valid(libmexclass::proxy::method::Context& context); + void type(libmexclass::proxy::method::Context& context); + virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0; + virtual std::shared_ptr typeProxy() = 0; + std::shared_ptr array; }; diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc index 9a3b7ed4e22e9..281a0f732d73a 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/matlab/array/proxy/boolean_array.h" +#include "arrow/matlab/type/proxy/primitive_ctype.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" @@ -23,6 +24,9 @@ namespace arrow::matlab::array::proxy { + BooleanArray::BooleanArray(std::shared_ptr array) + : arrow::matlab::array::proxy::Array{std::move(array)} {} + libmexclass::proxy::MakeResult BooleanArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { ::matlab::data::StructArray opts = constructor_arguments[0]; @@ -40,7 +44,8 @@ namespace arrow::matlab::array::proxy { const auto array_length = logical_mda.getNumberOfElements(); auto array_data = arrow::ArrayData::Make(data_type, array_length, {validity_bitmap_buffer, data_buffer}); - return std::make_shared(arrow::MakeArray(array_data)); + auto arrow_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(arrow_array)); } void BooleanArray::toMATLAB(libmexclass::proxy::method::Context& context) { @@ -50,4 +55,10 @@ namespace arrow::matlab::array::proxy { context.outputs[0] = logical_array_mda; } + std::shared_ptr BooleanArray::typeProxy() { + using BooleanTypeProxy = type::proxy::PrimitiveCType; + + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + } } diff --git 
a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h index 6966d1090ee56..5e6e51f0bc8ff 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/boolean_array.h @@ -20,21 +20,21 @@ #include "arrow/matlab/array/proxy/array.h" #include "libmexclass/proxy/Proxy.h" +#include "arrow/type_fwd.h" namespace arrow::matlab::array::proxy { class BooleanArray : public arrow::matlab::array::proxy::Array { public: - BooleanArray(const std::shared_ptr logical_array) - : arrow::matlab::array::proxy::Array() { - array = logical_array; - } + BooleanArray(std::shared_ptr array); static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: void toMATLAB(libmexclass::proxy::method::Context& context) override; + std::shared_ptr typeProxy() override; + }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h index 24d2565f306c9..f358e05db6318 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h @@ -24,25 +24,32 @@ #include "arrow/type_traits.h" #include "arrow/matlab/array/proxy/array.h" +#include "arrow/matlab/type/proxy/traits.h" + #include "arrow/matlab/error/error.h" #include "arrow/matlab/bit/pack.h" #include "arrow/matlab/bit/unpack.h" +#include "arrow/matlab/buffer/matlab_buffer.h" #include "libmexclass/proxy/Proxy.h" +#include "arrow/matlab/type/time_unit.h" +#include "arrow/util/utf8.h" + namespace arrow::matlab::array::proxy { -template +template class NumericArray : public arrow::matlab::array::proxy::Array { public: - NumericArray(const std::shared_ptr numeric_array) - : arrow::matlab::array::proxy::Array() { - array = numeric_array; - } - static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& 
constructor_arguments) { - using ArrowType = typename arrow::CTypeTraits::ArrowType; - using BuilderType = typename arrow::CTypeTraits::BuilderType; + NumericArray(const std::shared_ptr> numeric_array) + : arrow::matlab::array::proxy::Array{std::move(numeric_array)} {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using CType = typename arrow::TypeTraits::CType; + using NumericArray = arrow::NumericArray; + using NumericArrayProxy = typename proxy::NumericArray; ::matlab::data::StructArray opts = constructor_arguments[0]; @@ -50,28 +57,25 @@ class NumericArray : public arrow::matlab::array::proxy::Array { const ::matlab::data::TypedArray numeric_mda = opts[0]["MatlabArray"]; const ::matlab::data::TypedArray valid_mda = opts[0]["Valid"]; - // Get raw pointer of mxArray - auto it(numeric_mda.cbegin()); - auto dt = it.operator->(); + auto data_buffer = std::make_shared(numeric_mda); const auto data_type = arrow::CTypeTraits::type_singleton(); const auto length = static_cast(numeric_mda.getNumberOfElements()); // cast size_t to int64_t - // Do not make a copy when creating arrow::Buffer - auto data_buffer = std::make_shared(reinterpret_cast(dt), - sizeof(CType) * numeric_mda.getNumberOfElements()); // Pack the validity bitmap values. 
MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, bit::packValid(valid_mda), error::BITPACK_VALIDITY_BITMAP_ERROR_ID); auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer}); - return std::make_shared>(arrow::MakeArray(array_data)); + auto numeric_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(numeric_array)); } protected: void toMATLAB(libmexclass::proxy::method::Context& context) override { - using ArrowArrayType = typename arrow::CTypeTraits::ArrayType; + using CType = typename arrow::TypeTraits::CType; + using NumericArray = arrow::NumericArray; const auto num_elements = static_cast(array->length()); - const auto numeric_array = std::static_pointer_cast(array); + const auto numeric_array = std::static_pointer_cast(array); const CType* const data_begin = numeric_array->raw_values(); const CType* const data_end = data_begin + num_elements; @@ -81,6 +85,57 @@ class NumericArray : public arrow::matlab::array::proxy::Array { ::matlab::data::TypedArray result = factory.createArray({num_elements, 1}, data_begin, data_end); context.outputs[0] = result; } + + std::shared_ptr typeProxy() override { + using TypeProxy = typename type::proxy::Traits::TypeProxy; + auto type = std::static_pointer_cast(array->type()); + return std::make_shared(std::move(type)); + } }; + // Specialization of NumericArray::Make for arrow::TimestampType. 
+ template <> + libmexclass::proxy::MakeResult NumericArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using MatlabBuffer = arrow::matlab::buffer::MatlabBuffer; + using TimestampArray = arrow::TimestampArray; + using TimestampArrayProxy = arrow::matlab::array::proxy::NumericArray; + + mda::StructArray opts = constructor_arguments[0]; + + // Get the mxArray from constructor arguments + const mda::TypedArray timestamp_mda = opts[0]["MatlabArray"]; + const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; + + const mda::TypedArray timezone_mda = opts[0]["TimeZone"]; + const mda::TypedArray units_mda = opts[0]["TimeUnit"]; + + // extract the time zone string + const std::u16string& u16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, + arrow::util::UTF16StringToUTF8(u16_timezone), + error::UNICODE_CONVERSION_ERROR_ID); + + // extract the time unit + const std::u16string& u16_timeunit = units_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto time_unit, + arrow::matlab::type::timeUnitFromString(u16_timeunit), + error::UKNOWN_TIME_UNIT_ERROR_ID) + + // create the timestamp_type + auto data_type = arrow::timestamp(time_unit, timezone); + auto array_length = static_cast(timestamp_mda.getNumberOfElements()); // cast size_t to int64_t + + auto data_buffer = std::make_shared(timestamp_mda); + + // Pack the validity bitmap values. 
+ MATLAB_ASSIGN_OR_ERROR(auto packed_validity_bitmap, + bit::packValid(validity_bitmap_mda), + error::BITPACK_VALIDITY_BITMAP_ERROR_ID); + + auto array_data = arrow::ArrayData::Make(data_type, array_length, {packed_validity_bitmap, data_buffer}); + auto timestamp_array = std::static_pointer_cast(arrow::MakeArray(array_data)); + return std::make_shared(std::move(timestamp_array)); + } + } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc index 51f39d72fca6c..16331f6195a22 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/matlab/array/proxy/string_array.h" +#include "arrow/matlab/type/proxy/string_type.h" #include "arrow/array/builder_binary.h" @@ -26,6 +27,9 @@ namespace arrow::matlab::array::proxy { + StringArray::StringArray(const std::shared_ptr string_array) + : arrow::matlab::array::proxy::Array(std::move(string_array)) {} + libmexclass::proxy::MakeResult StringArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { namespace mda = ::matlab::data; @@ -53,8 +57,8 @@ namespace arrow::matlab::array::proxy { arrow::StringBuilder builder; MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(strings, unpacked_validity_bitmap_ptr), error::STRING_BUILDER_APPEND_FAILED); MATLAB_ASSIGN_OR_ERROR(auto array, builder.Finish(), error::STRING_BUILDER_FINISH_FAILED); - - return std::make_shared(array); + auto typed_array = std::static_pointer_cast(array); + return std::make_shared(std::move(typed_array)); } void StringArray::toMATLAB(libmexclass::proxy::method::Context& context) { @@ -78,4 +82,11 @@ namespace arrow::matlab::array::proxy { context.outputs[0] = array_mda; } + std::shared_ptr StringArray::typeProxy() { + using StringTypeProxy = type::proxy::StringType; + + auto type = std::static_pointer_cast(array->type()); + return 
std::make_shared(std::move(type)); + } + } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h index de0c4625928e4..abb2322edbd20 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/string_array.h @@ -21,19 +21,21 @@ #include "libmexclass/proxy/Proxy.h" +#include "arrow/type_fwd.h" + namespace arrow::matlab::array::proxy { class StringArray : public arrow::matlab::array::proxy::Array { public: - StringArray(const std::shared_ptr string_array) - : arrow::matlab::array::proxy::Array() { - array = string_array; - } - + StringArray(const std::shared_ptr string_array); + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: void toMATLAB(libmexclass::proxy::method::Context& context) override; + + std::shared_ptr typeProxy() override; + }; } diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc deleted file mode 100644 index aa79a4f99240e..0000000000000 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/matlab/array/proxy/timestamp_array.h" - -#include "arrow/matlab/error/error.h" -#include "arrow/matlab/bit/pack.h" -#include "arrow/matlab/bit/unpack.h" - -#include "arrow/matlab/type/time_unit.h" -#include "arrow/util/utf8.h" -#include "arrow/type.h" -#include "arrow/builder.h" - - -namespace arrow::matlab::array::proxy { - - namespace { - const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray& valid_elements) { - const auto valid_elements_iterator(valid_elements.cbegin()); - return reinterpret_cast(valid_elements_iterator.operator->()); - } - } // anonymous namespace - - libmexclass::proxy::MakeResult TimestampArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { - namespace mda = ::matlab::data; - - mda::StructArray opts = constructor_arguments[0]; - - // Get the mxArray from constructor arguments - const mda::TypedArray timestamp_mda = opts[0]["MatlabArray"]; - const mda::TypedArray validity_bitmap_mda = opts[0]["Valid"]; - - const mda::TypedArray timezone_mda = opts[0]["TimeZone"]; - const mda::TypedArray units_mda = opts[0]["TimeUnit"]; - - // extract the time zone string - const std::u16string& u16_timezone = timezone_mda[0]; - MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(u16_timezone), - error::UNICODE_CONVERSION_ERROR_ID); - - // extract the time unit - MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(units_mda[0]), - error::UKNOWN_TIME_UNIT_ERROR_ID) - - // create the timestamp_type - auto data_type = arrow::timestamp(time_unit, timezone); - arrow::TimestampBuilder builder(data_type, arrow::default_memory_pool()); - - // Get raw pointer of mxArray - auto it(timestamp_mda.cbegin()); - auto dt = it.operator->(); - - // Pack the validity bitmap values. 
- const uint8_t* valid_mask = getUnpackedValidityBitmap(validity_bitmap_mda); - const auto num_elements = timestamp_mda.getNumberOfElements(); - - // Append values - MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(dt, num_elements, valid_mask), error::APPEND_VALUES_ERROR_ID); - MATLAB_ASSIGN_OR_ERROR(auto timestamp_array, builder.Finish(), error::BUILD_ARRAY_ERROR_ID); - - return std::make_shared(timestamp_array); - } - - void TimestampArray::toMATLAB(libmexclass::proxy::method::Context& context) { - namespace mda = ::matlab::data; - - const auto num_elements = static_cast(array->length()); - const auto timestamp_array = std::static_pointer_cast(array); - const int64_t* const data_begin = timestamp_array->raw_values(); - const int64_t* const data_end = data_begin + num_elements; - - mda::ArrayFactory factory; - - // Constructs a TypedArray from the raw values. Makes a copy. - mda::TypedArray result = factory.createArray({num_elements, 1}, data_begin, data_end); - context.outputs[0] = result; - } -} diff --git a/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h b/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h new file mode 100644 index 0000000000000..80b237544ded8 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/buffer/matlab_buffer.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/buffer.h" + +#include "MatlabDataArray.hpp" + +namespace arrow::matlab::buffer { + + namespace mda = ::matlab::data; + + class MatlabBuffer : public arrow::Buffer { + public: + + template + MatlabBuffer(const mda::TypedArray typed_array) + : arrow::Buffer{nullptr, 0} + , array{typed_array} { + + // Get raw pointer of mxArray + auto it(typed_array.cbegin()); + auto dt = it.operator->(); + + data_ = reinterpret_cast(dt); + size_ = sizeof(CType) * static_cast(typed_array.getNumberOfElements()); + capacity_ = size_; + is_mutable_ = false; + } + private: + const mda::Array array; + }; +} \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 41f1357bcedc5..2fb3207e590c6 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -18,29 +18,45 @@ #include "arrow/matlab/array/proxy/boolean_array.h" #include "arrow/matlab/array/proxy/numeric_array.h" #include "arrow/matlab/array/proxy/string_array.h" -#include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/error/error.h" +#include "arrow/matlab/type/proxy/primitive_ctype.h" +#include "arrow/matlab/type/proxy/string_type.h" +#include "arrow/matlab/type/proxy/timestamp_type.h" #include "factory.h" namespace arrow::matlab::proxy { libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, const FunctionArguments& constructor_arguments) { - REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt16Array , 
arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); - REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray); + REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::NumericArray); REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); + 
REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); + REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); + return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name}; }; diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc new file mode 100644 index 0000000000000..9ede57f2ee1dd --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#include "arrow/matlab/type/proxy/fixed_width_type.h" + +namespace arrow::matlab::type::proxy { + + FixedWidthType::FixedWidthType(std::shared_ptr type) : Type(std::move(type)) { + REGISTER_METHOD(FixedWidthType, bitWidth); + } + + void FixedWidthType::bitWidth(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto bit_width_mda = factory.createScalar(data_type->bit_width()); + context.outputs[0] = bit_width_mda; + } +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h new file mode 100644 index 0000000000000..e245acd55640e --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/fixed_width_type.h @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + +class FixedWidthType : public arrow::matlab::type::proxy::Type { + public: + FixedWidthType(std::shared_ptr type); + + virtual ~FixedWidthType() {} + + protected: + void bitWidth(libmexclass::proxy::method::Context& context); + +}; + +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h b/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h new file mode 100644 index 0000000000000..0415972b44c5b --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/primitive_ctype.h @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/type/proxy/fixed_width_type.h" +#include "arrow/type_traits.h" + +#include + + +namespace arrow::matlab::type::proxy { + +template +using arrow_type_t = typename arrow::CTypeTraits::ArrowType; + +template +using is_primitive = arrow::is_primitive_ctype>; + +template +using enable_if_primitive = std::enable_if_t::value, bool>; + +template = true> +class PrimitiveCType : public arrow::matlab::type::proxy::FixedWidthType { + + using ArrowDataType = arrow_type_t; + + public: + PrimitiveCType(std::shared_ptr primitive_type) : arrow::matlab::type::proxy::FixedWidthType(std::move(primitive_type)) { + } + + ~PrimitiveCType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + auto data_type = arrow::CTypeTraits::type_singleton(); + return std::make_shared(std::static_pointer_cast(std::move(data_type))); + } +}; + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc new file mode 100644 index 0000000000000..362dfba7344ea --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.cc @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/type/proxy/string_type.h" + +namespace arrow::matlab::type::proxy { + + StringType::StringType(std::shared_ptr string_type) : Type(std::move(string_type)) {} + + libmexclass::proxy::MakeResult StringType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + auto string_type = std::static_pointer_cast(arrow::utf8()); + return std::make_shared(std::move(string_type)); + } +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h new file mode 100644 index 0000000000000..fd1808d9b8058 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/string_type.h @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + +class StringType : public arrow::matlab::type::proxy::Type { + + public: + StringType(std::shared_ptr string_type); + + ~StringType() {} + + static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); +}; + +} + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc new file mode 100644 index 0000000000000..b1d35ee4874db --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/type/proxy/timestamp_type.h" +#include "arrow/matlab/type/time_unit.h" +#include "arrow/matlab/error/error.h" +#include "arrow/util/utf8.h" + +namespace arrow::matlab::type::proxy { + + TimestampType::TimestampType(std::shared_ptr timestamp_type) : FixedWidthType(std::move(timestamp_type)) { + REGISTER_METHOD(TimestampType, timeUnit); + REGISTER_METHOD(TimestampType, timeZone); + } + + libmexclass::proxy::MakeResult TimestampType::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + + using TimestampTypeProxy = arrow::matlab::type::proxy::TimestampType; + + mda::StructArray opts = constructor_arguments[0]; + + // Get the mxArray from constructor arguments + const mda::StringArray timezone_mda = opts[0]["TimeZone"]; + const mda::StringArray timeunit_mda = opts[0]["TimeUnit"]; + + // extract the time zone + const std::u16string& utf16_timezone = timezone_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timezone, + arrow::util::UTF16StringToUTF8(utf16_timezone), + error::UNICODE_CONVERSION_ERROR_ID); + + // extract the time unit + const std::u16string& utf16_timeunit = timeunit_mda[0]; + MATLAB_ASSIGN_OR_ERROR(const auto timeunit, + arrow::matlab::type::timeUnitFromString(utf16_timeunit), + error::UKNOWN_TIME_UNIT_ERROR_ID); + + auto type = arrow::timestamp(timeunit, timezone); + auto time_type = std::static_pointer_cast(type); + return std::make_shared(std::move(time_type)); + } + + void TimestampType::timeZone(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto timestamp_type = std::static_pointer_cast(data_type); + const auto timezone_utf8 = timestamp_type->timezone(); + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(const auto timezone_utf16, + arrow::util::UTF8StringToUTF16(timezone_utf8), + context, error::UNICODE_CONVERSION_ERROR_ID); + auto timezone_mda = factory.createScalar(timezone_utf16); + context.outputs[0] = 
timezone_mda; + } + + void TimestampType::timeUnit(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto timestamp_type = std::static_pointer_cast(data_type); + const auto timeunit = timestamp_type->unit(); + auto timeunit_mda = factory.createScalar(static_cast(timeunit)); + context.outputs[0] = timeunit_mda; + } +} diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h similarity index 67% rename from matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h rename to matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h index ec67245564beb..71005dc3a980d 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h +++ b/matlab/src/cpp/arrow/matlab/type/proxy/timestamp_type.h @@ -17,26 +17,26 @@ #pragma once -#include "arrow/array.h" +#include "arrow/matlab/type/proxy/fixed_width_type.h" +#include "arrow/type_traits.h" -#include "arrow/matlab/array/proxy/array.h" +namespace arrow::matlab::type::proxy { -#include "libmexclass/proxy/Proxy.h" - -namespace arrow::matlab::array::proxy { - -class TimestampArray : public arrow::matlab::array::proxy::Array { +class TimestampType : public arrow::matlab::type::proxy::FixedWidthType { + public: - TimestampArray(const std::shared_ptr timestamp_array) - : arrow::matlab::array::proxy::Array() { - array = timestamp_array; - } + TimestampType(std::shared_ptr timestamp_type); + + ~TimestampType() {} static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments); protected: - void toMATLAB(libmexclass::proxy::method::Context& context) override; + void timeZone(libmexclass::proxy::method::Context& context); + + void timeUnit(libmexclass::proxy::method::Context& context); }; } + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/traits.h b/matlab/src/cpp/arrow/matlab/type/proxy/traits.h new file mode 100644 index 
0000000000000..3d9a957a5e3dc --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/traits.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/type_fwd.h" + +#include "arrow/matlab/type/proxy/primitive_ctype.h" +#include "arrow/matlab/type/proxy/timestamp_type.h" +#include "arrow/matlab/type/proxy/string_type.h" + +namespace arrow::matlab::type::proxy { + + template + struct Traits; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using TypeProxy = PrimitiveCType; + }; + + template <> + struct Traits { + using 
TypeProxy = StringType; + }; + + template <> + struct Traits { + using TypeProxy = TimestampType; + }; +} diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.cc b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc new file mode 100644 index 0000000000000..f6a307ff3f62f --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.cc @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/matlab/type/proxy/type.h" + +namespace arrow::matlab::type::proxy { + // Stores the given type pointer in data_type and registers the typeID and numFields methods. + Type::Type(std::shared_ptr type) : data_type{std::move(type)} { + REGISTER_METHOD(Type, typeID); + REGISTER_METHOD(Type, numFields); + } + // Returns the stored data_type pointer. + std::shared_ptr Type::unwrap() { + return data_type; + } + // Writes data_type->id(), cast and wrapped with factory.createScalar, to context.outputs[0]. + void Type::typeID(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto type_number_mda = factory.createScalar(static_cast(data_type->id())); + context.outputs[0] = type_number_mda; + } + // Writes data_type->num_fields(), wrapped with factory.createScalar, to context.outputs[0]. + void Type::numFields(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::ArrayFactory factory; + + auto num_fields_mda = factory.createScalar(data_type->num_fields()); + context.outputs[0] = num_fields_mda; + } + +} + + diff --git a/matlab/src/cpp/arrow/matlab/type/proxy/type.h b/matlab/src/cpp/arrow/matlab/type/proxy/type.h new file mode 100644 index 0000000000000..e94097aa73cb4 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/type/proxy/type.h @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +#pragma once + +#include "arrow/type.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::type::proxy { +// Proxy subclass that keeps a data_type pointer; unwrap() is public, while the typeID and numFields method handlers are protected. +class Type : public libmexclass::proxy::Proxy { + public: + Type(std::shared_ptr type); + + virtual ~Type() {} + + std::shared_ptr unwrap(); + + protected: + + void typeID(libmexclass::proxy::method::Context& context); + + void numFields(libmexclass::proxy::method::Context& context); + + std::shared_ptr data_type; +}; + +} diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.cc b/matlab/src/cpp/arrow/matlab/type/time_unit.cc index 15ebfcfc0c06b..eb839b0e78096 100644 --- a/matlab/src/cpp/arrow/matlab/type/time_unit.cc +++ b/matlab/src/cpp/arrow/matlab/type/time_unit.cc @@ -20,7 +20,7 @@ namespace arrow::matlab::type { - arrow::Result timeUnitFromString(const std::u16string& unit_str) { + arrow::Result timeUnitFromString(std::u16string_view unit_str) { if (unit_str == u"Second") { return arrow::TimeUnit::type::SECOND; } else if (unit_str == u"Millisecond") { diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.h b/matlab/src/cpp/arrow/matlab/type/time_unit.h index cf3248d77b967..9534b1f902db7 100644 --- a/matlab/src/cpp/arrow/matlab/type/time_unit.h +++ b/matlab/src/cpp/arrow/matlab/type/time_unit.h @@ -18,10 +18,10 @@ #include "arrow/type_fwd.h" #include "arrow/result.h" -#include +#include namespace arrow::matlab::type { - arrow::Result timeUnitFromString(const std::u16string& unit_str); + arrow::Result timeUnitFromString(std::u16string_view unit_str); } diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 9b8796c33b974..7426052764166 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ b/matlab/src/matlab/+arrow/+array/Array.m @@ -26,7 +26,7 @@ Valid % Validity bitmap end - properties(Abstract, SetAccess=private, GetAccess=public) + properties(Dependent, SetAccess=private, GetAccess=public) Type(1, 1) arrow.type.Type end @@ -46,6 +46,13 @@ function matlabArray = toMATLAB(obj)
matlabArray = obj.Proxy.toMATLAB(); end + % Getter for the Dependent Type property: asks obj.Proxy.type() for a proxy ID and type ID, looks up the traits for that type ID, and builds the type object from a libmexclass proxy with that ID. + function type = get.Type(obj) + [proxyID, typeID] = obj.Proxy.type(); + traits = arrow.type.traits.traits(arrow.type.ID(typeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.TypeProxyClassName, ID=proxyID); + type = traits.TypeConstructor(proxy); + end end methods (Access = private) diff --git a/matlab/src/matlab/+arrow/+array/BooleanArray.m b/matlab/src/matlab/+arrow/+array/BooleanArray.m index e5c4cc527e552..f4d341efce9d3 100644 --- a/matlab/src/matlab/+arrow/+array/BooleanArray.m +++ b/matlab/src/matlab/+arrow/+array/BooleanArray.m @@ -20,10 +20,6 @@ NullSubstitionValue = false; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.BooleanType - end - methods function obj = BooleanArray(data, opts) arguments diff --git a/matlab/src/matlab/+arrow/+array/Float32Array.m b/matlab/src/matlab/+arrow/+array/Float32Array.m index 29f23393a4346..c6be563d8621f 100644 --- a/matlab/src/matlab/+arrow/+array/Float32Array.m +++ b/matlab/src/matlab/+arrow/+array/Float32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = single(NaN); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Float32Type - end - methods function obj = Float32Array(data, varargin) obj@arrow.array.NumericArray(data, "single", ... diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m index ab92715864275..ff43ebc0536c0 100644 --- a/matlab/src/matlab/+arrow/+array/Float64Array.m +++ b/matlab/src/matlab/+arrow/+array/Float64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = NaN; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Float64Type - end - methods function obj = Float64Array(data, varargin) obj@arrow.array.NumericArray(data, "double", ...
diff --git a/matlab/src/matlab/+arrow/+array/Int16Array.m b/matlab/src/matlab/+arrow/+array/Int16Array.m index 23716d5f59ec5..533f0c9ef549d 100644 --- a/matlab/src/matlab/+arrow/+array/Int16Array.m +++ b/matlab/src/matlab/+arrow/+array/Int16Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int16(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int16Type - end - methods function obj = Int16Array(data, varargin) obj@arrow.array.NumericArray(data, "int16", ... diff --git a/matlab/src/matlab/+arrow/+array/Int32Array.m b/matlab/src/matlab/+arrow/+array/Int32Array.m index 8844576ae1ef9..0f977fb90f808 100644 --- a/matlab/src/matlab/+arrow/+array/Int32Array.m +++ b/matlab/src/matlab/+arrow/+array/Int32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int32(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int32Type - end - methods function obj = Int32Array(data, varargin) obj@arrow.array.NumericArray(data, "int32", ... diff --git a/matlab/src/matlab/+arrow/+array/Int64Array.m b/matlab/src/matlab/+arrow/+array/Int64Array.m index 9f72c5f2a6854..94cad56519b11 100644 --- a/matlab/src/matlab/+arrow/+array/Int64Array.m +++ b/matlab/src/matlab/+arrow/+array/Int64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int64(0); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int64Type - end - methods function obj = Int64Array(data, varargin) obj@arrow.array.NumericArray(data, "int64", ... diff --git a/matlab/src/matlab/+arrow/+array/Int8Array.m b/matlab/src/matlab/+arrow/+array/Int8Array.m index f9774f6527493..83a14caa27287 100644 --- a/matlab/src/matlab/+arrow/+array/Int8Array.m +++ b/matlab/src/matlab/+arrow/+array/Int8Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = int8(0); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.Int8Type - end - methods function obj = Int8Array(data, varargin) obj@arrow.array.NumericArray(data, "int8", ... 
diff --git a/matlab/src/matlab/+arrow/+array/NumericArray.m b/matlab/src/matlab/+arrow/+array/NumericArray.m index fa692724c6458..fb2fc1d333939 100644 --- a/matlab/src/matlab/+arrow/+array/NumericArray.m +++ b/matlab/src/matlab/+arrow/+array/NumericArray.m @@ -15,11 +15,6 @@ classdef NumericArray < arrow.array.Array % arrow.array.NumericArray - - - properties (Hidden, SetAccess=protected) - MatlabArray = [] - end properties(Abstract, Access=protected) NullSubstitutionValue; @@ -38,9 +33,6 @@ validElements = arrow.args.parseValidElements(data, opts); opts = struct(MatlabArray=data, Valid=validElements); obj@arrow.array.Array("Name", proxyName, "ConstructorArguments", {opts}); - obj.MatlabArray = cast(obj.MatlabArray, type); - % Store a reference to the array - obj.MatlabArray = data; end function matlabArray = toMATLAB(obj) diff --git a/matlab/src/matlab/+arrow/+array/StringArray.m b/matlab/src/matlab/+arrow/+array/StringArray.m index 9ef3f0252586f..ec2d53b371fe2 100644 --- a/matlab/src/matlab/+arrow/+array/StringArray.m +++ b/matlab/src/matlab/+arrow/+array/StringArray.m @@ -20,10 +20,6 @@ NullSubstitionValue = string(missing); end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.StringType - end - methods function obj = StringArray(data, opts) arguments diff --git a/matlab/src/matlab/+arrow/+array/TimestampArray.m b/matlab/src/matlab/+arrow/+array/TimestampArray.m index 0aa76beb99c7a..0f0da4e82130c 100644 --- a/matlab/src/matlab/+arrow/+array/TimestampArray.m +++ b/matlab/src/matlab/+arrow/+array/TimestampArray.m @@ -20,10 +20,6 @@ NullSubstitutionValue = NaT; end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.TimestampType % temporarily default value - end - methods function obj = TimestampArray(data, opts) arguments @@ -39,7 +35,6 @@ args = struct(MatlabArray=ptime, Valid=validElements, TimeZone=timezone, TimeUnit=string(opts.TimeUnit)); obj@arrow.array.Array("Name", "arrow.array.proxy.TimestampArray", 
"ConstructorArguments", {args}); - obj.Type = arrow.type.TimestampType(TimeUnit=opts.TimeUnit, TimeZone=timezone); end function dates = toMATLAB(obj) @@ -48,7 +43,7 @@ epoch = datetime(1970, 1, 1, TimeZone="UTC"); tz = obj.Type.TimeZone; - ticsPerSecond = obj.Type.TimeUnit.TicksPerSecond; + ticsPerSecond = ticksPerSecond(obj.Type.TimeUnit); dates = datetime(time, ConvertFrom="epochtime", Epoch=epoch, ... TimeZone=tz, TicksPerSecond=ticsPerSecond); @@ -72,7 +67,7 @@ % % TODO: convertTo may error if the datetime is 2^63-1 before or % after the epoch. We should throw a custom error in this case. - time(indices) = convertTo(dates(indices), "epochtime", TicksPerSecond=units.TicksPerSecond); + time(indices) = convertTo(dates(indices), "epochtime", TicksPerSecond=ticksPerSecond(units)); end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+array/UInt16Array.m b/matlab/src/matlab/+arrow/+array/UInt16Array.m index 3732df3c76111..4862ca20b9f88 100644 --- a/matlab/src/matlab/+arrow/+array/UInt16Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt16Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint16(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt16Type - end - methods function obj = UInt16Array(data, varargin) obj@arrow.array.NumericArray(data, "uint16", ... diff --git a/matlab/src/matlab/+arrow/+array/UInt32Array.m b/matlab/src/matlab/+arrow/+array/UInt32Array.m index 183d4df08257a..782b0010997fc 100644 --- a/matlab/src/matlab/+arrow/+array/UInt32Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt32Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint32(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt32Type - end - methods function obj = UInt32Array(data, varargin) obj@arrow.array.NumericArray(data, "uint32", ... 
diff --git a/matlab/src/matlab/+arrow/+array/UInt64Array.m b/matlab/src/matlab/+arrow/+array/UInt64Array.m index af828978ce2a7..9e25ce4987bc1 100644 --- a/matlab/src/matlab/+arrow/+array/UInt64Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt64Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint64(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt64Type - end - methods function obj = UInt64Array(data, varargin) obj@arrow.array.NumericArray(data, "uint64", ... diff --git a/matlab/src/matlab/+arrow/+array/UInt8Array.m b/matlab/src/matlab/+arrow/+array/UInt8Array.m index b5dc664ea1476..8bad2401bd429 100644 --- a/matlab/src/matlab/+arrow/+array/UInt8Array.m +++ b/matlab/src/matlab/+arrow/+array/UInt8Array.m @@ -20,10 +20,6 @@ NullSubstitutionValue = uint8(0) end - properties(SetAccess=private, GetAccess=public) - Type = arrow.type.UInt8Type - end - methods function obj = UInt8Array(data, varargin) obj@arrow.array.NumericArray(data, "uint8", ... diff --git a/matlab/src/matlab/+arrow/+internal/+proxy/create.m b/matlab/src/matlab/+arrow/+internal/+proxy/create.m new file mode 100644 index 0000000000000..0ed1476058df6 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+proxy/create.m @@ -0,0 +1,25 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. + +function proxy = create(name, args) +%CREATE Creates a proxy object by forwarding name and the repeating args to libmexclass.proxy.Proxy.create. + arguments + name(1, 1) string {mustBeNonmissing} + end + arguments(Repeating) + args + end + proxy = libmexclass.proxy.Proxy.create(name, args{:}); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+internal/+proxy/validate.m b/matlab/src/matlab/+arrow/+internal/+proxy/validate.m new file mode 100644 index 0000000000000..1b2b3649e42c3 --- /dev/null +++ b/matlab/src/matlab/+arrow/+internal/+proxy/validate.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function validate(proxy, expectedName) +%VALIDATE Throws an arrow:proxy:ProxyNameMismatch error if +% proxy.Name and expectedName are not equal; the message reports both names.
+ arguments + proxy(1, 1) libmexclass.proxy.Proxy + expectedName(1, 1) string + end + + if proxy.Name ~= expectedName + errid = "arrow:proxy:ProxyNameMismatch"; + msg = "Proxy class name is " + proxy.Name + ", but expected " + expectedName; + error(errid, msg); + end +end diff --git a/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m b/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m new file mode 100644 index 0000000000000..82a8b6b1e28ba --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/BooleanTraits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef BooleanTraits < arrow.type.traits.TypeTraits +%BOOLEANTRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.BooleanType. + properties (Constant) + ArrayConstructor = @arrow.array.BooleanArray + ArrayClassName = "arrow.array.BooleanArray" + ArrayProxyClassName = "arrow.array.proxy.BooleanArray" + TypeConstructor = @arrow.type.BooleanType; + TypeClassName = "arrow.type.BooleanType" + TypeProxyClassName = "arrow.type.proxy.BooleanType" + MatlabConstructor = @logical + MatlabClassName = "logical" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m new file mode 100644 index 0000000000000..7dc0d17474e2f --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Float32Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Float32Traits < arrow.type.traits.TypeTraits +%FLOAT32TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Float32Type. + properties (Constant) + ArrayConstructor = @arrow.array.Float32Array + ArrayClassName = "arrow.array.Float32Array" + ArrayProxyClassName = "arrow.array.proxy.Float32Array" + TypeConstructor = @arrow.type.Float32Type; + TypeClassName = "arrow.type.Float32Type" + TypeProxyClassName = "arrow.type.proxy.Float32Type" + MatlabConstructor = @single + MatlabClassName = "single" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m new file mode 100644 index 0000000000000..9c52634b2c942 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Float64Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Float64Traits < arrow.type.traits.TypeTraits +%FLOAT64TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Float64Type. + properties (Constant) + ArrayConstructor = @arrow.array.Float64Array + ArrayClassName = "arrow.array.Float64Array" + ArrayProxyClassName = "arrow.array.proxy.Float64Array" + TypeConstructor = @arrow.type.Float64Type; + TypeClassName = "arrow.type.Float64Type" + TypeProxyClassName = "arrow.type.proxy.Float64Type" + MatlabConstructor = @double + MatlabClassName = "double" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m new file mode 100644 index 0000000000000..46b67b43c1783 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Int16Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Int16Traits < arrow.type.traits.TypeTraits +%INT16TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Int16Type. + properties (Constant) + ArrayConstructor = @arrow.array.Int16Array + ArrayClassName = "arrow.array.Int16Array" + ArrayProxyClassName = "arrow.array.proxy.Int16Array" + TypeConstructor = @arrow.type.Int16Type; + TypeClassName = "arrow.type.Int16Type" + TypeProxyClassName = "arrow.type.proxy.Int16Type" + MatlabConstructor = @int16 + MatlabClassName = "int16" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m new file mode 100644 index 0000000000000..4117271e50ff1 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Int32Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Int32Traits < arrow.type.traits.TypeTraits +%INT32TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Int32Type. + properties (Constant) + ArrayConstructor = @arrow.array.Int32Array + ArrayClassName = "arrow.array.Int32Array" + ArrayProxyClassName = "arrow.array.proxy.Int32Array" + TypeConstructor = @arrow.type.Int32Type; + TypeClassName = "arrow.type.Int32Type" + TypeProxyClassName = "arrow.type.proxy.Int32Type" + MatlabConstructor = @int32 + MatlabClassName = "int32" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m new file mode 100644 index 0000000000000..e25da953aa0fc --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Int64Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Int64Traits < arrow.type.traits.TypeTraits +%INT64TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Int64Type. + properties (Constant) + ArrayConstructor = @arrow.array.Int64Array + ArrayClassName = "arrow.array.Int64Array" + ArrayProxyClassName = "arrow.array.proxy.Int64Array" + TypeConstructor = @arrow.type.Int64Type; + TypeClassName = "arrow.type.Int64Type" + TypeProxyClassName = "arrow.type.proxy.Int64Type" + MatlabConstructor = @int64 + MatlabClassName = "int64" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m b/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m new file mode 100644 index 0000000000000..9f73bd2667e1b --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/Int8Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef Int8Traits < arrow.type.traits.TypeTraits +%INT8TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.Int8Type. + properties (Constant) + ArrayConstructor = @arrow.array.Int8Array + ArrayClassName = "arrow.array.Int8Array" + ArrayProxyClassName = "arrow.array.proxy.Int8Array" + TypeConstructor = @arrow.type.Int8Type; + TypeClassName = "arrow.type.Int8Type" + TypeProxyClassName = "arrow.type.proxy.Int8Type" + MatlabConstructor = @int8 + MatlabClassName = "int8" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m b/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m new file mode 100644 index 0000000000000..0730657270129 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/StringTraits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef StringTraits < arrow.type.traits.TypeTraits +%STRINGTRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.StringType. + properties (Constant) + ArrayConstructor = @arrow.array.StringArray + ArrayClassName = "arrow.array.StringArray" + ArrayProxyClassName = "arrow.array.proxy.StringArray" + TypeConstructor = @arrow.type.StringType; + TypeClassName = "arrow.type.StringType" + TypeProxyClassName = "arrow.type.proxy.StringType" + MatlabConstructor = @string + MatlabClassName = "string" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m b/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m new file mode 100644 index 0000000000000..488a5e7314016 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/TimestampTraits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef TimestampTraits < arrow.type.traits.TypeTraits +%TIMESTAMPTRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.TimestampType. + properties (Constant) + ArrayConstructor = @arrow.array.TimestampArray + ArrayClassName = "arrow.array.TimestampArray" + ArrayProxyClassName = "arrow.array.proxy.TimestampArray" + TypeConstructor = @arrow.type.TimestampType; + TypeClassName = "arrow.type.TimestampType" + TypeProxyClassName = "arrow.type.proxy.TimestampType" + MatlabConstructor = @datetime + MatlabClassName = "datetime" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m b/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m new file mode 100644 index 0000000000000..54b8fc0a7709c --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/TypeTraits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef TypeTraits +%TYPETRAITS Declares the abstract constant properties (array, type, and MATLAB constructors and class names, including proxy class names) that a traits subclass must define. + properties (Abstract, Constant) + ArrayConstructor + ArrayClassName + ArrayProxyClassName + TypeConstructor + TypeClassName + TypeProxyClassName + MatlabConstructor + MatlabClassName + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m new file mode 100644 index 0000000000000..b90e6294ce0d8 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/UInt16Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef UInt16Traits < arrow.type.traits.TypeTraits +%UINT16TRAITS Constant properties naming the array, type, and MATLAB constructors and class names (including proxy class names) associated with arrow.type.UInt16Type. + properties (Constant) + ArrayConstructor = @arrow.array.UInt16Array + ArrayClassName = "arrow.array.UInt16Array" + ArrayProxyClassName = "arrow.array.proxy.UInt16Array" + TypeConstructor = @arrow.type.UInt16Type; + TypeClassName = "arrow.type.UInt16Type" + TypeProxyClassName = "arrow.type.proxy.UInt16Type" + MatlabConstructor = @uint16 + MatlabClassName = "uint16" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m new file mode 100644 index 0000000000000..ff79bd9579a3b --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/UInt32Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License.
+ +classdef UInt32Traits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.UInt32Array + ArrayClassName = "arrow.array.UInt32Array" + ArrayProxyClassName = "arrow.array.proxy.UInt32Array" + TypeConstructor = @arrow.type.UInt32Type; + TypeClassName = "arrow.type.UInt32Type" + TypeProxyClassName = "arrow.type.proxy.UInt32Type" + MatlabConstructor = @uint32 + MatlabClassName = "uint32" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m new file mode 100644 index 0000000000000..a6b0de37528a9 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/UInt64Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef UInt64Traits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.UInt64Array + ArrayClassName = "arrow.array.UInt64Array" + ArrayProxyClassName = "arrow.array.proxy.UInt64Array" + TypeConstructor = @arrow.type.UInt64Type; + TypeClassName = "arrow.type.UInt64Type" + TypeProxyClassName = "arrow.type.proxy.UInt64Type" + MatlabConstructor = @uint64 + MatlabClassName = "uint64" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m b/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m new file mode 100644 index 0000000000000..ff2377ff812c3 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/UInt8Traits.m @@ -0,0 +1,29 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef UInt8Traits < arrow.type.traits.TypeTraits + + properties (Constant) + ArrayConstructor = @arrow.array.UInt8Array + ArrayClassName = "arrow.array.UInt8Array" + ArrayProxyClassName = "arrow.array.proxy.UInt8Array" + TypeConstructor = @arrow.type.UInt8Type; + TypeClassName = "arrow.type.UInt8Type" + TypeProxyClassName = "arrow.type.proxy.UInt8Type" + MatlabConstructor = @uint8 + MatlabClassName = "uint8" + end + +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/+traits/traits.m b/matlab/src/matlab/+arrow/+type/+traits/traits.m new file mode 100644 index 0000000000000..af59e2822df96 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/+traits/traits.m @@ -0,0 +1,89 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function typeTraits = traits(type) + % "Gateway" function that links an arrow Type ID enumeration (e.g. + % arrow.type.ID.String) or a MATLAB class string (e.g. "datetime") + % to associated type information. 
+ import arrow.type.traits.* + import arrow.type.* + + if isa(type, "arrow.type.ID") + switch type + case ID.UInt8 + typeTraits = UInt8Traits(); + case ID.UInt16 + typeTraits = UInt16Traits(); + case ID.UInt32 + typeTraits = UInt32Traits(); + case ID.UInt64 + typeTraits = UInt64Traits(); + case ID.Int8 + typeTraits = Int8Traits(); + case ID.Int16 + typeTraits = Int16Traits(); + case ID.Int32 + typeTraits = Int32Traits(); + case ID.Int64 + typeTraits = Int64Traits(); + case ID.Float32 + typeTraits = Float32Traits(); + case ID.Float64 + typeTraits = Float64Traits(); + case ID.Boolean + typeTraits = BooleanTraits(); + case ID.String + typeTraits = StringTraits(); + case ID.Timestamp + typeTraits = TimestampTraits(); + otherwise + error("arrow:type:traits:UnsupportedArrowTypeID", "Unsupported Arrow type ID: " + type); + end + elseif isa(type, "string") % MATLAB class string + switch type + case "uint8" + typeTraits = UInt8Traits(); + case "uint16" + typeTraits = UInt16Traits(); + case "uint32" + typeTraits = UInt32Traits(); + case "uint64" + typeTraits = UInt64Traits(); + case "int8" + typeTraits = Int8Traits(); + case "int16" + typeTraits = Int16Traits(); + case "int32" + typeTraits = Int32Traits(); + case "int64" + typeTraits = Int64Traits(); + case "single" + typeTraits = Float32Traits(); + case "double" + typeTraits = Float64Traits(); + case "logical" + typeTraits = BooleanTraits(); + case "string" + typeTraits = StringTraits(); + case "datetime" + typeTraits = TimestampTraits(); + otherwise + error("arrow:type:traits:UnsupportedMatlabClass", "Unsupported MATLAB class: " + type); + end + else + error("arrow:type:traits:UnsupportedInputType", "The input argument to the traits function " + ... 
+ "must be a MATLAB class string or an arrow.type.ID enumeration."); + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/BooleanType.m b/matlab/src/matlab/+arrow/+type/BooleanType.m index 050beae3f5120..6afa00e9258cb 100644 --- a/matlab/src/matlab/+arrow/+type/BooleanType.m +++ b/matlab/src/matlab/+arrow/+type/BooleanType.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef BooleanType < arrow.type.PrimitiveType +classdef BooleanType < arrow.type.FixedWidthType %BOOLEANTYPE Type class for boolean data. - properties(SetAccess = protected) - ID = arrow.type.ID.Boolean + methods + function obj = BooleanType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.BooleanType")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/PrimitiveType.m b/matlab/src/matlab/+arrow/+type/FixedWidthType.m similarity index 67% rename from matlab/src/matlab/+arrow/+type/PrimitiveType.m rename to matlab/src/matlab/+arrow/+type/FixedWidthType.m index 6297b98d8b01b..8c9c5b26081ae 100644 --- a/matlab/src/matlab/+arrow/+type/PrimitiveType.m +++ b/matlab/src/matlab/+arrow/+type/FixedWidthType.m @@ -13,21 +13,23 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef PrimitiveType < arrow.type.Type -%PRIMITIVETYPE Abstract type class representing primtive data types. +classdef (Abstract) FixedWidthType < arrow.type.Type +%FIXEDWIDTHTYPE Abstract type class representing fixed width data types. 
- properties(Dependent, SetAccess=protected, GetAccess=public) + properties(Dependent, SetAccess=private, GetAccess=public) BitWidth end - properties(Constant) - NumFields = 0 - NumBuffers = 2 - end - methods + function obj = FixedWidthType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy + end + obj@arrow.type.Type(proxy); + end + function width = get.BitWidth(obj) - width = bitWidth(obj.ID); + width = obj.Proxy.bitWidth(); end - end + end end diff --git a/matlab/src/matlab/+arrow/+type/Float32Type.m b/matlab/src/matlab/+arrow/+type/Float32Type.m index b0430bda7eab4..df5fa1ce844e9 100644 --- a/matlab/src/matlab/+arrow/+type/Float32Type.m +++ b/matlab/src/matlab/+arrow/+type/Float32Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Float32Type < arrow.type.PrimitiveType +classdef Float32Type < arrow.type.FixedWidthType %FLOAT32TYPE Type class for float32 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Float32 + methods + function obj = Float32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Float32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Float64Type.m b/matlab/src/matlab/+arrow/+type/Float64Type.m index a2ffe02b786af..ba93265ebc73e 100644 --- a/matlab/src/matlab/+arrow/+type/Float64Type.m +++ b/matlab/src/matlab/+arrow/+type/Float64Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Float64Type < arrow.type.PrimitiveType +classdef Float64Type < arrow.type.FixedWidthType %FLOAT64Type Type class for float64 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.Float64 + methods + function obj = Float64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Float64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/ID.m b/matlab/src/matlab/+arrow/+type/ID.m index 2e320603d039c..076d79d196a30 100644 --- a/matlab/src/matlab/+arrow/+type/ID.m +++ b/matlab/src/matlab/+arrow/+type/ID.m @@ -35,24 +35,4 @@ % Date64 (17) Timestamp (18) end - - methods - function bitWidth = bitWidth(obj) - import arrow.type.ID - switch obj - case ID.Boolean - bitWidth = 1; - case {ID.UInt8, ID.Int8} - bitWidth = 8; - case {ID.UInt16, ID.Int16} - bitWidth = 16; - case {ID.UInt32, ID.Int32, ID.Float32} - bitWidth = 32; - case {ID.UInt64, ID.Int64, ID.Float64, ID.Timestamp} - bitWidth = 64; - otherwise - bitWidth = NaN; - end - end - end end diff --git a/matlab/src/matlab/+arrow/+type/Int16Type.m b/matlab/src/matlab/+arrow/+type/Int16Type.m index 3d060f7e58671..c16d3fd5ca53f 100644 --- a/matlab/src/matlab/+arrow/+type/Int16Type.m +++ b/matlab/src/matlab/+arrow/+type/Int16Type.m @@ -13,11 +13,17 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int16Type < arrow.type.PrimitiveType +classdef Int16Type < arrow.type.FixedWidthType %INT16TYPE Type class for int8 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.Int16 + methods + function obj = Int16Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int16Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end diff --git a/matlab/src/matlab/+arrow/+type/Int32Type.m b/matlab/src/matlab/+arrow/+type/Int32Type.m index 98c81c08647dd..786697bf1136b 100644 --- a/matlab/src/matlab/+arrow/+type/Int32Type.m +++ b/matlab/src/matlab/+arrow/+type/Int32Type.m @@ -13,11 +13,17 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int32Type < arrow.type.PrimitiveType +classdef Int32Type < arrow.type.FixedWidthType %INT32TYPE Type class for int32 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Int32 + methods + function obj = Int32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end diff --git a/matlab/src/matlab/+arrow/+type/Int64Type.m b/matlab/src/matlab/+arrow/+type/Int64Type.m index 23147817e36e1..bf6c71d622a63 100644 --- a/matlab/src/matlab/+arrow/+type/Int64Type.m +++ b/matlab/src/matlab/+arrow/+type/Int64Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int64Type < arrow.type.PrimitiveType +classdef Int64Type < arrow.type.FixedWidthType %INT64TYPE Type class for int64 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.Int64 + methods + function obj = Int64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Int8Type.m b/matlab/src/matlab/+arrow/+type/Int8Type.m index 9d364bb32be82..b28785f876ea8 100644 --- a/matlab/src/matlab/+arrow/+type/Int8Type.m +++ b/matlab/src/matlab/+arrow/+type/Int8Type.m @@ -13,11 +13,17 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Int8Type < arrow.type.PrimitiveType +classdef Int8Type < arrow.type.FixedWidthType %INT8TYPE Type class for int8 data. - properties(SetAccess = protected) - ID = arrow.type.ID.Int8 + methods + function obj = Int8Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.Int8Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end diff --git a/matlab/src/matlab/+arrow/+type/StringType.m b/matlab/src/matlab/+arrow/+type/StringType.m index 66a15dd0ea3e2..c269bfa6db33c 100644 --- a/matlab/src/matlab/+arrow/+type/StringType.m +++ b/matlab/src/matlab/+arrow/+type/StringType.m @@ -16,14 +16,14 @@ classdef StringType < arrow.type.Type %STRINGTYPE Type class for string data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.String - end - - properties(Constant) - NumFields = 0 - NumBuffers = 3 - end - + methods + function obj = StringType(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.StringType")} + end + import arrow.internal.proxy.validate + obj@arrow.type.Type(proxy); + end + end end diff --git a/matlab/src/matlab/+arrow/+type/TimeUnit.m b/matlab/src/matlab/+arrow/+type/TimeUnit.m index 3ec8bf44d104f..358818be985c9 100644 --- a/matlab/src/matlab/+arrow/+type/TimeUnit.m +++ b/matlab/src/matlab/+arrow/+type/TimeUnit.m @@ -12,33 +12,28 @@ % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef TimeUnit +classdef TimeUnit < int16 % Enumeration class representing Time Units. enumeration - Second - Millisecond - Microsecond - Nanosecond + Second (0) + Millisecond (1) + Microsecond (2) + Nanosecond (3) end - properties (Dependent) - TicksPerSecond - end - - - methods - function ticksPerSecond = get.TicksPerSecond(obj) + methods (Hidden) + function ticks = ticksPerSecond(obj) import arrow.type.TimeUnit switch obj case TimeUnit.Second - ticksPerSecond = 1; + ticks = 1; case TimeUnit.Millisecond - ticksPerSecond = 1e3; + ticks = 1e3; case TimeUnit.Microsecond - ticksPerSecond = 1e6; + ticks = 1e6; case TimeUnit.Nanosecond - ticksPerSecond = 1e9; + ticks = 1e9; end end end diff --git a/matlab/src/matlab/+arrow/+type/TimestampType.m b/matlab/src/matlab/+arrow/+type/TimestampType.m index 99ac4a7b769f7..a5a376f8bc3b3 100644 --- a/matlab/src/matlab/+arrow/+type/TimestampType.m +++ b/matlab/src/matlab/+arrow/+type/TimestampType.m @@ -13,29 +13,30 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef TimestampType < arrow.type.PrimitiveType +classdef TimestampType < arrow.type.FixedWidthType %TIMESTAMPTYPE Type class for timestamp data. - - properties(SetAccess=private) - TimeZone(1, 1) string - TimeUnit(1, 1) arrow.type.TimeUnit - end - - properties(SetAccess = protected) - ID = arrow.type.ID.Timestamp + properties(Dependent, SetAccess=private, GetAccess=public) + TimeZone + TimeUnit end methods - function obj = TimestampType(opts) - %TIMESTAMPTYPE Construct an instance of this class + function obj = TimestampType(proxy) arguments - opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond - opts.TimeZone(1, 1) string {mustBeNonmissing} = "" + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.TimestampType")} end - obj.TimeUnit = opts.TimeUnit; - obj.TimeZone = opts.TimeZone; + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); end - end -end + function unit = get.TimeUnit(obj) + val = obj.Proxy.timeUnit(); + unit = arrow.type.TimeUnit(val); + end + + function tz = get.TimeZone(obj) + tz = obj.Proxy.timeZone(); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/Type.m b/matlab/src/matlab/+arrow/+type/Type.m index a05eb2253bf87..c2ae3dbc58c9c 100644 --- a/matlab/src/matlab/+arrow/+type/Type.m +++ b/matlab/src/matlab/+arrow/+type/Type.m @@ -13,11 +13,40 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef Type +classdef (Abstract) Type < matlab.mixin.CustomDisplay %TYPE Abstract type class. 
- properties (Abstract, SetAccess=protected) - ID(1, 1) arrow.type.ID + properties (Dependent, GetAccess=public, SetAccess=private) + ID + NumFields end -end + properties (GetAccess=public, SetAccess=private, Hidden) + Proxy + end + + methods + function obj = Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy + end + obj.Proxy = proxy; + end + + function numFields = get.NumFields(obj) + numFields = obj.Proxy.numFields(); + end + + function typeID = get.ID(obj) + typeID = arrow.type.ID(obj.Proxy.typeID()); + end + end + + methods (Access=protected) + function propgrp = getPropertyGroups(~) + proplist = {'ID'}; + propgrp = matlab.mixin.util.PropertyGroup(proplist); + end + end + +end diff --git a/matlab/src/matlab/+arrow/+type/UInt16Type.m b/matlab/src/matlab/+arrow/+type/UInt16Type.m index 8d53ea68556d8..3198b78671ef9 100644 --- a/matlab/src/matlab/+arrow/+type/UInt16Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt16Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt16Type < arrow.type.PrimitiveType +classdef UInt16Type < arrow.type.FixedWidthType %UINT16TYPE Type class for uint16 data. - properties(SetAccess = protected) - ID = arrow.type.ID.UInt16 + methods + function obj = UInt16Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt16Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt32Type.m b/matlab/src/matlab/+arrow/+type/UInt32Type.m index 693bd897d66dc..53e60e4e34290 100644 --- a/matlab/src/matlab/+arrow/+type/UInt32Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt32Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef UInt32Type < arrow.type.PrimitiveType +classdef UInt32Type < arrow.type.FixedWidthType %UINT32TYPE Type class for uint32 data. - properties(SetAccess = protected) - ID = arrow.type.ID.UInt32 + methods + function obj = UInt32Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt32Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt64Type.m b/matlab/src/matlab/+arrow/+type/UInt64Type.m index fbd06646cedd7..f8512ec59497c 100644 --- a/matlab/src/matlab/+arrow/+type/UInt64Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt64Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt64Type < arrow.type.PrimitiveType +classdef UInt64Type < arrow.type.FixedWidthType %UINT64TYPE Type class for uint64 data. - properties(SetAccess = protected) - ID = arrow.type.ID.UInt64 + methods + function obj = UInt64Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt64Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/UInt8Type.m b/matlab/src/matlab/+arrow/+type/UInt8Type.m index 9abd001b43c67..898426e3a4076 100644 --- a/matlab/src/matlab/+arrow/+type/UInt8Type.m +++ b/matlab/src/matlab/+arrow/+type/UInt8Type.m @@ -13,10 +13,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef UInt8Type < arrow.type.PrimitiveType +classdef UInt8Type < arrow.type.FixedWidthType %UINT8TYPE Type class for uint8 data. 
- properties(SetAccess = protected) - ID = arrow.type.ID.UInt8 + methods + function obj = UInt8Type(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy {validate(proxy, "arrow.type.proxy.UInt8Type")} + end + import arrow.internal.proxy.validate + obj@arrow.type.FixedWidthType(proxy); + end end end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/boolean.m b/matlab/src/matlab/+arrow/+type/boolean.m new file mode 100644 index 0000000000000..f5331d790e595 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/boolean.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +function type = boolean() +%BOOLEAN Creates an arrow.type.BooleanType object + proxy = arrow.internal.proxy.create("arrow.type.proxy.BooleanType"); + type = arrow.type.BooleanType(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/float32.m b/matlab/src/matlab/+arrow/+type/float32.m new file mode 100644 index 0000000000000..d8c44dfc7f03e --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/float32.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = float32() +%FLOAT32 Creates an arrow.type.Float32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Float32Type"); + type = arrow.type.Float32Type(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/float64.m b/matlab/src/matlab/+arrow/+type/float64.m new file mode 100644 index 0000000000000..ae2fdc44c2a84 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/float64.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = float64() +%FLOAT64 Creates an arrow.type.Float64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Float64Type"); + type = arrow.type.Float64Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/int16.m b/matlab/src/matlab/+arrow/+type/int16.m new file mode 100644 index 0000000000000..49f3bfdaa3522 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int16.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int16() +%INT16 Creates an arrow.type.Int16Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int16Type"); + type = arrow.type.Int16Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/int32.m b/matlab/src/matlab/+arrow/+type/int32.m new file mode 100644 index 0000000000000..80673a6bb57a7 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int32.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int32() +%INT32 Creates an arrow.type.Int32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int32Type"); + type = arrow.type.Int32Type(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/int64.m b/matlab/src/matlab/+arrow/+type/int64.m new file mode 100644 index 0000000000000..7e28fdc48e520 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int64.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = int64() +%INT64 Creates an arrow.type.Int64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int64Type"); + type = arrow.type.Int64Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/int8.m b/matlab/src/matlab/+arrow/+type/int8.m new file mode 100644 index 0000000000000..d59281cfb3db2 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/int8.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = int8() +%INT8 Creates an arrow.type.Int8Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.Int8Type"); + type = arrow.type.Int8Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/string.m b/matlab/src/matlab/+arrow/+type/string.m new file mode 100644 index 0000000000000..71329adc7cc2e --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/string.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = string() +%STRING Creates an arrow.type.StringType object + proxy = arrow.internal.proxy.create("arrow.type.proxy.StringType"); + type = arrow.type.StringType(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/timestamp.m b/matlab/src/matlab/+arrow/+type/timestamp.m new file mode 100644 index 0000000000000..6ad47eae27e45 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/timestamp.m @@ -0,0 +1,25 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = timestamp(opts) +%TIMESTAMP Creates an arrow.type.TimestampType object + arguments + opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond + opts.TimeZone(1, 1) string {mustBeNonmissing} = "" + end + args = struct(TimeUnit=string(opts.TimeUnit), TimeZone=opts.TimeZone); + proxy = arrow.internal.proxy.create("arrow.type.proxy.TimestampType", args); + type = arrow.type.TimestampType(proxy); +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+type/uint16.m b/matlab/src/matlab/+arrow/+type/uint16.m new file mode 100644 index 0000000000000..75032a0253cbc --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint16.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = uint16() +%UINT16 Creates an arrow.type.UInt16Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt16Type"); + type = arrow.type.UInt16Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/uint32.m b/matlab/src/matlab/+arrow/+type/uint32.m new file mode 100644 index 0000000000000..79b821605d52a --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint32.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = uint32() +%UINT32 Creates an arrow.type.UInt32Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt32Type"); + type = arrow.type.UInt32Type(proxy); +end + diff --git a/matlab/src/matlab/+arrow/+type/uint64.m b/matlab/src/matlab/+arrow/+type/uint64.m new file mode 100644 index 0000000000000..c0965fc9bd40f --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint64.m @@ -0,0 +1,20 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +function type = uint64() +%UINT64 Creates an arrow.type.UInt64Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt64Type"); + type = arrow.type.UInt64Type(proxy); +end diff --git a/matlab/src/matlab/+arrow/+type/uint8.m b/matlab/src/matlab/+arrow/+type/uint8.m new file mode 100644 index 0000000000000..b199a3c766052 --- /dev/null +++ b/matlab/src/matlab/+arrow/+type/uint8.m @@ -0,0 +1,21 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +function type = uint8() +%UINT8 Creates an arrow.type.UInt8Type object + proxy = arrow.internal.proxy.create("arrow.type.proxy.UInt8Type"); + type = arrow.type.UInt8Type(proxy); +end + diff --git a/matlab/test/arrow/array/hNumericArray.m b/matlab/test/arrow/array/hNumericArray.m index 7938811e4213a..f9f5f1d9e4ee3 100644 --- a/matlab/test/arrow/array/hNumericArray.m +++ b/matlab/test/arrow/array/hNumericArray.m @@ -43,14 +43,6 @@ function BasicTest(tc) tc.verifyEqual(className, tc.ArrowArrayClassName); end - function ShallowCopyTest(tc) - % NumericArrays stores a shallow copy of the array keep the - % memory alive. - A = tc.ArrowArrayConstructor(tc.MatlabArrayFcn([1, 2, 3])); - tc.verifyEqual(A.MatlabArray, tc.MatlabArrayFcn([1, 2, 3])); - tc.verifyEqual(toMATLAB(A), tc.MatlabArrayFcn([1 2 3]')); - end - function ToMATLAB(tc) % Create array from a scalar A1 = tc.ArrowArrayConstructor(tc.MatlabArrayFcn(100)); @@ -154,7 +146,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn([1 2 3 4]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end end end diff --git a/matlab/test/arrow/array/tBooleanArray.m b/matlab/test/arrow/array/tBooleanArray.m index 3a565202a2775..ad6126b77fe51 100644 --- a/matlab/test/arrow/array/tBooleanArray.m +++ b/matlab/test/arrow/array/tBooleanArray.m @@ -22,7 +22,7 @@ MatlabArrayFcn = @logical MatlabConversionFcn = @logical NullSubstitutionValue = false - ArrowType = arrow.type.BooleanType + ArrowType = arrow.type.boolean end methods(TestClassSetup) @@ -155,7 +155,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn([true false]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end end end diff --git 
a/matlab/test/arrow/array/tFloat32Array.m b/matlab/test/arrow/array/tFloat32Array.m index de7b312d84c18..e8655c7781ceb 100644 --- a/matlab/test/arrow/array/tFloat32Array.m +++ b/matlab/test/arrow/array/tFloat32Array.m @@ -24,7 +24,7 @@ MaxValue = realmax("single") MinValue = realmin("single") NullSubstitutionValue = single(NaN) - ArrowType = arrow.type.Float32Type + ArrowType = arrow.type.float32 end methods(Test) diff --git a/matlab/test/arrow/array/tFloat64Array.m b/matlab/test/arrow/array/tFloat64Array.m index b4fb9ec7a07e6..a01eef73883b6 100755 --- a/matlab/test/arrow/array/tFloat64Array.m +++ b/matlab/test/arrow/array/tFloat64Array.m @@ -24,7 +24,7 @@ MaxValue = realmax("double") MinValue = realmin("double") NullSubstitutionValue = NaN - ArrowType = arrow.type.Float64Type + ArrowType = arrow.type.float64 end methods(Test) diff --git a/matlab/test/arrow/array/tInt16Array.m b/matlab/test/arrow/array/tInt16Array.m index 58193e076c228..466dfaf9c4d7f 100644 --- a/matlab/test/arrow/array/tInt16Array.m +++ b/matlab/test/arrow/array/tInt16Array.m @@ -24,7 +24,7 @@ MaxValue = intmax("int16") MinValue = intmin("int16") NullSubstitutionValue = int16(0) - ArrowType = arrow.type.Int16Type + ArrowType = arrow.type.int16 end end diff --git a/matlab/test/arrow/array/tInt32Array.m b/matlab/test/arrow/array/tInt32Array.m index 59255c1272638..b8334e97ccb9a 100644 --- a/matlab/test/arrow/array/tInt32Array.m +++ b/matlab/test/arrow/array/tInt32Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("int32") MinValue = intmin("int32") NullSubstitutionValue = int32(0) - ArrowType = arrow.type.Int32Type + ArrowType = arrow.type.int32 end end diff --git a/matlab/test/arrow/array/tInt64Array.m b/matlab/test/arrow/array/tInt64Array.m index 289b4fcf3e290..a877cb2564fe9 100644 --- a/matlab/test/arrow/array/tInt64Array.m +++ b/matlab/test/arrow/array/tInt64Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("int64") MinValue = intmin("int64") NullSubstitutionValue = int64(0) - ArrowType = arrow.type.Int64Type 
+ ArrowType = arrow.type.int64 end end diff --git a/matlab/test/arrow/array/tInt8Array.m b/matlab/test/arrow/array/tInt8Array.m index 9ae1eb8cc4fe7..dbd6e74ea7f8f 100644 --- a/matlab/test/arrow/array/tInt8Array.m +++ b/matlab/test/arrow/array/tInt8Array.m @@ -24,7 +24,7 @@ MaxValue = intmax("int8") MinValue = intmin("int8") NullSubstitutionValue = int8(0) - ArrowType = arrow.type.Int8Type + ArrowType = arrow.type.int8 end end diff --git a/matlab/test/arrow/array/tStringArray.m b/matlab/test/arrow/array/tStringArray.m index 000a57b27bcc2..792d7599816d5 100644 --- a/matlab/test/arrow/array/tStringArray.m +++ b/matlab/test/arrow/array/tStringArray.m @@ -22,7 +22,7 @@ MatlabArrayFcn = @string MatlabConversionFcn = @string NullSubstitutionValue = string(missing) - ArrowType = arrow.type.StringType + ArrowType = arrow.type.string end methods(TestClassSetup) @@ -149,7 +149,7 @@ function TestArrowType(tc) % Verify the array has the expected arrow.type.Type object data = tc.MatlabArrayFcn(["A", "B"]); arrowArray = tc.ArrowArrayConstructor(data); - tc.verifyEqual(arrowArray.Type, tc.ArrowType); + tc.verifyEqual(arrowArray.Type.ID, tc.ArrowType.ID); end function Unicode(tc) diff --git a/matlab/test/arrow/array/tUInt16Array.m b/matlab/test/arrow/array/tUInt16Array.m index b79a753694684..eed53c7882b47 100644 --- a/matlab/test/arrow/array/tUInt16Array.m +++ b/matlab/test/arrow/array/tUInt16Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint16") MinValue = intmin("uint16") NullSubstitutionValue = uint16(0) - ArrowType = arrow.type.UInt16Type + ArrowType = arrow.type.uint16 end end diff --git a/matlab/test/arrow/array/tUInt32Array.m b/matlab/test/arrow/array/tUInt32Array.m index 157cad941724d..b5e1970cbcc96 100644 --- a/matlab/test/arrow/array/tUInt32Array.m +++ b/matlab/test/arrow/array/tUInt32Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint32") MinValue = intmin("uint32") NullSubstitutionValue = uint32(0) - ArrowType = arrow.type.UInt32Type + ArrowType = arrow.type.uint32 end end 
diff --git a/matlab/test/arrow/array/tUInt64Array.m b/matlab/test/arrow/array/tUInt64Array.m index 41e479e816263..6cd2c9cba6911 100644 --- a/matlab/test/arrow/array/tUInt64Array.m +++ b/matlab/test/arrow/array/tUInt64Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint64") MinValue = intmin("uint64") NullSubstitutionValue = uint64(0) - ArrowType = arrow.type.UInt64Type + ArrowType = arrow.type.uint64 end end diff --git a/matlab/test/arrow/array/tUInt8Array.m b/matlab/test/arrow/array/tUInt8Array.m index 4aca2cced1c8d..68365958bc683 100644 --- a/matlab/test/arrow/array/tUInt8Array.m +++ b/matlab/test/arrow/array/tUInt8Array.m @@ -24,6 +24,6 @@ MaxValue = intmax("uint8") MinValue = intmin("uint8") NullSubstitutionValue = uint8(0) - ArrowType = arrow.type.UInt8Type + ArrowType = arrow.type.uint8 end end diff --git a/matlab/test/arrow/type/hPrimitiveType.m b/matlab/test/arrow/type/hFixedWidthType.m similarity index 80% rename from matlab/test/arrow/type/hPrimitiveType.m rename to matlab/test/arrow/type/hFixedWidthType.m index b757ad4b409c9..308ac46011a6c 100644 --- a/matlab/test/arrow/type/hPrimitiveType.m +++ b/matlab/test/arrow/type/hFixedWidthType.m @@ -13,17 +13,24 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef hPrimitiveType < matlab.unittest.TestCase +classdef hFixedWidthType < matlab.unittest.TestCase % Test class that defines shared unit tests for classes that inherit from -% arrow.type.PrimitiveType +% arrow.type.FixedWidthType properties(Abstract) ArrowType TypeID BitWidth + ClassName end methods(Test) + function TestClass(testCase) + % Verify ArrowType is an object of the expected class type. + name = string(class(testCase.ArrowType)); + testCase.verifyEqual(name, testCase.ClassName); + end + function TestTypeID(testCase) % Verify ID is set to the appropriate arrow.type.ID value. 
arrowType = testCase.ArrowType; @@ -39,13 +46,7 @@ function TestBitWidth(testCase) function TestNumFields(testCase) % Verify NumFields is set to 0 for primitive types. arrowType = testCase.ArrowType; - testCase.verifyEqual(arrowType.NumFields, 0); - end - - function TestNumBuffers(testCase) - % Verify NumBuffers is set to 2 for primitive types. - arrowType = testCase.ArrowType; - testCase.verifyEqual(arrowType.NumBuffers, 2); + testCase.verifyEqual(arrowType.NumFields, int32(0)); end end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tBooleanType.m b/matlab/test/arrow/type/tBooleanType.m index 23884991f2065..94de09a3e58f1 100644 --- a/matlab/test/arrow/type/tBooleanType.m +++ b/matlab/test/arrow/type/tBooleanType.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tBooleanType < hPrimitiveType +classdef tBooleanType < hFixedWidthType % Test class for arrow.type.BooleanType properties - ArrowType = arrow.type.BooleanType + ArrowType = arrow.type.boolean TypeID = arrow.type.ID.Boolean - BitWidth = 1; + BitWidth = int32(1) + ClassName = "arrow.type.BooleanType" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat32Type.m b/matlab/test/arrow/type/tFloat32Type.m index 8c4fa5f402942..c54fcfd32809b 100644 --- a/matlab/test/arrow/type/tFloat32Type.m +++ b/matlab/test/arrow/type/tFloat32Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef tFloat32Type < hPrimitiveType +classdef tFloat32Type < hFixedWidthType % Test class for arrow.type.Float32Type properties - ArrowType = arrow.type.Float32Type + ArrowType = arrow.type.float32 TypeID = arrow.type.ID.Float32 - BitWidth = 32; + BitWidth = int32(32) + ClassName = "arrow.type.Float32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tFloat64Type.m b/matlab/test/arrow/type/tFloat64Type.m index c4489c4080341..6b5648dfc10e3 100644 --- a/matlab/test/arrow/type/tFloat64Type.m +++ b/matlab/test/arrow/type/tFloat64Type.m @@ -13,12 +13,14 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tFloat64Type < hPrimitiveType +classdef tFloat64Type < hFixedWidthType % Test class for arrow.type.Float64Type properties - ArrowType = arrow.type.Float64Type + ArrowType = arrow.type.float64 TypeID = arrow.type.ID.Float64 - BitWidth = 64; + BitWidth = int32(64) + ClassName = "arrow.type.Float64Type" + end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tID.m b/matlab/test/arrow/type/tID.m index 10c99dfab8775..344d2dd0f5a96 100644 --- a/matlab/test/arrow/type/tID.m +++ b/matlab/test/arrow/type/tID.m @@ -26,22 +26,6 @@ function verifyOnMatlabPath(tc) end methods (Test) - function bitWidth(testCase) - import arrow.type.ID - - typeIDs = [ID.Boolean, ID.UInt8, ID.Int8, ID.UInt16, ... - ID.Int16, ID.UInt32, ID.Int32, ID.UInt64, ... 
- ID.Int64, ID.Float32, ID.Float64]; - - expectedWidths = [1, 8, 8, 16, 16, 32, 32, 64, 64, 32, 64]; - - for ii = 1:numel(typeIDs) - actualWidth = bitWidth(typeIDs(ii)); - expectedWidth = expectedWidths(ii); - testCase.verifyEqual(actualWidth, expectedWidth); - end - end - function CastToUInt64(testCase) import arrow.type.ID diff --git a/matlab/test/arrow/type/tInt16Type.m b/matlab/test/arrow/type/tInt16Type.m index b5b5e803dfd06..a929ba688b5cd 100644 --- a/matlab/test/arrow/type/tInt16Type.m +++ b/matlab/test/arrow/type/tInt16Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tInt16Type < hPrimitiveType +classdef tInt16Type < hFixedWidthType % Test class for arrow.type.Int16Type properties - ArrowType = arrow.type.Int16Type + ArrowType = arrow.type.int16 TypeID = arrow.type.ID.Int16 - BitWidth = 16; + BitWidth = int32(16) + ClassName = "arrow.type.Int16Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt32Type.m b/matlab/test/arrow/type/tInt32Type.m index ab9c1bf4a7afa..6d59b5454e7fc 100644 --- a/matlab/test/arrow/type/tInt32Type.m +++ b/matlab/test/arrow/type/tInt32Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tInt32Type < hPrimitiveType +classdef tInt32Type < hFixedWidthType % Test class for arrow.type.Int32Type properties - ArrowType = arrow.type.Int32Type + ArrowType = arrow.type.int32 TypeID = arrow.type.ID.Int32 - BitWidth = 32; + BitWidth = int32(32) + ClassName = "arrow.type.Int32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt64Type.m b/matlab/test/arrow/type/tInt64Type.m index b5a273f0f36a0..6ff0d2b07cbac 100644 --- a/matlab/test/arrow/type/tInt64Type.m +++ b/matlab/test/arrow/type/tInt64Type.m @@ -13,12 +13,13 @@ % implied. 
See the License for the specific language governing % permissions and limitations under the License. -classdef tInt64Type < hPrimitiveType +classdef tInt64Type < hFixedWidthType % Test class for arrow.type.Int64Type properties - ArrowType = arrow.type.Int64Type + ArrowType = arrow.type.int64 TypeID = arrow.type.ID.Int64 - BitWidth = 64; + BitWidth = int32(64) + ClassName = "arrow.type.Int64Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tInt8Type.m b/matlab/test/arrow/type/tInt8Type.m index 7e8e06790d460..396be3a3f715a 100644 --- a/matlab/test/arrow/type/tInt8Type.m +++ b/matlab/test/arrow/type/tInt8Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tInt8Type < hPrimitiveType +classdef tInt8Type < hFixedWidthType % Test class for arrow.type.Int8Type properties - ArrowType = arrow.type.Int8Type + ArrowType = arrow.type.int8 TypeID = arrow.type.ID.Int8 - BitWidth = 8; + BitWidth = int32(8) + ClassName = "arrow.type.Int8Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tStringType.m b/matlab/test/arrow/type/tStringType.m index f3cf101ac6185..057ffd5426345 100644 --- a/matlab/test/arrow/type/tStringType.m +++ b/matlab/test/arrow/type/tStringType.m @@ -19,20 +19,15 @@ methods (Test) function Basic(tc) - type = arrow.type.StringType; + type = arrow.type.string; className = string(class(type)); tc.verifyEqual(className, "arrow.type.StringType"); tc.verifyEqual(type.ID, arrow.type.ID.String); end - function NumBuffers(tc) - type = arrow.type.StringType; - tc.verifyEqual(type.NumBuffers, 3); - end - function NumFields(tc) - type = arrow.type.StringType; - tc.verifyEqual(type.NumFields, 0); + type = arrow.type.string; + tc.verifyEqual(type.NumFields, int32(0)); end end diff --git a/matlab/test/arrow/type/tTimeUnit.m b/matlab/test/arrow/type/tTimeUnit.m index b01de443443c5..0c2432193a3af 100644 --- 
a/matlab/test/arrow/type/tTimeUnit.m +++ b/matlab/test/arrow/type/tTimeUnit.m @@ -31,9 +31,9 @@ function TicksPerSecond(testCase) import arrow.type.TimeUnit units = [TimeUnit.Second, TimeUnit.Millisecond, ... TimeUnit.Microsecond, TimeUnit.Nanosecond]'; - ticksPerSecond = [1 1e3 1e6 1e9]; + ticks = [1 1e3 1e6 1e9]; for ii = 1:numel(units) - testCase.verifyEqual(units(ii).TicksPerSecond, ticksPerSecond(ii)); + testCase.verifyEqual(ticksPerSecond(units(ii)), ticks(ii)); end end end diff --git a/matlab/test/arrow/type/tTimestampType.m b/matlab/test/arrow/type/tTimestampType.m index f8a9a37f32a63..fa893d2d930de 100644 --- a/matlab/test/arrow/type/tTimestampType.m +++ b/matlab/test/arrow/type/tTimestampType.m @@ -13,19 +13,26 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tTimestampType < hPrimitiveType +classdef tTimestampType < hFixedWidthType % Test class for arrow.type.TimestampType properties - ArrowType = arrow.type.TimestampType + ArrowType = arrow.type.timestamp TypeID = arrow.type.ID.Timestamp - BitWidth = 64; + BitWidth = int32(64) + ClassName = "arrow.type.TimestampType" end methods(Test) + function TestClass(testCase) + % Verify ArrowType is an object of the expected class type. 
+ name = string(class(testCase.ArrowType)); + testCase.verifyEqual(name, testCase.ClassName); + end + function DefaultTimeUnit(testCase) % Verify the default TimeUnit is Microsecond - type = arrow.type.TimestampType; + type = arrow.type.timestamp; actualUnit = type.TimeUnit; expectedUnit = arrow.type.TimeUnit.Microsecond; testCase.verifyEqual(actualUnit, expectedUnit); @@ -33,7 +40,7 @@ function DefaultTimeUnit(testCase) function DefaultTimeZone(testCase) % Verify the default TimeZone is "" - type = arrow.type.TimestampType; + type = arrow.type.timestamp; actualTimezone = type.TimeZone; expectedTimezone = ""; testCase.verifyEqual(actualTimezone, expectedTimezone); @@ -46,7 +53,7 @@ function SupplyTimeUnitEnum(testCase) TimeUnit.Microsecond, TimeUnit.Nanosecond]; for unit = expectedUnit - type = TimestampType(TimeUnit=unit); + type = timestamp(TimeUnit=unit); testCase.verifyEqual(type.TimeUnit, unit); end end @@ -60,42 +67,42 @@ function SupplyTimeUnitString(testCase) TimeUnit.Microsecond, TimeUnit.Nanosecond]; for ii = 1:numel(unitString) - type = TimestampType(TimeUnit=unitString(ii)); + type = timestamp(TimeUnit=unitString(ii)); testCase.verifyEqual(type.TimeUnit, expectedUnit(ii)); end end function SupplyTimeZone(testCase) % Supply the TimeZone. 
- type = arrow.type.TimestampType(TimeZone="America/New_York"); + type = arrow.type.timestamp(TimeZone="America/New_York"); testCase.verifyEqual(type.TimeZone, "America/New_York"); end function ErrorIfMissingStringTimeZone(testCase) - fcn = @() arrow.type.TimestampType(TimeZone=string(missing)); + fcn = @() arrow.type.timestamp(TimeZone=string(missing)); testCase.verifyError(fcn, "MATLAB:validators:mustBeNonmissing"); end function ErrorIfTimeZoneIsNonScalar(testCase) - fcn = @() arrow.type.TimestampType(TimeZone=["a", "b"]); + fcn = @() arrow.type.timestamp(TimeZone=["a", "b"]); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); - fcn = @() arrow.type.TimestampType(TimeZone=strings(0, 0)); + fcn = @() arrow.type.timestamp(TimeZone=strings(0, 0)); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); end function ErrorIfAmbiguousTimeUnit(testCase) - fcn = @() arrow.type.TimestampType(TimeUnit="mi"); + fcn = @() arrow.type.timestamp(TimeUnit="mi"); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end function ErrorIfTimeUnitIsNonScalar(testCase) units = [arrow.type.TimeUnit.Second; arrow.type.TimeUnit.Millisecond]; - fcn = @() arrow.type.TimestampType(TimeZone=units); + fcn = @() arrow.type.timestamp(TimeZone=units); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); units = ["second" "millisecond"]; - fcn = @() arrow.type.TimestampType(TimeZone=units); + fcn = @() arrow.type.timestamp(TimeZone=units); testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize"); end end diff --git a/matlab/test/arrow/type/tUInt16Type.m b/matlab/test/arrow/type/tUInt16Type.m index b5102ace34d84..ede66f6324691 100644 --- a/matlab/test/arrow/type/tUInt16Type.m +++ b/matlab/test/arrow/type/tUInt16Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. 
-classdef tUInt16Type < hPrimitiveType +classdef tUInt16Type < hFixedWidthType % Test class for arrow.type.UInt16Type properties - ArrowType = arrow.type.UInt16Type + ArrowType = arrow.type.uint16 TypeID = arrow.type.ID.UInt16 - BitWidth = 16; + BitWidth = int32(16) + ClassName = "arrow.type.UInt16Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt32Type.m b/matlab/test/arrow/type/tUInt32Type.m index 8f86eec7f53c3..def24c76ceb76 100644 --- a/matlab/test/arrow/type/tUInt32Type.m +++ b/matlab/test/arrow/type/tUInt32Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt32Type < hPrimitiveType +classdef tUInt32Type < hFixedWidthType % Test class for arrow.type.UInt32Type properties - ArrowType = arrow.type.UInt32Type + ArrowType = arrow.type.uint32 TypeID = arrow.type.ID.UInt32 - BitWidth = 32; + BitWidth = int32(32) + ClassName = "arrow.type.UInt32Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt64Type.m b/matlab/test/arrow/type/tUInt64Type.m index 7f3084616d35f..9228e1cc504d6 100644 --- a/matlab/test/arrow/type/tUInt64Type.m +++ b/matlab/test/arrow/type/tUInt64Type.m @@ -13,12 +13,13 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt64Type < hPrimitiveType +classdef tUInt64Type < hFixedWidthType % Test class for arrow.type.UInt64Type properties - ArrowType = arrow.type.UInt64Type + ArrowType = arrow.type.uint64 TypeID = arrow.type.ID.UInt64 - BitWidth = 64; + BitWidth = int32(64) + ClassName = "arrow.type.UInt64Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/tUInt8Type.m b/matlab/test/arrow/type/tUInt8Type.m index 6dfc8a4694359..eec3aa5fdec25 100644 --- a/matlab/test/arrow/type/tUInt8Type.m +++ b/matlab/test/arrow/type/tUInt8Type.m @@ -13,12 +13,13 @@ % implied. 
See the License for the specific language governing % permissions and limitations under the License. -classdef tUInt8Type < hPrimitiveType +classdef tUInt8Type < hFixedWidthType % Test class for arrow.type.UInt64Type properties - ArrowType = arrow.type.UInt8Type + ArrowType = arrow.type.uint8 TypeID = arrow.type.ID.UInt8 - BitWidth = 8; + BitWidth = int32(8) + ClassName = "arrow.type.UInt8Type" end end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/hTypeTraits.m b/matlab/test/arrow/type/traits/hTypeTraits.m new file mode 100644 index 0000000000000..df62fdd325f2f --- /dev/null +++ b/matlab/test/arrow/type/traits/hTypeTraits.m @@ -0,0 +1,78 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef hTypeTraits < matlab.unittest.TestCase +% Superclass for tests that validate the behavior of "type trait" objects +% like arrow.type.traits.StringTraits. 
+ + properties (Abstract) + TraitsConstructor + ArrayConstructor + ArrayClassName + ArrayProxyClassName + TypeConstructor + TypeClassName + TypeProxyClassName + MatlabConstructor + MatlabClassName + end + + properties + Traits + end + + methods (TestMethodSetup) + function setupTraits(testCase) + testCase.Traits = testCase.TraitsConstructor(); + end + end + + methods(Test) + + function TestArrayConstructor(testCase) + testCase.verifyEqual(testCase.Traits.ArrayConstructor, testCase.ArrayConstructor); + end + + function TestArrayClassName(testCase) + testCase.verifyEqual(testCase.Traits.ArrayClassName, testCase.ArrayClassName); + end + + function TestArrayProxyClassName(testCase) + testCase.verifyEqual(testCase.Traits.ArrayProxyClassName, testCase.ArrayProxyClassName); + end + + function TestTypeConstructor(testCase) + testCase.verifyEqual(testCase.Traits.TypeConstructor, testCase.TypeConstructor); + end + + function TestTypeClassName(testCase) + testCase.verifyEqual(testCase.Traits.TypeClassName, testCase.TypeClassName); + end + + function TestTypeProxyClassName(testCase) + testCase.verifyEqual(testCase.Traits.TypeProxyClassName, testCase.TypeProxyClassName); + end + + function TestMatlabConstructor(testCase) + testCase.verifyEqual(testCase.Traits.MatlabConstructor, testCase.MatlabConstructor); + end + + function TestMatlabClassName(testCase) + testCase.verifyEqual(testCase.Traits.MatlabClassName, testCase.MatlabClassName); + end + + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tBooleanTraits.m b/matlab/test/arrow/type/traits/tBooleanTraits.m new file mode 100644 index 0000000000000..859dc630a1fc7 --- /dev/null +++ b/matlab/test/arrow/type/traits/tBooleanTraits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. 
+% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tBooleanTraits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.BooleanTraits + ArrayConstructor = @arrow.array.BooleanArray + ArrayClassName = "arrow.array.BooleanArray" + ArrayProxyClassName = "arrow.array.proxy.BooleanArray" + TypeConstructor = @arrow.type.BooleanType + TypeClassName = "arrow.type.BooleanType" + TypeProxyClassName = "arrow.type.proxy.BooleanType" + MatlabConstructor = @logical + MatlabClassName = "logical" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tInt16Traits.m b/matlab/test/arrow/type/traits/tInt16Traits.m new file mode 100644 index 0000000000000..bde308d28e68a --- /dev/null +++ b/matlab/test/arrow/type/traits/tInt16Traits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. + +classdef tInt16Traits < hTypeTraits + + properties + TraitsConstructor = @arrow.type.traits.Int16Traits + ArrayConstructor = @arrow.array.Int16Array + ArrayClassName = "arrow.array.Int16Array" + ArrayProxyClassName = "arrow.array.proxy.Int16Array" + TypeConstructor = @arrow.type.Int16Type + TypeClassName = "arrow.type.Int16Type" + TypeProxyClassName = "arrow.type.proxy.Int16Type" + MatlabConstructor = @int16 + MatlabClassName = "int16" + end + +end \ No newline at end of file diff --git a/matlab/test/arrow/type/traits/tInt32Traits.m b/matlab/test/arrow/type/traits/tInt32Traits.m new file mode 100644 index 0000000000000..651f647455408 --- /dev/null +++ b/matlab/test/arrow/type/traits/tInt32Traits.m @@ -0,0 +1,30 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+
+classdef tInt32Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.Int32Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.Int32Traits
+        ArrayConstructor = @arrow.array.Int32Array
+        ArrayClassName = "arrow.array.Int32Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int32Array"
+        TypeConstructor = @arrow.type.Int32Type
+        TypeClassName = "arrow.type.Int32Type"
+        TypeProxyClassName = "arrow.type.proxy.Int32Type"
+        MatlabConstructor = @int32
+        MatlabClassName = "int32"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt64Traits.m b/matlab/test/arrow/type/traits/tInt64Traits.m
new file mode 100644
index 0000000000000..4f16c91eb4e09
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt64Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt64Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.Int64Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.Int64Traits
+        ArrayConstructor = @arrow.array.Int64Array
+        ArrayClassName = "arrow.array.Int64Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int64Array"
+        TypeConstructor = @arrow.type.Int64Type
+        TypeClassName = "arrow.type.Int64Type"
+        TypeProxyClassName = "arrow.type.proxy.Int64Type"
+        MatlabConstructor = @int64
+        MatlabClassName = "int64"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tInt8Traits.m b/matlab/test/arrow/type/traits/tInt8Traits.m
new file mode 100644
index 0000000000000..3e767abbebba4
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tInt8Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tInt8Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.Int8Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.Int8Traits
+        ArrayConstructor = @arrow.array.Int8Array
+        ArrayClassName = "arrow.array.Int8Array"
+        ArrayProxyClassName = "arrow.array.proxy.Int8Array"
+        TypeConstructor = @arrow.type.Int8Type
+        TypeClassName = "arrow.type.Int8Type"
+        TypeProxyClassName = "arrow.type.proxy.Int8Type"
+        MatlabConstructor = @int8
+        MatlabClassName = "int8"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tStringTraits.m b/matlab/test/arrow/type/traits/tStringTraits.m
new file mode 100644
index 0000000000000..68f061d1b031d
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tStringTraits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tStringTraits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.StringTraits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.StringTraits
+        ArrayConstructor = @arrow.array.StringArray
+        ArrayClassName = "arrow.array.StringArray"
+        ArrayProxyClassName = "arrow.array.proxy.StringArray"
+        TypeConstructor = @arrow.type.StringType
+        TypeClassName = "arrow.type.StringType"
+        TypeProxyClassName = "arrow.type.proxy.StringType"
+        MatlabConstructor = @string
+        MatlabClassName = "string"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tTimestampTraits.m b/matlab/test/arrow/type/traits/tTimestampTraits.m
new file mode 100644
index 0000000000000..5f451c0631465
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tTimestampTraits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tTimestampTraits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.TimestampTraits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.TimestampTraits
+        ArrayConstructor = @arrow.array.TimestampArray
+        ArrayClassName = "arrow.array.TimestampArray"
+        ArrayProxyClassName = "arrow.array.proxy.TimestampArray"
+        TypeConstructor = @arrow.type.TimestampType
+        TypeClassName = "arrow.type.TimestampType"
+        TypeProxyClassName = "arrow.type.proxy.TimestampType"
+        MatlabConstructor = @datetime
+        MatlabClassName = "datetime"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tUInt16Traits.m b/matlab/test/arrow/type/traits/tUInt16Traits.m
new file mode 100644
index 0000000000000..4a9eef6f2978d
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tUInt16Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tUInt16Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.UInt16Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.UInt16Traits
+        ArrayConstructor = @arrow.array.UInt16Array
+        ArrayClassName = "arrow.array.UInt16Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt16Array"
+        TypeConstructor = @arrow.type.UInt16Type
+        TypeClassName = "arrow.type.UInt16Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt16Type"
+        MatlabConstructor = @uint16
+        MatlabClassName = "uint16"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tUInt32Traits.m b/matlab/test/arrow/type/traits/tUInt32Traits.m
new file mode 100644
index 0000000000000..227e42c4eb0ec
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tUInt32Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tUInt32Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.UInt32Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.UInt32Traits
+        ArrayConstructor = @arrow.array.UInt32Array
+        ArrayClassName = "arrow.array.UInt32Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt32Array"
+        TypeConstructor = @arrow.type.UInt32Type
+        TypeClassName = "arrow.type.UInt32Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt32Type"
+        MatlabConstructor = @uint32
+        MatlabClassName = "uint32"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tUInt64Traits.m b/matlab/test/arrow/type/traits/tUInt64Traits.m
new file mode 100644
index 0000000000000..370e905f27736
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tUInt64Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tUInt64Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.UInt64Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.UInt64Traits
+        ArrayConstructor = @arrow.array.UInt64Array
+        ArrayClassName = "arrow.array.UInt64Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt64Array"
+        TypeConstructor = @arrow.type.UInt64Type
+        TypeClassName = "arrow.type.UInt64Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt64Type"
+        MatlabConstructor = @uint64
+        MatlabClassName = "uint64"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/tUInt8Traits.m b/matlab/test/arrow/type/traits/tUInt8Traits.m
new file mode 100644
index 0000000000000..d93f9d3c1b942
--- /dev/null
+++ b/matlab/test/arrow/type/traits/tUInt8Traits.m
@@ -0,0 +1,30 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tUInt8Traits < hTypeTraits
+    % Property-only fixture for arrow.type.traits.UInt8Traits; presumably read by tests in hTypeTraits (not shown here).
+    properties
+        TraitsConstructor = @arrow.type.traits.UInt8Traits
+        ArrayConstructor = @arrow.array.UInt8Array
+        ArrayClassName = "arrow.array.UInt8Array"
+        ArrayProxyClassName = "arrow.array.proxy.UInt8Array"
+        TypeConstructor = @arrow.type.UInt8Type
+        TypeClassName = "arrow.type.UInt8Type"
+        TypeProxyClassName = "arrow.type.proxy.UInt8Type"
+        MatlabConstructor = @uint8
+        MatlabClassName = "uint8"
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/traits/ttraits.m b/matlab/test/arrow/type/traits/ttraits.m
new file mode 100644
index 0000000000000..14149a5ebff48
--- /dev/null
+++ b/matlab/test/arrow/type/traits/ttraits.m
@@ -0,0 +1,320 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef ttraits < matlab.unittest.TestCase
+    % Tests for the type traits (i.e. arrow.type.traits.traits)
+    % "gateway" function.
+
+    methods(Test)
+        % Each test here passes an arrow.type.ID value to traits() and compares with the directly constructed traits object.
+        function TestUInt8(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.UInt8;
+            expectedTraits = UInt8Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestUInt16(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.UInt16;
+            expectedTraits = UInt16Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestUInt32(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.UInt32;
+            expectedTraits = UInt32Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestUInt64(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.UInt64;
+            expectedTraits = UInt64Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestInt8(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Int8;
+            expectedTraits = Int8Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestInt16(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Int16;
+            expectedTraits = Int16Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestInt32(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Int32;
+            expectedTraits = Int32Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestInt64(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Int64;
+            expectedTraits = Int64Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestString(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type =
ID.String;
+            expectedTraits = StringTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestTimestamp(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Timestamp;
+            expectedTraits = TimestampTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestBoolean(testCase)
+            import arrow.type.traits.*
+            import arrow.type.*
+
+            type = ID.Boolean;
+            expectedTraits = BooleanTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+        % The TestMatlab* tests pass a MATLAB class-name string to traits() instead of an arrow.type.ID value.
+        function TestMatlabUInt8(testCase)
+            import arrow.type.traits.*
+
+            type = "uint8";
+            expectedTraits = UInt8Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabUInt16(testCase)
+            import arrow.type.traits.*
+
+            type = "uint16";
+            expectedTraits = UInt16Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabUInt32(testCase)
+            import arrow.type.traits.*
+
+            type = "uint32";
+            expectedTraits = UInt32Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabUInt64(testCase)
+            import arrow.type.traits.*
+
+            type = "uint64";
+            expectedTraits = UInt64Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabInt8(testCase)
+            import arrow.type.traits.*
+
+            type = "int8";
+            expectedTraits = Int8Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabInt16(testCase)
+            import arrow.type.traits.*
+
+            type = "int16";
+            expectedTraits = Int16Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabInt32(testCase)
+            import
arrow.type.traits.*
+
+            type = "int32";
+            expectedTraits = Int32Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabInt64(testCase)
+            import arrow.type.traits.*
+
+            type = "int64";
+            expectedTraits = Int64Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabSingle(testCase)
+            import arrow.type.traits.*
+
+            type = "single";
+            expectedTraits = Float32Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabDouble(testCase)
+            import arrow.type.traits.*
+
+            type = "double";
+            expectedTraits = Float64Traits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabLogical(testCase)
+            import arrow.type.traits.*
+
+            type = "logical";
+            expectedTraits = BooleanTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabString(testCase)
+            import arrow.type.traits.*
+
+            type = "string";
+            expectedTraits = StringTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+
+        function TestMatlabDatetime(testCase)
+            import arrow.type.traits.*
+
+            type = "datetime";
+            expectedTraits = TimestampTraits();
+
+            actualTraits = traits(type);
+
+            testCase.verifyEqual(actualTraits, expectedTraits);
+        end
+        % Error paths: each test below expects traits() to throw the named error identifier.
+        function TestErrorIfUnsupportedMatlabClass(testCase)
+            import arrow.type.traits.*
+
+            type = "not-a-class";  % a string, but not one of the class names exercised above
+
+            testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedMatlabClass");
+        end
+
+        function TestErrorIfUnsupportedInputType(testCase)
+            import arrow.type.traits.*
+
+            type = 123;  % numeric input
+            testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType");
+
+            type = {'double'};  % cell array wrapping a class name
+            testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType");
+
+
type = datetime(2023, 1, 1);  % a datetime value, as opposed to the class-name string "datetime"
+            testCase.verifyError(@() traits(type), "arrow:type:traits:UnsupportedInputType");
+        end
+
+    end
+
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 419f2ae459b81..c10ce07280fa6 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -24,7 +24,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_NAME libmexclass)
 # libmexclass is accessible for CI without permission issues.
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_REPOSITORY "https://github.com/mathworks/libmexclass.git")
 # Use a specific Git commit hash to avoid libmexclass version changing unexpectedly.
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "3465900")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "d04f88d")
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp")
 
@@ -37,17 +37,21 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOUR
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy"
                                                       "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit"
                                                       "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error"
-                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type")
-
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type"
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy"
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer")
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/string_array.cc"
-                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/pack.cc"
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc"
-                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc")
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/type.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/fixed_width_type.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/string_type.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/proxy/timestamp_type.cc")
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc")
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index c5f0a663a814c..925565804f63e 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -2348,10 +2348,9 @@ cdef class Partitioning(_Weakrefable):
         return self.wrapped
 
     def __eq__(self, other):
-        try:
+        if isinstance(other, Partitioning):
             return self.partitioning.Equals(deref((<Partitioning>other).unwrap()))
-        except TypeError:
-            return False
+        return False
 
     def parse(self, path):
         cdef CResult[CExpression] result
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index e47e5d3f3eb3b..98ab84c03900f 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1756,6 +1756,17 @@ def test_logical():
     assert pc.invert(a) == pa.array([False, True, True, None])
 
 
+def test_dictionary_decode():
+    array = pa.array(["a", "a", "b", "c", "b"])
+    dictionary_array = array.dictionary_encode()
+    dictionary_array_decode = pc.dictionary_decode(dictionary_array)
+
+    assert array != dictionary_array
+
+    assert array == dictionary_array_decode
+    assert array == pc.dictionary_decode(array)
+
+
 def test_cast():
     arr = pa.array([1, 2, 3, 4], type='int64')
     options = pc.CastOptions(pa.int8())
diff --git a/python/pyarrow/tests/test_dataset.py
b/python/pyarrow/tests/test_dataset.py
index 2f9b6a0922351..a70cf2fbc72af 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -589,6 +589,7 @@ def test_partitioning():
         partitioning = klass(schema)
         assert isinstance(partitioning, ds.Partitioning)
         assert partitioning == klass(schema)
+        assert partitioning != "other object"
 
     schema = pa.schema([
         pa.field('group', pa.int64()),
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 457734bb73325..61cfb1af587a7 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -2424,6 +2424,7 @@ def test_numpy_asarray(constructor):
     assert result.dtype == "int32"
 
 
+@pytest.mark.acero
 def test_invalid_non_join_column():
     NUM_ITEMS = 30
     t1 = pa.Table.from_pydict({
diff --git a/python/pyproject.toml b/python/pyproject.toml
index fe8c938a9ce4f..7e61304585809 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -17,7 +17,7 @@
 
 [build-system]
 requires = [
-    "cython >= 0.29.31",
+    "cython >= 0.29.31,<3",
     "oldest-supported-numpy>=0.14",
     "setuptools_scm",
     "setuptools >= 40.1.0",
diff --git a/python/requirements-build.txt b/python/requirements-build.txt
index 507e9081373e2..6378d1b94e1bb 100644
--- a/python/requirements-build.txt
+++ b/python/requirements-build.txt
@@ -1,4 +1,4 @@
-cython>=0.29.31
+cython>=0.29.31,<3
 oldest-supported-numpy>=0.14
 setuptools_scm
 setuptools>=38.6.0
diff --git a/python/requirements-wheel-build.txt b/python/requirements-wheel-build.txt
index 6043d2ffb2c6e..e4f5243fbc2fe 100644
--- a/python/requirements-wheel-build.txt
+++ b/python/requirements-wheel-build.txt
@@ -1,4 +1,4 @@
-cython>=0.29.31
+cython>=0.29.31,<3
 oldest-supported-numpy>=0.14
 setuptools_scm
 setuptools>=58
diff --git a/python/setup.py b/python/setup.py
index f06cb5a627562..dc529679c7f90 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -40,8 +40,34 @@
 # Check if we're running 64-bit Python
 is_64_bit = sys.maxsize > 2**32
 
-if Cython.__version__ < '0.29.31':
-    raise Exception('Please upgrade to Cython 0.29.31 or newer')
+
+def _cython_release(version):
+    """Return the leading numeric components of a version string.
+
+    '0.29.31' -> (0, 29, 31); '3.0a11' -> (3, 0). Parsing stops at the
+    first component that is not purely numeric.
+    """
+    release = []
+    for part in version.split('.'):
+        digits = ''
+        for char in part:
+            if not char.isdigit():
+                break
+            digits += char
+        if not digits:
+            break
+        release.append(int(digits))
+        if len(digits) != len(part):
+            # Pre-release suffix ('a11', 'rc1'): stop here.
+            break
+    return tuple(release)
+
+
+# Compare numerically: plain string comparison misorders versions such as
+# '0.29.4' (sorts after '0.29.31') and '10.0' (sorts before '3.0').
+if not (0, 29, 31) <= _cython_release(Cython.__version__) < (3,):
+    raise Exception(
+        'Please update your Cython version. Supported Cython >= 0.29.31, < 3.0')
 
 setup_dir = os.path.abspath(os.path.dirname(__file__))
 
@@ -491,7 +517,7 @@ def has_ext_modules(foo):
                                  'pyarrow/_generated_version.py'),
         'version_scheme': guess_next_dev_version
     },
-    setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires,
+    setup_requires=['setuptools_scm', 'cython >= 0.29.31,<3'] + setup_requires,
     install_requires=install_requires,
     tests_require=['pytest', 'pandas', 'hypothesis'],
     python_requires='>=3.8',
diff --git a/r/NEWS.md b/r/NEWS.md
index f358c2aae45fc..45730a7b36018 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -19,6 +19,10 @@
 
 # arrow 12.0.1.9000
 
+# arrow 12.0.1.1
+
+* Update a package version reference to be text only instead of numeric due to CRAN update requiring this (#36353, #36364)
+
 # arrow 12.0.1
 
 * Update the version of the date library vendored with Arrow C++ library
diff --git a/r/configure b/r/configure
index 198a89cd85e83..a0f75f8ddb5cd 100755
--- a/r/configure
+++ b/r/configure
@@ -50,10 +50,10 @@
 # Currently the configure script doesn't offer much to make this easy.
 # If you expect to rebuild multiple times, you should set up a dev
 # environment.
-# * Installing a dev version as a regular developer. 
+# * Installing a dev version as a regular developer.
 # The best way is to maintain your own cmake build and install it
 # to a directory (not system) that you set as the env var
-# $ARROW_HOME. 
+# $ARROW_HOME.
 #
 # For more information, see the various installation and developer vignettes.
 
@@ -177,7 +177,7 @@ find_arrow () {
   else
     PC_LIB_VERSION=`grep '^Version' ${_LIBARROW_FOUND}/lib/pkgconfig/arrow.pc | sed s/Version:\ //`
   fi
-  # This is in an R script for convenience and testability. 
+  # This is in an R script for convenience and testability.
# Success means the found C++ library is ok to use. # Error means the versions don't line up and we shouldn't use it. # More specific messaging to the user is in the R script @@ -238,7 +238,7 @@ do_autobrew () { # Setup for local autobrew testing if [ -f "tools/apache-arrow.rb" ]; then # If you want to use a local apache-arrow.rb formula, do - # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb tools/apache-arrow.rb + # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools # before R CMD build or INSTALL (assuming a local checkout of the apache/arrow repository). # If you have this, you should use the local autobrew script so they match. cp tools/autobrew . diff --git a/r/tools/autobrew b/r/tools/autobrew index f181309892174..35ffebcab3796 100644 --- a/r/tools/autobrew +++ b/r/tools/autobrew @@ -62,7 +62,7 @@ fi # Hardcode this for my custom autobrew build rm -f $BREWDIR/lib/*.dylib AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -laws-crt-cpp -laws-c-io -laws-c-s3 -laws-c-auth -laws-c-http -laws-c-cal -laws-c-compression -laws-c-mqtt -lpthread -lcurl" -PKG_LIBS="-lparquet -larrow_dataset -larrow_acero -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy -lzstd $AWS_LIBS" +PKG_LIBS="-lparquet -larrow_dataset -larrow_acero -larrow -larrow_bundled_dependencies -lthrift -lbrotlienc-static -lbrotlidec-static -lbrotlicommon-static -llz4 -lsnappy -lzstd $AWS_LIBS -lssl -lcrypto" PKG_DIRS="-L$BREWDIR/lib" # Prevent CRAN builder from linking against old libs in /usr/local/lib