From 7b122024303973c5594efc6eb6c77bf17fe1570e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?=
Date: Mon, 31 Dec 2018 10:43:58 -0600
Subject: [PATCH 001/203] ARROW-4135: [Python] Can't reload a pandas dataframe
 containing a list of datetime.time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reproduced via pyarrow 0.11.1.

Author: Krisztián Szűcs

Closes #3289 from kszucs/ARROW-4135 and squashes the following commits:

70ca3e08 missing arg
511808bd forgot to uncomment
3093957f test
---
 python/pyarrow/tests/test_parquet.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 9f05170bdbeba..3a6c84678eba2 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -921,6 +921,14 @@ def _assert_unsupported(array):
     _assert_unsupported(a7)

+def test_list_of_datetime_time_roundtrip():
+    # ARROW-4135
+    times = pd.to_datetime(['09:00', '09:30', '10:00', '10:30', '11:00',
+                            '11:30', '12:00'])
+    df = pd.DataFrame({'time': [times.time]})
+    _roundtrip_pandas_dataframe(df, write_kwargs={})
+
+
 def test_large_list_records():
     # This was fixed in PARQUET-1100

From 8d792b1c196dcb5f745cb48313558a9a35baccc2 Mon Sep 17 00:00:00 2001
From: Deepak Majeti
Date: Mon, 31 Dec 2018 10:50:30 -0600
Subject: [PATCH 002/203] PARQUET-1484: [C++] Improve memory usage of
 FileMetaDataBuilder

Author: Deepak Majeti

Closes #3277 from majetideepak/PARQUET-1484 and squashes the following commits:

212e5230 PARQUET-1484: Improve memory usage of FileMetaDataBuilder
---
 cpp/src/parquet/metadata.cc | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 6ac53c58afed4..f05918d9fd7f0 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -851,23 +851,19 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
   }

   RowGroupMetaDataBuilder* AppendRowGroup() {
-    row_groups_.emplace_back(new format::RowGroup);
+    row_groups_.emplace_back();
     current_row_group_builder_ =
-        RowGroupMetaDataBuilder::Make(properties_, schema_, row_groups_.back().get());
+        RowGroupMetaDataBuilder::Make(properties_, schema_, &row_groups_.back());
     return current_row_group_builder_.get();
   }

   std::unique_ptr<FileMetaData> Finish() {
     int64_t total_rows = 0;
-    std::vector<format::RowGroup> row_groups;
-    for (auto row_group = row_groups_.begin(); row_group != row_groups_.end();
-         row_group++) {
-      auto rowgroup = *((*row_group).get());
-      row_groups.push_back(rowgroup);
-      total_rows += rowgroup.num_rows;
+    for (auto row_group : row_groups_) {
+      total_rows += row_group.num_rows;
     }
     metadata_->__set_num_rows(total_rows);
-    metadata_->__set_row_groups(row_groups);
+    metadata_->__set_row_groups(row_groups_);

     if (key_value_metadata_) {
       metadata_->key_value_metadata.clear();
@@ -922,7 +918,7 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
 private:
  const std::shared_ptr<WriterProperties> properties_;
-  std::vector<std::unique_ptr<format::RowGroup>> row_groups_;
+  std::vector<format::RowGroup> row_groups_;
  std::unique_ptr<RowGroupMetaDataBuilder> current_row_group_builder_;
  const SchemaDescriptor* schema_;

From 8c26b77120e592b10453aca1ab419c30e378dd7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?=
Date: Mon, 31 Dec 2018 10:52:08 -0600
Subject: [PATCH 003/203] ARROW-4134: [Packaging] Properly set up timezone in
 docker tests to prevent ORC adapter's abort
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Python ORC tests were failing because of an unset
timezone.

Crossbow tests: [kszucs/crossbow/build-388](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-388)

Author: Krisztián Szűcs

Closes #3288 from kszucs/ARROW-4134 and squashes the following commits:

4f502625 set up timezone in the base cpp image
---
 c_glib/Dockerfile | 2 --
 cpp/Dockerfile    | 4 +++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/c_glib/Dockerfile b/c_glib/Dockerfile
index 5d64a5f154f62..7c8e412bca6f4 100644
--- a/c_glib/Dockerfile
+++ b/c_glib/Dockerfile
@@ -17,9 +17,7 @@

 FROM arrow:cpp

-ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get -q install --no-install-recommends -y \
-        tzdata \
         ruby-dev \
         pkg-config \
         autoconf-archive \

diff --git a/cpp/Dockerfile b/cpp/Dockerfile
index 84c00b91cc405..4e5a4e4bc1cb2 100644
--- a/cpp/Dockerfile
+++ b/cpp/Dockerfile
@@ -18,7 +18,8 @@
 FROM ubuntu:18.04

 # install build essentials
-RUN apt-get update -y -q && \
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update -y -q && \
     apt-get install -y -q --no-install-recommends \
       ca-certificates \
       ccache \
@@ -27,6 +28,7 @@ RUN apt-get update -y -q && \
       git \
       ninja-build \
       pkg-config \
+      tzdata \
       wget

 # install conda and required packages

From 71ccba9b217a7af922d8a69be21ed4db205af741 Mon Sep 17 00:00:00 2001
From: Micah Kornfield
Date: Mon, 31 Dec 2018 17:46:42 -0600
Subject: [PATCH 004/203] ARROW-4128: [C++] Update style guide to reflect
 NULLPTR and doxygen

Author: Micah Kornfield
Author: emkornfield

Closes #3284 from emkornfield/update_style_guide and squashes the following commits:

b90a669b allow anonymous namespaces
d0446107 update style guide to reflect NULLPTR and doxygen
---
 cpp/README.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/cpp/README.md b/cpp/README.md
index 7e92648dc37aa..2724ff44d8ac1 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -322,9 +322,13 @@ This requires [Doxygen](http://www.doxygen.org) to be installed.

 ## Development

-This project follows [Google's C++ Style Guide][3] with minor exceptions. We do
-not encourage anonymous namespaces and we relax the line length restriction to
-90 characters.
+This project follows [Google's C++ Style Guide][3] with minor exceptions:
+
+ * We relax the line length restriction to 90 characters.
+ * We use the NULLPTR macro defined in `src/arrow/util/macros.h` to
+   support building C++/CLI (ARROW-1134)
+ * We use doxygen style comments ("///") instead of line comments ("//")
+   in header files.

 ### Memory Pools

From 9376d85c409f4b9b272297b3acb6a0f70dcedc32 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 1 Jan 2019 13:34:25 -0600
Subject: [PATCH 005/203] ARROW-3910: [Python] Set date_as_object=True as
 default in to_pandas methods

This does not add a deprecation warning, primarily because it's a bit difficult to do (we would need to check whether the data type is a date, or, in the case of a table, whether any field is a date, and then warn if so). `True` is the correct option, though, in order to accurately round-trip data to and from pandas. Some users might have workarounds floating around, but this is sufficiently advanced stuff already.
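For context, the check such a warning would need is roughly the following (a hypothetical sketch, not part of this patch; it assumes the `pyarrow.types.is_date` predicate and iterating a `Table` schema's fields):

```
import pyarrow as pa
import pyarrow.types as patypes

def _contains_date_type(obj):
    # Sketch only: would the date-to-object conversion (and hence a
    # deprecation warning) apply to this Array/ChunkedArray/Table?
    if isinstance(obj, pa.Table):
        return any(patypes.is_date(field.type) for field in obj.schema)
    return patypes.is_date(obj.type)
```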
With this patch, date data round-trips with no special options:

```
In [2]: import pyarrow as pa

In [3]: import datetime

In [4]: arr = pa.array([datetime.date(2000, 1, 1), None])

In [5]: arr
Out[5]:
[
  10957,
  null
]

In [6]: arr.to_pandas()
Out[6]: array([datetime.date(2000, 1, 1), None], dtype=object)

In [7]: pa.array(arr.to_pandas())
Out[7]:
[
  10957,
  null
]
```

If others strongly feel it's worth going to the effort of raising a deprecation warning, please chime in.

Author: Wes McKinney

Closes #3272 from wesm/ARROW-3910 and squashes the following commits:

308afe56 Add Windows makefile for Sphinx, add section about date conversions to pandas.rst
f77c2967 Set date_as_object=True as default in to_pandas methods
---
 docs/make.bat                               | 52 ++++++++++++
 docs/source/building.rst                    | 71 ++++++++++++++++
 docs/source/index.rst                       |  6 ++
 docs/source/python/development.rst          | 50 -----------
 docs/source/python/pandas.rst               | 68 ++++++++++++++-
 python/pyarrow/array.pxi                    |  6 +-
 python/pyarrow/tests/test_convert_pandas.py | 94 ++++++++------------
 7 files changed, 231 insertions(+), 116 deletions(-)
 create mode 100644 docs/make.bat
 create mode 100644 docs/source/building.rst

diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000000000..36f2086c20b3f
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,52 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements.  See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership.  The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License.  You may obtain a copy of the License at
+@rem
+@rem   http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied.  See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd

diff --git a/docs/source/building.rst b/docs/source/building.rst
new file mode 100644
index 0000000000000..0fb4486db89c3
--- /dev/null
+++ b/docs/source/building.rst
@@ -0,0 +1,71 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.
You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Building the Documentation +========================== + +Prerequisites +------------- + +The documentation build process uses `Doxygen `_ and +`Sphinx `_ along with a few extensions. + +If you're using Conda, the required software can be installed in a single line: + +.. code-block:: shell + + conda install -c conda-forge --file ci/conda_env_sphinx.yml + +Otherwise, you'll first need to install `Doxygen `_ +yourself (for example from your distribution's official repositories, if +using Linux). Then you can install the Python-based requirements with the +following command: + +.. code-block:: shell + + pip install -r docs/requirements.txt + +Building +-------- + +.. note:: + + If you are building the documentation on Windows, not all sections + may build properly. + +These two steps are mandatory and must be executed in order. + +#. Process the C++ API using Doxygen + + .. code-block:: shell + + pushd cpp/apidoc + doxygen + popd + +#. Build the complete documentation using Sphinx + + .. code-block:: shell + + pushd docs + make html + popd + +After these steps are completed, the documentation is rendered in HTML +format in ``docs/_build/html``. In particular, you can point your browser +at ``docs/_build/html/index.html`` to read the docs and review any changes +you made. diff --git a/docs/source/index.rst b/docs/source/index.rst index fa6c683d14ecb..2b367b33823a2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -40,3 +40,9 @@ messaging and interprocess communication. cpp/index python/index + +.. toctree:: + :maxdepth: 2 + :caption: Other Topics + + building diff --git a/docs/source/python/development.rst b/docs/source/python/development.rst index 63e6051a7b864..ba8cfef721441 100644 --- a/docs/source/python/development.rst +++ b/docs/source/python/development.rst @@ -364,53 +364,3 @@ Getting ``python-test.exe`` to run is a bit tricky because your set PYTHONHOME=%CONDA_PREFIX% Now ``python-test.exe`` or simply ``ctest`` (to run all tests) should work. - -Building the Documentation -========================== - -Prerequisites -------------- - -The documentation build process uses `Doxygen `_ and -`Sphinx `_ along with a few extensions. - -If you're using Conda, the required software can be installed in a single line: - -.. code-block:: shell - - conda install -c conda-forge --file ci/conda_env_sphinx.yml - -Otherwise, you'll first need to install `Doxygen `_ -yourself (for example from your distribution's official repositories, if -using Linux). Then you can install the Python-based requirements with the -following command: - -.. code-block:: shell - - pip install -r docs/requirements.txt - -Building --------- - -These two steps are mandatory and must be executed in order. - -#. Process the C++ API using Doxygen - - .. code-block:: shell - - pushd cpp/apidoc - doxygen - popd - -#. Build the complete documentation using Sphinx - - .. code-block:: shell - - pushd docs - make html - popd - -After these steps are completed, the documentation is rendered in HTML -format in ``docs/_build/html``. 
In particular, you can point your browser
-at ``docs/_build/html/index.html`` to read the docs and review any changes
-you made.

diff --git a/docs/source/python/pandas.rst b/docs/source/python/pandas.rst
index 16b4ff6926809..dbc5e77e83bff 100644
--- a/docs/source/python/pandas.rst
+++ b/docs/source/python/pandas.rst
@@ -29,6 +29,13 @@ to them.
 (such as a different type system, and support for null values) that this
 is a separate topic from :ref:`numpy_interop`.

+To follow examples in this document, make sure to run:
+
+.. ipython:: python
+
+   import pandas as pd
+   import pyarrow as pa
+
 DataFrames
 ----------

@@ -120,5 +127,64 @@ Arrow -> pandas Conversion
 +-------------------------------------+--------------------------------------------------------+
 | ``TIMESTAMP(unit=*)``               | ``pd.Timestamp`` (``np.datetime64[ns]``)               |
 +-------------------------------------+--------------------------------------------------------+
-| ``DATE``                            | ``pd.Timestamp`` (``np.datetime64[ns]``)               |
+| ``DATE``                            | ``object`` (with ``datetime.date`` objects)            |
 +-------------------------------------+--------------------------------------------------------+
+
+Categorical types
+~~~~~~~~~~~~~~~~~
+
+TODO
+
+Datetime (Timestamp) types
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TODO
+
+Date types
+~~~~~~~~~~
+
+While dates can be handled using the ``datetime64[ns]`` type in
+pandas, some systems work with object arrays of Python's built-in
+``datetime.date`` object:
+
+.. ipython:: python
+
+   from datetime import date
+   s = pd.Series([date(2018, 12, 31), None, date(2000, 1, 1)])
+   s
+
+When converting to an Arrow array, the ``date32`` type will be used by
+default:
+
+.. ipython:: python
+
+   arr = pa.array(s)
+   arr.type
+   arr[0]
+
+To use the 64-bit ``date64``, specify this explicitly:
+
+.. ipython:: python
+
+   arr = pa.array(s, type='date64')
+   arr.type
+
+When converting back with ``to_pandas``, object arrays of
+``datetime.date`` objects are returned:
+
+.. ipython:: python
+
+   arr.to_pandas()
+
+If you want to use NumPy's ``datetime64`` dtype instead, pass
+``date_as_object=False``:
+
+..
ipython:: python + + s2 = pd.Series(arr.to_pandas(date_as_object=False)) + s2.dtype + +Time types +~~~~~~~~~~ + +TODO diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index ef95efe71b33c..54d0e92cd5561 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -343,10 +343,8 @@ cdef class _PandasConvertible: def to_pandas(self, categories=None, bint strings_to_categorical=False, bint zero_copy_only=False, bint integer_object_nulls=False, - bint date_as_object=False, - bint use_threads=True, - bint deduplicate_objects=True, - bint ignore_metadata=False): + bint date_as_object=True, bint use_threads=True, + bint deduplicate_objects=True, bint ignore_metadata=False): """ Convert to a pandas-compatible NumPy array or DataFrame, as appropriate diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 8d8b65b2240b8..3e89f5eb4ff70 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -912,7 +912,7 @@ class MyDate(date): result = table.to_pandas() expected_df = pd.DataFrame( - {"date": np.array(["2000-01-01"], dtype="datetime64[ns]")} + {"date": np.array([date(2000, 1, 1)], dtype=object)} ) tm.assert_frame_equal(expected_df, result) @@ -962,7 +962,7 @@ def test_pandas_datetime_to_date64_failures(self, mask): with pytest.raises(pa.ArrowInvalid, match=expected_msg): pa.Array.from_pandas(s, type=pa.date64(), mask=mask) - def test_array_date_as_object(self): + def test_array_types_date_as_object(self): data = [date(2000, 1, 1), None, date(1970, 1, 1), @@ -972,58 +972,23 @@ def test_array_date_as_object(self): '1970-01-01', '2040-02-26'], dtype='datetime64') - arr = pa.array(data) - assert arr.equals(pa.array(expected)) - - result = arr.to_pandas() - assert result.dtype == expected.dtype - npt.assert_array_equal(arr.to_pandas(), expected) - - result = arr.to_pandas(date_as_object=True) - expected = expected.astype(object) - assert result.dtype == expected.dtype - npt.assert_array_equal(result, expected) - - def test_chunked_array_convert_date_as_object(self): - data = [date(2000, 1, 1), - None, - date(1970, 1, 1), - date(2040, 2, 26)] - expected = np.array(['2000-01-01', - None, - '1970-01-01', - '2040-02-26'], dtype='datetime64') - carr = pa.chunked_array([data]) - - result = carr.to_pandas() - assert result.dtype == expected.dtype - npt.assert_array_equal(carr.to_pandas(), expected) - - result = carr.to_pandas(date_as_object=True) - expected = expected.astype(object) - assert result.dtype == expected.dtype - npt.assert_array_equal(result, expected) + objects = [ + # The second value is the expected value for date_as_object=False + (pa.array(data), expected), + (pa.chunked_array([data]), expected), + (pa.column('date', [data]), expected.astype('M8[ns]'))] - def test_column_convert_date_as_object(self): - data = [date(2000, 1, 1), - None, - date(1970, 1, 1), - date(2040, 2, 26)] - expected = np.array(['2000-01-01', - None, - '1970-01-01', - '2040-02-26'], dtype='datetime64') - - arr = pa.array(data) - column = pa.column('date', arr) + assert objects[0][0].equals(pa.array(expected)) - result = column.to_pandas() - npt.assert_array_equal(column.to_pandas(), expected) + for obj, expected_datetime64 in objects: + result = obj.to_pandas() + expected_obj = expected.astype(object) + assert result.dtype == expected_obj.dtype + npt.assert_array_equal(result, expected_obj) - result = column.to_pandas(date_as_object=True) - expected = expected.astype(object) - assert result.dtype 
== expected.dtype - npt.assert_array_equal(result, expected) + result = obj.to_pandas(date_as_object=False) + assert result.dtype == expected_datetime64.dtype + npt.assert_array_equal(result, expected_datetime64) def test_table_convert_date_as_object(self): df = pd.DataFrame({ @@ -1034,8 +999,8 @@ def test_table_convert_date_as_object(self): table = pa.Table.from_pandas(df, preserve_index=False) - df_datetime = table.to_pandas() - df_object = table.to_pandas(date_as_object=True) + df_datetime = table.to_pandas(date_as_object=False) + df_object = table.to_pandas() tm.assert_frame_equal(df.astype('datetime64[ns]'), df_datetime, check_dtype=True) @@ -1055,9 +1020,7 @@ def test_date_infer(self): assert table.schema.equals(expected_schema) result = table.to_pandas() - expected = df.copy() - expected['date'] = pd.to_datetime(df['date']) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, df) def test_date_mask(self): arr = np.array([date(2017, 4, 3), date(2017, 4, 4)], @@ -1094,18 +1057,27 @@ def test_date_objects_typed(self): # Test converting back to pandas colnames = ['date32', 'date64'] table = pa.Table.from_arrays([a32, a64], colnames) - table_pandas = table.to_pandas() ex_values = (np.array(['2017-04-03', '2017-04-04', '2017-04-04', '2017-04-05'], - dtype='datetime64[D]') - .astype('datetime64[ns]')) + dtype='datetime64[D]')) ex_values[1] = pd.NaT.value - expected_pandas = pd.DataFrame({'date32': ex_values, - 'date64': ex_values}, + + ex_datetime64ns = ex_values.astype('datetime64[ns]') + expected_pandas = pd.DataFrame({'date32': ex_datetime64ns, + 'date64': ex_datetime64ns}, columns=colnames) + table_pandas = table.to_pandas(date_as_object=False) tm.assert_frame_equal(table_pandas, expected_pandas) + table_pandas_objects = table.to_pandas() + ex_objects = ex_values.astype('object') + expected_pandas_objects = pd.DataFrame({'date32': ex_objects, + 'date64': ex_objects}, + columns=colnames) + tm.assert_frame_equal(table_pandas_objects, + expected_pandas_objects) + def test_dates_from_integers(self): t1 = pa.date32() t2 = pa.date64() From 12912741c2cbb33fad2965ee3abc4d3b47a63515 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 3 Jan 2019 08:32:40 +0100 Subject: [PATCH 006/203] ARROW-4137: [Rust] Move parquet code into a separate crate This moves the parquet related code into a separate sub-crate that depends on arrow. 
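For downstream code, the visible change is the crate path: examples that previously imported through `arrow::parquet` now import from the standalone `parquet` crate, as the updated doc comments below show. A minimal sketch of reading a file through the new path (assuming the `parquet` crate is declared as a dependency; it mirrors the doc example in `file/mod.rs`):

```
use parquet::file::reader::{FileReader, SerializedFileReader};
use std::{fs::File, path::Path};

fn main() {
    // Previously: use arrow::parquet::file::reader::{...};
    let file = File::open(&Path::new("/path/to/sample.parquet")).unwrap();
    let reader = SerializedFileReader::new(file).unwrap();
    println!("row groups: {}", reader.num_row_groups());
}
```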
Author: Chao Sun Author: Kouhei Sutou Closes #3291 from sunchao/ARROW-4137 and squashes the following commits: b2bcc1cd Add support for version update on release process bbeaaba2 Fix rustfmt 0545fd95 ARROW-4137: Move parquet code into a separate crate --- dev/release/00-prepare.sh | 6 +-- rust/Cargo.toml | 16 ++----- rust/parquet/Cargo.toml | 45 +++++++++++++++++++ rust/{ => parquet}/build.rs | 0 rust/{src/parquet => parquet/src}/basic.rs | 2 +- .../parquet => parquet/src}/column/mod.rs | 2 +- .../parquet => parquet/src}/column/page.rs | 8 ++-- .../parquet => parquet/src}/column/reader.rs | 22 ++++----- .../parquet => parquet/src}/column/writer.rs | 26 +++++------ .../parquet => parquet/src}/compression.rs | 8 ++-- .../{src/parquet => parquet/src}/data_type.rs | 4 +- .../src}/encodings/decoding.rs | 16 +++---- .../src}/encodings/encoding.rs | 20 ++++----- .../src}/encodings/levels.rs | 10 ++--- .../parquet => parquet/src}/encodings/mod.rs | 0 .../parquet => parquet/src}/encodings/rle.rs | 6 +-- rust/{src/parquet => parquet/src}/errors.rs | 0 .../parquet => parquet/src}/file/metadata.rs | 8 ++-- rust/{src/parquet => parquet/src}/file/mod.rs | 4 +- .../src}/file/properties.rs | 6 +-- .../parquet => parquet/src}/file/reader.rs | 20 ++++----- .../src}/file/statistics.rs | 6 +-- .../parquet => parquet/src}/file/writer.rs | 24 +++++----- .../parquet/mod.rs => parquet/src/lib.rs} | 7 +++ .../parquet => parquet/src}/record/api.rs | 10 ++--- .../parquet => parquet/src}/record/mod.rs | 0 .../parquet => parquet/src}/record/reader.rs | 20 ++++----- .../parquet => parquet/src}/record/triplet.rs | 18 ++++---- .../parquet => parquet/src}/schema/mod.rs | 2 +- .../parquet => parquet/src}/schema/parser.rs | 8 ++-- .../parquet => parquet/src}/schema/printer.rs | 14 +++--- .../parquet => parquet/src}/schema/types.rs | 8 ++-- .../src}/util/bit_packing.rs | 0 .../parquet => parquet/src}/util/bit_util.rs | 4 +- .../parquet => parquet/src}/util/hash_util.rs | 2 +- rust/{src/parquet => parquet/src}/util/io.rs | 4 +- .../parquet => parquet/src}/util/memory.rs | 0 rust/{src/parquet => parquet/src}/util/mod.rs | 0 .../src}/util/test_common.rs | 4 +- rust/src/lib.rs | 1 - 40 files changed, 199 insertions(+), 162 deletions(-) create mode 100644 rust/parquet/Cargo.toml rename rust/{ => parquet}/build.rs (100%) rename rust/{src/parquet => parquet/src}/basic.rs (99%) rename rust/{src/parquet => parquet/src}/column/mod.rs (99%) rename rust/{src/parquet => parquet/src}/column/page.rs (97%) rename rust/{src/parquet => parquet/src}/column/reader.rs (99%) rename rust/{src/parquet => parquet/src}/column/writer.rs (98%) rename rust/{src/parquet => parquet/src}/compression.rs (97%) rename rust/{src/parquet => parquet/src}/data_type.rs (99%) rename rust/{src/parquet => parquet/src}/encodings/decoding.rs (99%) rename rust/{src/parquet => parquet/src}/encodings/encoding.rs (98%) rename rust/{src/parquet => parquet/src}/encodings/levels.rs (98%) rename rust/{src/parquet => parquet/src}/encodings/mod.rs (100%) rename rust/{src/parquet => parquet/src}/encodings/rle.rs (99%) rename rust/{src/parquet => parquet/src}/errors.rs (100%) rename rust/{src/parquet => parquet/src}/file/metadata.rs (99%) rename rust/{src/parquet => parquet/src}/file/mod.rs (96%) rename rust/{src/parquet => parquet/src}/file/properties.rs (99%) rename rust/{src/parquet => parquet/src}/file/reader.rs (98%) rename rust/{src/parquet => parquet/src}/file/statistics.rs (99%) rename rust/{src/parquet => parquet/src}/file/writer.rs (98%) rename rust/{src/parquet/mod.rs 
=> parquet/src/lib.rs} (87%) rename rust/{src/parquet => parquet/src}/record/api.rs (99%) rename rust/{src/parquet => parquet/src}/record/mod.rs (100%) rename rust/{src/parquet => parquet/src}/record/reader.rs (98%) rename rust/{src/parquet => parquet/src}/record/triplet.rs (97%) rename rust/{src/parquet => parquet/src}/schema/mod.rs (98%) rename rust/{src/parquet => parquet/src}/schema/parser.rs (99%) rename rust/{src/parquet => parquet/src}/schema/printer.rs (97%) rename rust/{src/parquet => parquet/src}/schema/types.rs (99%) rename rust/{src/parquet => parquet/src}/util/bit_packing.rs (100%) rename rust/{src/parquet => parquet/src}/util/bit_util.rs (99%) rename rust/{src/parquet => parquet/src}/util/hash_util.rs (99%) rename rust/{src/parquet => parquet/src}/util/io.rs (98%) rename rust/{src/parquet => parquet/src}/util/memory.rs (100%) rename rust/{src/parquet => parquet/src}/util/mod.rs (100%) rename rust/{src/parquet => parquet/src}/util/test_common.rs (98%) diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 47ef760b86b9e..20d9ab8fce651 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -100,9 +100,9 @@ update_versions() { cd "${SOURCE_DIR}/../../rust" sed -i.bak -r -e \ "s/^version = \".+\"/version = \"${version}\"/g" \ - Cargo.toml - rm -f Cargo.toml.bak - git add Cargo.toml + Cargo.toml parquet/Cargo.toml + rm -f Cargo.toml.bak parquet/Cargo.toml.bak + git add Cargo.toml parquet/Cargo.toml cd - } diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 49e8a9d9c8470..1bf64d73ade5e 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "arrow" -version = "0.11.0" +version = "0.12.0-SNAPSHOT" description = "Rust implementation of Apache Arrow" homepage = "https://github.com/apache/arrow" repository = "https://github.com/apache/arrow" @@ -42,17 +42,6 @@ serde_derive = "1.0.80" serde_json = "1.0.13" rand = "0.5" csv = "1.0.0" -parquet-format = "2.5.0" -quick-error = "1.2.2" -byteorder = "1" -thrift = "0.0.4" -snap = "0.2" -brotli = "2.5" -flate2 = "1.0.2" -lz4 = "1.23" -zstd = "0.4" -chrono = "0.4" -num-bigint = "0.2" num = "0.2" [dev-dependencies] @@ -66,3 +55,6 @@ harness = false [[bench]] name = "builder" harness = false + +[workspace] +members = ["parquet"] \ No newline at end of file diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml new file mode 100644 index 0000000000000..aa7eac224c0cf --- /dev/null +++ b/rust/parquet/Cargo.toml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "parquet" +version = "0.12.0-SNAPSHOT" +license = "Apache-2.0" +description = "Apache Parquet implementation in Rust" +authors = ["Apache Arrow "] +keywords = [ "arrow", "parquet", "hadoop" ] +readme = "README.md" +build = "build.rs" +edition = "2018" + +[dependencies] +parquet-format = "2.5.0" +quick-error = "1.2.2" +byteorder = "1" +thrift = "0.0.4" +snap = "0.2" +brotli = "2.5" +flate2 = "1.0.2" +lz4 = "1.23" +zstd = "0.4" +chrono = "0.4" +num-bigint = "0.2" +arrow = { path = ".." } + +[dev-dependencies] +lazy_static = "1" +rand = "0.5" \ No newline at end of file diff --git a/rust/build.rs b/rust/parquet/build.rs similarity index 100% rename from rust/build.rs rename to rust/parquet/build.rs diff --git a/rust/src/parquet/basic.rs b/rust/parquet/src/basic.rs similarity index 99% rename from rust/src/parquet/basic.rs rename to rust/parquet/src/basic.rs index 22e16347dc00f..8b1be49659bc0 100644 --- a/rust/src/parquet/basic.rs +++ b/rust/parquet/src/basic.rs @@ -22,7 +22,7 @@ use std::{convert, fmt, result, str}; use parquet_format as parquet; -use crate::parquet::errors::ParquetError; +use crate::errors::ParquetError; // ---------------------------------------------------------------------- // Types from the Thrift definition diff --git a/rust/src/parquet/column/mod.rs b/rust/parquet/src/column/mod.rs similarity index 99% rename from rust/src/parquet/column/mod.rs rename to rust/parquet/src/column/mod.rs index 4ced32e28cbb9..9a72199d940f2 100644 --- a/rust/src/parquet/column/mod.rs +++ b/rust/parquet/src/column/mod.rs @@ -38,7 +38,7 @@ //! ```rust,no_run //! use std::{fs, path::Path, rc::Rc}; //! -//! use arrow::parquet::{ +//! use parquet::{ //! column::{reader::ColumnReader, writer::ColumnWriter}, //! file::{ //! properties::WriterProperties, diff --git a/rust/src/parquet/column/page.rs b/rust/parquet/src/column/page.rs similarity index 97% rename from rust/src/parquet/column/page.rs rename to rust/parquet/src/column/page.rs index 115037cba0bd5..9e0c76fb83cbd 100644 --- a/rust/src/parquet/column/page.rs +++ b/rust/parquet/src/column/page.rs @@ -17,10 +17,10 @@ //! Contains Parquet Page definitions and page reader interface. -use crate::parquet::basic::{Encoding, PageType}; -use crate::parquet::errors::Result; -use crate::parquet::file::{metadata::ColumnChunkMetaData, statistics::Statistics}; -use crate::parquet::util::memory::ByteBufferPtr; +use crate::basic::{Encoding, PageType}; +use crate::errors::Result; +use crate::file::{metadata::ColumnChunkMetaData, statistics::Statistics}; +use crate::util::memory::ByteBufferPtr; /// Parquet Page definition. 
/// diff --git a/rust/src/parquet/column/reader.rs b/rust/parquet/src/column/reader.rs similarity index 99% rename from rust/src/parquet/column/reader.rs rename to rust/parquet/src/column/reader.rs index f3dde31ab9a14..d327c50879ea8 100644 --- a/rust/src/parquet/column/reader.rs +++ b/rust/parquet/src/column/reader.rs @@ -24,15 +24,15 @@ use std::{ }; use super::page::{Page, PageReader}; -use crate::parquet::basic::*; -use crate::parquet::data_type::*; -use crate::parquet::encodings::{ +use crate::basic::*; +use crate::data_type::*; +use crate::encodings::{ decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder}, levels::LevelDecoder, }; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::schema::types::ColumnDescPtr; -use crate::parquet::util::memory::ByteBufferPtr; +use crate::errors::{ParquetError, Result}; +use crate::schema::types::ColumnDescPtr; +use crate::util::memory::ByteBufferPtr; /// Column reader for a Parquet type. pub enum ColumnReader { @@ -490,14 +490,14 @@ mod tests { use rand::distributions::range::SampleRange; use std::{collections::VecDeque, rc::Rc, vec::IntoIter}; - use crate::parquet::basic::Type as PhysicalType; - use crate::parquet::column::page::Page; - use crate::parquet::encodings::{ + use crate::basic::Type as PhysicalType; + use crate::column::page::Page; + use crate::encodings::{ encoding::{get_encoder, DictEncoder, Encoder}, levels::{max_buffer_size, LevelEncoder}, }; - use crate::parquet::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType}; - use crate::parquet::util::{ + use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType}; + use crate::util::{ memory::{ByteBufferPtr, MemTracker, MemTrackerPtr}, test_common::random_numbers_range, }; diff --git a/rust/src/parquet/column/writer.rs b/rust/parquet/src/column/writer.rs similarity index 98% rename from rust/src/parquet/column/writer.rs rename to rust/parquet/src/column/writer.rs index 4798d9ad17927..26bd7c5aac778 100644 --- a/rust/src/parquet/column/writer.rs +++ b/rust/parquet/src/column/writer.rs @@ -19,21 +19,21 @@ use std::{cmp, collections::VecDeque, mem, rc::Rc}; -use crate::parquet::basic::{Compression, Encoding, PageType, Type}; -use crate::parquet::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter}; -use crate::parquet::compression::{create_codec, Codec}; -use crate::parquet::data_type::*; -use crate::parquet::encodings::{ +use crate::basic::{Compression, Encoding, PageType, Type}; +use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter}; +use crate::compression::{create_codec, Codec}; +use crate::data_type::*; +use crate::encodings::{ encoding::{get_encoder, DictEncoder, Encoder}, levels::{max_buffer_size, LevelEncoder}, }; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::file::{ +use crate::errors::{ParquetError, Result}; +use crate::file::{ metadata::ColumnChunkMetaData, properties::{WriterProperties, WriterPropertiesPtr, WriterVersion}, }; -use crate::parquet::schema::types::ColumnDescPtr; -use crate::parquet::util::memory::{ByteBufferPtr, MemTracker}; +use crate::schema::types::ColumnDescPtr; +use crate::util::memory::{ByteBufferPtr, MemTracker}; /// Column writer for a Parquet type. 
pub enum ColumnWriter { @@ -802,15 +802,15 @@ mod tests { use rand::distributions::range::SampleRange; - use crate::parquet::column::{ + use crate::column::{ page::PageReader, reader::{get_column_reader, get_typed_column_reader, ColumnReaderImpl}, }; - use crate::parquet::file::{ + use crate::file::{ properties::WriterProperties, reader::SerializedPageReader, writer::SerializedPageWriter, }; - use crate::parquet::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType}; - use crate::parquet::util::{ + use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType}; + use crate::util::{ io::{FileSink, FileSource}, test_common::{get_temp_file, random_numbers_range}, }; diff --git a/rust/src/parquet/compression.rs b/rust/parquet/src/compression.rs similarity index 97% rename from rust/src/parquet/compression.rs rename to rust/parquet/src/compression.rs index 3690cca032361..3644ffcc54272 100644 --- a/rust/src/parquet/compression.rs +++ b/rust/parquet/src/compression.rs @@ -23,7 +23,7 @@ //! # Example //! //! ```rust -//! use arrow::parquet::{basic::Compression, compression::create_codec}; +//! use parquet::{basic::Compression, compression::create_codec}; //! //! let mut codec = match create_codec(Compression::SNAPPY) { //! Ok(Some(codec)) => codec, @@ -48,8 +48,8 @@ use lz4; use snap::{decompress_len, max_compress_len, Decoder, Encoder}; use zstd; -use crate::parquet::basic::Compression as CodecType; -use crate::parquet::errors::{ParquetError, Result}; +use crate::basic::Compression as CodecType; +use crate::errors::{ParquetError, Result}; /// Parquet compression codec interface. pub trait Codec { @@ -250,7 +250,7 @@ impl Codec for ZSTDCodec { mod tests { use super::*; - use crate::parquet::util::test_common::*; + use crate::util::test_common::*; fn test_roundtrip(c: CodecType, data: &Vec) { let mut c1 = create_codec(c).unwrap().unwrap(); diff --git a/rust/src/parquet/data_type.rs b/rust/parquet/src/data_type.rs similarity index 99% rename from rust/src/parquet/data_type.rs rename to rust/parquet/src/data_type.rs index 26bdebd71bc8b..bfe0889cf71c4 100644 --- a/rust/src/parquet/data_type.rs +++ b/rust/parquet/src/data_type.rs @@ -22,8 +22,8 @@ use std::mem; use byteorder::{BigEndian, ByteOrder}; -use crate::parquet::basic::Type; -use crate::parquet::util::memory::{ByteBuffer, ByteBufferPtr}; +use crate::basic::Type; +use crate::util::memory::{ByteBuffer, ByteBufferPtr}; /// Rust representation for logical type INT96, value is backed by an array of `u32`. /// The type only takes 12 bytes, without extra padding. 
diff --git a/rust/src/parquet/encodings/decoding.rs b/rust/parquet/src/encodings/decoding.rs similarity index 99% rename from rust/src/parquet/encodings/decoding.rs rename to rust/parquet/src/encodings/decoding.rs index c6a6fd49ee336..f0e93fe1abea7 100644 --- a/rust/src/parquet/encodings/decoding.rs +++ b/rust/parquet/src/encodings/decoding.rs @@ -23,11 +23,11 @@ use super::rle::RleDecoder; use byteorder::{ByteOrder, LittleEndian}; -use crate::parquet::basic::*; -use crate::parquet::data_type::*; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::schema::types::ColumnDescPtr; -use crate::parquet::util::{ +use crate::basic::*; +use crate::data_type::*; +use crate::errors::{ParquetError, Result}; +use crate::schema::types::ColumnDescPtr; +use crate::util::{ bit_util::BitReader, memory::{ByteBuffer, ByteBufferPtr}, }; @@ -865,10 +865,8 @@ mod tests { use std::{mem, rc::Rc}; - use crate::parquet::schema::types::{ - ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType, - }; - use crate::parquet::util::{bit_util::set_array_bit, memory::MemTracker, test_common::RandGen}; + use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType}; + use crate::util::{bit_util::set_array_bit, memory::MemTracker, test_common::RandGen}; #[test] fn test_get_decoders() { diff --git a/rust/src/parquet/encodings/encoding.rs b/rust/parquet/src/encodings/encoding.rs similarity index 98% rename from rust/src/parquet/encodings/encoding.rs rename to rust/parquet/src/encodings/encoding.rs index cecb03cb540a9..e1d674cc6ca2c 100644 --- a/rust/src/parquet/encodings/encoding.rs +++ b/rust/parquet/src/encodings/encoding.rs @@ -19,12 +19,12 @@ use std::{cmp, io::Write, marker::PhantomData, mem, slice}; -use crate::parquet::basic::*; -use crate::parquet::data_type::*; -use crate::parquet::encodings::rle::RleEncoder; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::schema::types::ColumnDescPtr; -use crate::parquet::util::{ +use crate::basic::*; +use crate::data_type::*; +use crate::encodings::rle::RleEncoder; +use crate::errors::{ParquetError, Result}; +use crate::schema::types::ColumnDescPtr; +use crate::util::{ bit_util::{log2, num_required_bits, BitWriter}, hash_util, memory::{Buffer, ByteBuffer, ByteBufferPtr, MemTrackerPtr}, @@ -988,11 +988,9 @@ mod tests { use std::rc::Rc; - use crate::parquet::decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder}; - use crate::parquet::schema::types::{ - ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType, - }; - use crate::parquet::util::{memory::MemTracker, test_common::RandGen}; + use crate::decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder}; + use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType}; + use crate::util::{memory::MemTracker, test_common::RandGen}; const TEST_SET_SIZE: usize = 1024; diff --git a/rust/src/parquet/encodings/levels.rs b/rust/parquet/src/encodings/levels.rs similarity index 98% rename from rust/src/parquet/encodings/levels.rs rename to rust/parquet/src/encodings/levels.rs index ec65198ce55f0..29c92ddcdba9b 100644 --- a/rust/src/parquet/encodings/levels.rs +++ b/rust/parquet/src/encodings/levels.rs @@ -19,10 +19,10 @@ use std::{cmp, mem}; use super::rle::{RleDecoder, RleEncoder}; -use crate::parquet::basic::Encoding; -use crate::parquet::data_type::AsBytes; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::util::{ +use crate::basic::Encoding; +use crate::data_type::AsBytes; +use 
crate::errors::{ParquetError, Result}; +use crate::util::{ bit_util::{ceil, log2, BitReader, BitWriter}, memory::ByteBufferPtr, }; @@ -267,7 +267,7 @@ impl LevelDecoder { mod tests { use super::*; - use crate::parquet::util::test_common::random_numbers_range; + use crate::util::test_common::random_numbers_range; fn test_internal_roundtrip(enc: Encoding, levels: &[i16], max_level: i16, v2: bool) { let size = max_buffer_size(enc, max_level, levels.len()); diff --git a/rust/src/parquet/encodings/mod.rs b/rust/parquet/src/encodings/mod.rs similarity index 100% rename from rust/src/parquet/encodings/mod.rs rename to rust/parquet/src/encodings/mod.rs diff --git a/rust/src/parquet/encodings/rle.rs b/rust/parquet/src/encodings/rle.rs similarity index 99% rename from rust/src/parquet/encodings/rle.rs rename to rust/parquet/src/encodings/rle.rs index 5b56c2a250495..1a8b6e5c7c6b8 100644 --- a/rust/src/parquet/encodings/rle.rs +++ b/rust/parquet/src/encodings/rle.rs @@ -20,8 +20,8 @@ use std::{ mem::{size_of, transmute_copy}, }; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::util::{ +use crate::errors::{ParquetError, Result}; +use crate::util::{ bit_util::{self, BitReader, BitWriter}, memory::ByteBufferPtr, }; @@ -520,7 +520,7 @@ mod tests { thread_rng, Rng, SeedableRng, }; - use crate::parquet::util::memory::ByteBufferPtr; + use crate::util::memory::ByteBufferPtr; const MAX_WIDTH: usize = 32; diff --git a/rust/src/parquet/errors.rs b/rust/parquet/src/errors.rs similarity index 100% rename from rust/src/parquet/errors.rs rename to rust/parquet/src/errors.rs diff --git a/rust/src/parquet/file/metadata.rs b/rust/parquet/src/file/metadata.rs similarity index 99% rename from rust/src/parquet/file/metadata.rs rename to rust/parquet/src/file/metadata.rs index 7f2442506f67f..06507fdcad2a8 100644 --- a/rust/src/parquet/file/metadata.rs +++ b/rust/parquet/src/file/metadata.rs @@ -37,10 +37,10 @@ use std::rc::Rc; use parquet_format::{ColumnChunk, ColumnMetaData, RowGroup}; -use crate::parquet::basic::{ColumnOrder, Compression, Encoding, Type}; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::file::statistics::{self, Statistics}; -use crate::parquet::schema::types::{ +use crate::basic::{ColumnOrder, Compression, Encoding, Type}; +use crate::errors::{ParquetError, Result}; +use crate::file::statistics::{self, Statistics}; +use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, Type as SchemaType, TypePtr, }; diff --git a/rust/src/parquet/file/mod.rs b/rust/parquet/src/file/mod.rs similarity index 96% rename from rust/src/parquet/file/mod.rs rename to rust/parquet/src/file/mod.rs index 38fe8fa9b15b1..407a97d5d6e5e 100644 --- a/rust/src/parquet/file/mod.rs +++ b/rust/parquet/src/file/mod.rs @@ -29,7 +29,7 @@ //! ```rust,no_run //! use std::{fs, path::Path, rc::Rc}; //! -//! use arrow::parquet::{ +//! use parquet::{ //! file::{ //! properties::WriterProperties, //! writer::{FileWriter, SerializedFileWriter}, @@ -62,7 +62,7 @@ //! # Example of reading an existing file //! //! ```rust,no_run -//! use arrow::parquet::file::reader::{FileReader, SerializedFileReader}; +//! use parquet::file::reader::{FileReader, SerializedFileReader}; //! use std::{fs::File, path::Path}; //! //! 
let path = Path::new("/path/to/sample.parquet"); diff --git a/rust/src/parquet/file/properties.rs b/rust/parquet/src/file/properties.rs similarity index 99% rename from rust/src/parquet/file/properties.rs rename to rust/parquet/src/file/properties.rs index 911ec55733490..47b232e6fab04 100644 --- a/rust/src/parquet/file/properties.rs +++ b/rust/parquet/src/file/properties.rs @@ -20,7 +20,7 @@ //! # Usage //! //! ```rust -//! use arrow::parquet::{ +//! use parquet::{ //! basic::{Compression, Encoding}, //! file::properties::*, //! schema::types::ColumnPath, @@ -50,8 +50,8 @@ use std::{collections::HashMap, rc::Rc}; -use crate::parquet::basic::{Compression, Encoding}; -use crate::parquet::schema::types::ColumnPath; +use crate::basic::{Compression, Encoding}; +use crate::schema::types::ColumnPath; const DEFAULT_PAGE_SIZE: usize = 1024 * 1024; const DEFAULT_WRITE_BATCH_SIZE: usize = 1024; diff --git a/rust/src/parquet/file/reader.rs b/rust/parquet/src/file/reader.rs similarity index 98% rename from rust/src/parquet/file/reader.rs rename to rust/parquet/src/file/reader.rs index c2e5dd176dac5..747fbbc64f82e 100644 --- a/rust/src/parquet/file/reader.rs +++ b/rust/parquet/src/file/reader.rs @@ -32,17 +32,17 @@ use parquet_format::{ }; use thrift::protocol::TCompactInputProtocol; -use crate::parquet::basic::{ColumnOrder, Compression, Encoding, Type}; -use crate::parquet::column::{ +use crate::basic::{ColumnOrder, Compression, Encoding, Type}; +use crate::column::{ page::{Page, PageReader}, reader::{ColumnReader, ColumnReaderImpl}, }; -use crate::parquet::compression::{create_codec, Codec}; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC}; -use crate::parquet::record::reader::RowIter; -use crate::parquet::schema::types::{self, SchemaDescriptor, Type as SchemaType}; -use crate::parquet::util::{io::FileSource, memory::ByteBufferPtr}; +use crate::compression::{create_codec, Codec}; +use crate::errors::{ParquetError, Result}; +use crate::file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC}; +use crate::record::reader::RowIter; +use crate::schema::types::{self, SchemaDescriptor, Type as SchemaType}; +use crate::util::{io::FileSource, memory::ByteBufferPtr}; // ---------------------------------------------------------------------- // APIs for file & row group readers @@ -545,8 +545,8 @@ mod tests { use parquet_format::TypeDefinedOrder; - use crate::parquet::basic::SortOrder; - use crate::parquet::util::test_common::{get_temp_file, get_test_file, get_test_path}; + use crate::basic::SortOrder; + use crate::util::test_common::{get_temp_file, get_test_file, get_test_path}; #[test] fn test_file_reader_metadata_size_smaller_than_footer() { diff --git a/rust/src/parquet/file/statistics.rs b/rust/parquet/src/file/statistics.rs similarity index 99% rename from rust/src/parquet/file/statistics.rs rename to rust/parquet/src/file/statistics.rs index ff4d731857f16..03831bbc72bf7 100644 --- a/rust/src/parquet/file/statistics.rs +++ b/rust/parquet/src/file/statistics.rs @@ -21,7 +21,7 @@ //! actual min and max values from statistics, see below: //! //! ```rust -//! use arrow::parquet::file::statistics::Statistics; +//! use parquet::file::statistics::Statistics; //! //! let stats = Statistics::int32(Some(1), Some(10), None, 3, true); //! 
assert_eq!(stats.null_count(), 3); @@ -42,8 +42,8 @@ use std::{cmp, fmt}; use byteorder::{ByteOrder, LittleEndian}; use parquet_format::Statistics as TStatistics; -use crate::parquet::basic::Type; -use crate::parquet::data_type::*; +use crate::basic::Type; +use crate::data_type::*; // Macro to generate methods create Statistics. macro_rules! statistics_new_func { diff --git a/rust/src/parquet/file/writer.rs b/rust/parquet/src/file/writer.rs similarity index 98% rename from rust/src/parquet/file/writer.rs rename to rust/parquet/src/file/writer.rs index 1e0c11641f9a4..e000842f3895f 100644 --- a/rust/src/parquet/file/writer.rs +++ b/rust/parquet/src/file/writer.rs @@ -28,18 +28,18 @@ use byteorder::{ByteOrder, LittleEndian}; use parquet_format as parquet; use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; -use crate::parquet::basic::PageType; -use crate::parquet::column::{ +use crate::basic::PageType; +use crate::column::{ page::{CompressedPage, Page, PageWriteSpec, PageWriter}, writer::{get_column_writer, ColumnWriter}, }; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::file::{ +use crate::errors::{ParquetError, Result}; +use crate::file::{ metadata::*, properties::WriterPropertiesPtr, statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC, }; -use crate::parquet::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}; -use crate::parquet::util::io::{FileSink, Position}; +use crate::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}; +use crate::util::io::{FileSink, Position}; // ---------------------------------------------------------------------- // APIs for file & row group writers @@ -512,16 +512,16 @@ mod tests { use std::{error::Error, io::Cursor}; - use crate::parquet::basic::{Compression, Encoding, Repetition, Type}; - use crate::parquet::column::page::PageReader; - use crate::parquet::compression::{create_codec, Codec}; - use crate::parquet::file::{ + use crate::basic::{Compression, Encoding, Repetition, Type}; + use crate::column::page::PageReader; + use crate::compression::{create_codec, Codec}; + use crate::file::{ properties::WriterProperties, reader::{FileReader, SerializedFileReader, SerializedPageReader}, statistics::{from_thrift, to_thrift, Statistics}, }; - use crate::parquet::record::RowAccessor; - use crate::parquet::util::{memory::ByteBufferPtr, test_common::get_temp_file}; + use crate::record::RowAccessor; + use crate::util::{memory::ByteBufferPtr, test_common::get_temp_file}; #[test] fn test_file_writer_error_after_close() { diff --git a/rust/src/parquet/mod.rs b/rust/parquet/src/lib.rs similarity index 87% rename from rust/src/parquet/mod.rs rename to rust/parquet/src/lib.rs index 58cc7b13df6d6..75c56f5054f19 100644 --- a/rust/src/parquet/mod.rs +++ b/rust/parquet/src/lib.rs @@ -15,6 +15,13 @@ // specific language governing permissions and limitations // under the License. 
+#![feature(type_ascription)] +#![feature(rustc_private)] +#![feature(specialization)] +#![feature(try_from)] +#![allow(dead_code)] +#![allow(non_camel_case_types)] + #[macro_use] pub mod errors; pub mod basic; diff --git a/rust/src/parquet/record/api.rs b/rust/parquet/src/record/api.rs similarity index 99% rename from rust/src/parquet/record/api.rs rename to rust/parquet/src/record/api.rs index d6e3ec19b76f6..d0be43ad730ed 100644 --- a/rust/src/parquet/record/api.rs +++ b/rust/parquet/src/record/api.rs @@ -22,10 +22,10 @@ use std::fmt; use chrono::{Local, TimeZone}; use num_bigint::{BigInt, Sign}; -use crate::parquet::basic::{LogicalType, Type as PhysicalType}; -use crate::parquet::data_type::{ByteArray, Decimal, Int96}; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::schema::types::ColumnDescPtr; +use crate::basic::{LogicalType, Type as PhysicalType}; +use crate::data_type::{ByteArray, Decimal, Int96}; +use crate::errors::{ParquetError, Result}; +use crate::schema::types::ColumnDescPtr; /// Macro as a shortcut to generate 'not yet implemented' panic error. macro_rules! nyi { @@ -689,7 +689,7 @@ mod tests { use chrono; use std::rc::Rc; - use crate::parquet::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder}; + use crate::schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder}; /// Creates test column descriptor based on provided type parameters. macro_rules! make_column_descr { diff --git a/rust/src/parquet/record/mod.rs b/rust/parquet/src/record/mod.rs similarity index 100% rename from rust/src/parquet/record/mod.rs rename to rust/parquet/src/record/mod.rs diff --git a/rust/src/parquet/record/reader.rs b/rust/parquet/src/record/reader.rs similarity index 98% rename from rust/src/parquet/record/reader.rs rename to rust/parquet/src/record/reader.rs index d9f3d6fea1978..e1d3c964eca3a 100644 --- a/rust/src/parquet/record/reader.rs +++ b/rust/parquet/src/record/reader.rs @@ -20,14 +20,14 @@ use std::{collections::HashMap, fmt, rc::Rc}; -use crate::parquet::basic::{LogicalType, Repetition}; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::file::reader::{FileReader, RowGroupReader}; -use crate::parquet::record::{ +use crate::basic::{LogicalType, Repetition}; +use crate::errors::{ParquetError, Result}; +use crate::file::reader::{FileReader, RowGroupReader}; +use crate::record::{ api::{make_list, make_map, make_row, Field, Row}, triplet::TripletIter, }; -use crate::parquet::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr}; +use crate::schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type, TypePtr}; /// Default batch size for a reader const DEFAULT_BATCH_SIZE: usize = 1024; @@ -709,11 +709,11 @@ impl Iterator for ReaderIter { mod tests { use super::*; - use crate::parquet::errors::{ParquetError, Result}; - use crate::parquet::file::reader::{FileReader, SerializedFileReader}; - use crate::parquet::record::api::{Field, Row}; - use crate::parquet::schema::parser::parse_message_type; - use crate::parquet::util::test_common::get_test_file; + use crate::errors::{ParquetError, Result}; + use crate::file::reader::{FileReader, SerializedFileReader}; + use crate::record::api::{Field, Row}; + use crate::schema::parser::parse_message_type; + use crate::util::test_common::get_test_file; // Convenient macros to assemble row, list, map, and group. 
diff --git a/rust/src/parquet/record/triplet.rs b/rust/parquet/src/record/triplet.rs similarity index 97% rename from rust/src/parquet/record/triplet.rs rename to rust/parquet/src/record/triplet.rs index fadcbbce9ba5b..6ec7799ccb03c 100644 --- a/rust/src/parquet/record/triplet.rs +++ b/rust/parquet/src/record/triplet.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -use crate::parquet::basic::Type as PhysicalType; -use crate::parquet::column::reader::{get_typed_column_reader, ColumnReader, ColumnReaderImpl}; -use crate::parquet::data_type::*; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::record::api::Field; -use crate::parquet::schema::types::ColumnDescPtr; +use crate::basic::Type as PhysicalType; +use crate::column::reader::{get_typed_column_reader, ColumnReader, ColumnReaderImpl}; +use crate::data_type::*; +use crate::errors::{ParquetError, Result}; +use crate::record::api::Field; +use crate::schema::types::ColumnDescPtr; /// Macro to generate simple functions that cover all types of triplet iterator. /// $func is a function of a typed triplet iterator and $token is a either {`ref`} or @@ -353,9 +353,9 @@ impl TypedTripletIter { mod tests { use super::*; - use crate::parquet::file::reader::{FileReader, SerializedFileReader}; - use crate::parquet::schema::types::ColumnPath; - use crate::parquet::util::test_common::get_test_file; + use crate::file::reader::{FileReader, SerializedFileReader}; + use crate::schema::types::ColumnPath; + use crate::util::test_common::get_test_file; #[test] #[should_panic(expected = "Expected positive batch size, found: 0")] diff --git a/rust/src/parquet/schema/mod.rs b/rust/parquet/src/schema/mod.rs similarity index 98% rename from rust/src/parquet/schema/mod.rs rename to rust/parquet/src/schema/mod.rs index 5319504964627..351ce97337178 100644 --- a/rust/src/parquet/schema/mod.rs +++ b/rust/parquet/src/schema/mod.rs @@ -20,7 +20,7 @@ //! # Example //! //! ```rust -//! use arrow::parquet::{ +//! use parquet::{ //! basic::{LogicalType, Repetition, Type as PhysicalType}, //! schema::{parser, printer, types::Type}, //! }; diff --git a/rust/src/parquet/schema/parser.rs b/rust/parquet/src/schema/parser.rs similarity index 99% rename from rust/src/parquet/schema/parser.rs rename to rust/parquet/src/schema/parser.rs index 2890c84a755ba..955c6c9830223 100644 --- a/rust/src/parquet/schema/parser.rs +++ b/rust/parquet/src/schema/parser.rs @@ -22,7 +22,7 @@ //! # Example //! //! ```rust -//! use arrow::parquet::schema::parser::parse_message_type; +//! use parquet::schema::parser::parse_message_type; //! //! let message_type = " //! message spark_schema { @@ -44,9 +44,9 @@ use std::rc::Rc; -use crate::parquet::basic::{LogicalType, Repetition, Type as PhysicalType}; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::schema::types::{Type, TypePtr}; +use crate::basic::{LogicalType, Repetition, Type as PhysicalType}; +use crate::errors::{ParquetError, Result}; +use crate::schema::types::{Type, TypePtr}; /// Parses message type as string into a Parquet [`Type`](`::schema::types::Type`) which, /// for example, could be used to extract individual columns. 
Returns Parquet general diff --git a/rust/src/parquet/schema/printer.rs b/rust/parquet/src/schema/printer.rs similarity index 97% rename from rust/src/parquet/schema/printer.rs rename to rust/parquet/src/schema/printer.rs index d61f116eb9e70..87c3683d9237d 100644 --- a/rust/src/parquet/schema/printer.rs +++ b/rust/parquet/src/schema/printer.rs @@ -21,7 +21,7 @@ //! # Example //! //! ```rust -//! use arrow::parquet::{ +//! use parquet::{ //! file::reader::{FileReader, SerializedFileReader}, //! schema::printer::{print_file_metadata, print_parquet_metadata, print_schema}, //! }; @@ -45,11 +45,9 @@ use std::{fmt, io}; -use crate::parquet::basic::{LogicalType, Type as PhysicalType}; -use crate::parquet::file::metadata::{ - ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData, -}; -use crate::parquet::schema::types::Type; +use crate::basic::{LogicalType, Type as PhysicalType}; +use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; +use crate::schema::types::Type; /// Prints Parquet metadata [`ParquetMetaData`](`::file::metadata::ParquetMetaData`) /// information. @@ -260,8 +258,8 @@ mod tests { use std::rc::Rc; - use crate::parquet::basic::{Repetition, Type as PhysicalType}; - use crate::parquet::schema::{parser::parse_message_type, types::Type}; + use crate::basic::{Repetition, Type as PhysicalType}; + use crate::schema::{parser::parse_message_type, types::Type}; fn assert_print_parse_message(message: Type) { let mut s = String::new(); diff --git a/rust/src/parquet/schema/types.rs b/rust/parquet/src/schema/types.rs similarity index 99% rename from rust/src/parquet/schema/types.rs rename to rust/parquet/src/schema/types.rs index 90c767c093055..30ee9f60e1a3e 100644 --- a/rust/src/parquet/schema/types.rs +++ b/rust/parquet/src/schema/types.rs @@ -21,8 +21,8 @@ use std::{collections::HashMap, convert::From, fmt, rc::Rc}; use parquet_format::SchemaElement; -use crate::parquet::basic::{LogicalType, Repetition, Type as PhysicalType}; -use crate::parquet::errors::{ParquetError, Result}; +use crate::basic::{LogicalType, Repetition, Type as PhysicalType}; +use crate::errors::{ParquetError, Result}; // ---------------------------------------------------------------------- // Parquet Type definitions @@ -512,7 +512,7 @@ impl ColumnPath { /// Returns string representation of this column path. 
/// ```rust - /// use arrow::parquet::schema::types::ColumnPath; + /// use parquet::schema::types::ColumnPath; /// /// let path = ColumnPath::new(vec!["a".to_string(), "b".to_string(), "c".to_string()]); /// assert_eq!(&path.string(), "a.b.c"); @@ -1014,7 +1014,7 @@ mod tests { use std::error::Error; - use crate::parquet::schema::parser::parse_message_type; + use crate::schema::parser::parse_message_type; #[test] fn test_primitive_type() { diff --git a/rust/src/parquet/util/bit_packing.rs b/rust/parquet/src/util/bit_packing.rs similarity index 100% rename from rust/src/parquet/util/bit_packing.rs rename to rust/parquet/src/util/bit_packing.rs diff --git a/rust/src/parquet/util/bit_util.rs b/rust/parquet/src/util/bit_util.rs similarity index 99% rename from rust/src/parquet/util/bit_util.rs rename to rust/parquet/src/util/bit_util.rs index 9dbb9a32333d2..ae680ecca4735 100644 --- a/rust/src/parquet/util/bit_util.rs +++ b/rust/parquet/src/util/bit_util.rs @@ -20,8 +20,8 @@ use std::{ mem::{size_of, transmute_copy}, }; -use crate::parquet::errors::{ParquetError, Result}; -use crate::parquet::util::{bit_packing::unpack32, memory::ByteBufferPtr}; +use crate::errors::{ParquetError, Result}; +use crate::util::{bit_packing::unpack32, memory::ByteBufferPtr}; /// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in /// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't diff --git a/rust/src/parquet/util/hash_util.rs b/rust/parquet/src/util/hash_util.rs similarity index 99% rename from rust/src/parquet/util/hash_util.rs rename to rust/parquet/src/util/hash_util.rs index c7bffef8bbf34..b4685fbd004da 100644 --- a/rust/src/parquet/util/hash_util.rs +++ b/rust/parquet/src/util/hash_util.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::parquet::data_type::AsBytes; +use crate::data_type::AsBytes; /// Computes hash value for `data`, with a seed value `seed`. /// The data type `T` must implement the `AsBytes` trait. diff --git a/rust/src/parquet/util/io.rs b/rust/parquet/src/util/io.rs similarity index 98% rename from rust/src/parquet/util/io.rs rename to rust/parquet/src/util/io.rs index 8724e67c2dbe7..d667c8e817a91 100644 --- a/rust/src/parquet/util/io.rs +++ b/rust/parquet/src/util/io.rs @@ -17,7 +17,7 @@ use std::{cmp, fs::File, io::*, sync::Mutex}; -use crate::parquet::file::reader::ParquetReader; +use crate::file::reader::ParquetReader; // ---------------------------------------------------------------------- // Read/Write wrappers for `File`. 
@@ -132,7 +132,7 @@ impl<'a> Position for Cursor<&'a mut Vec> { mod tests { use super::*; - use crate::parquet::util::test_common::{get_temp_file, get_test_file}; + use crate::util::test_common::{get_temp_file, get_test_file}; #[test] fn test_io_read_fully() { diff --git a/rust/src/parquet/util/memory.rs b/rust/parquet/src/util/memory.rs similarity index 100% rename from rust/src/parquet/util/memory.rs rename to rust/parquet/src/util/memory.rs diff --git a/rust/src/parquet/util/mod.rs b/rust/parquet/src/util/mod.rs similarity index 100% rename from rust/src/parquet/util/mod.rs rename to rust/parquet/src/util/mod.rs diff --git a/rust/src/parquet/util/test_common.rs b/rust/parquet/src/util/test_common.rs similarity index 98% rename from rust/src/parquet/util/test_common.rs rename to rust/parquet/src/util/test_common.rs index f9b1af4a5cef4..ad315a6aa4f69 100644 --- a/rust/src/parquet/util/test_common.rs +++ b/rust/parquet/src/util/test_common.rs @@ -21,8 +21,8 @@ use rand::{ }; use std::{env, fs, io::Write, path::PathBuf, str::FromStr}; -use crate::parquet::data_type::*; -use crate::parquet::util::memory::ByteBufferPtr; +use crate::data_type::*; +use crate::util::memory::ByteBufferPtr; /// Random generator of data type `T` values and sequences. pub trait RandGen { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index d5708b10504c4..199159e369b5f 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -32,7 +32,6 @@ pub mod csv; pub mod datatypes; pub mod error; pub mod memory; -pub mod parquet; pub mod record_batch; pub mod tensor; pub mod util; From c71d27fe55ca2a273f194c860b59074b0c998a74 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Thu, 3 Jan 2019 18:47:25 +0900 Subject: [PATCH 007/203] ARROW-4141: [Ruby] Add support for creating schema from raw Ruby objects The followings should be implemented by follow-up works: * Arrow::TimestampDataType.new(unit: ...) * Arrow::Time32DataType.new(unit: ...) * Arrow::Time64DataType.new(unit: ...) * Arrow::DecimalDataType.new(precision: ..., scale: ...) * Arrow::SparseUnionDataType.new(fields: ..., type_codes: ...) * Arrow::DenseUnionDataType.new(fields: ..., type_codes: ...) * Arrow::DictionaryDataType.new(fields: ..., type_codes: ...) 
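For illustration, here is a minimal sketch of what the new constructors accept; the field names and types below are made up, and the printed output follows the added tests:

```ruby
require "arrow"

# Fields as a Hash of name => data type description. A description may be a
# Symbol, an Arrow::DataType instance, or a Hash with a :type key.
schema = Arrow::Schema.new("count" => :uint32,
                           "tags" => {
                             type: :list,
                             field: {name: "tag", type: :string},
                           })
puts schema
# count: uint32
# tags: list<tag: string>

# Fields as an Array mixing Arrow::Field and raw field descriptions.
schema = Arrow::Schema.new([Arrow::Field.new("visible", :boolean),
                            {name: "count", type: :int32}])
puts schema
# visible: bool
# count: int32
```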
Author: Kouhei Sutou Closes #3293 from kou/ruby-schema-new and squashes the following commits: d251ba9d Add .yardopts to rat exclude files 169b8656 Add support for creating schema from raw Ruby objects --- dev/release/rat_exclude_files.txt | 1 + ruby/red-arrow/.gitignore | 2 + ruby/red-arrow/.yardopts | 6 + ruby/red-arrow/README.md | 2 +- ruby/red-arrow/Rakefile | 4 + ruby/red-arrow/lib/arrow/data-type.rb | 110 +++++++++++++++++-- ruby/red-arrow/lib/arrow/field.rb | 99 +++++++++++++++-- ruby/red-arrow/lib/arrow/list-data-type.rb | 68 ++++++++++++ ruby/red-arrow/lib/arrow/loader.rb | 1 + ruby/red-arrow/lib/arrow/schema.rb | 71 ++++++++++++ ruby/red-arrow/lib/arrow/struct-data-type.rb | 104 ++++++++++++++++++ ruby/red-arrow/red-arrow.gemspec | 2 + ruby/red-arrow/test/test-data-type.rb | 47 ++++++++ ruby/red-arrow/test/test-field.rb | 71 ++++++++++++ ruby/red-arrow/test/test-list-data-type.rb | 43 ++++++++ ruby/red-arrow/test/test-schema.rb | 88 ++++++++++++--- ruby/red-arrow/test/test-struct-data-type.rb | 96 +++++++++++++--- 17 files changed, 765 insertions(+), 50 deletions(-) create mode 100644 ruby/red-arrow/.yardopts create mode 100644 ruby/red-arrow/lib/arrow/list-data-type.rb create mode 100644 ruby/red-arrow/test/test-data-type.rb create mode 100644 ruby/red-arrow/test/test-field.rb create mode 100644 ruby/red-arrow/test/test-list-data-type.rb diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 7674e2fee0f29..1086793630b7d 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -186,5 +186,6 @@ r/README.md r/README.Rmd r/man/*.Rd .gitattributes +ruby/red-arrow/.yardopts rust/test/data/*.csv rust/rust-toolchain diff --git a/ruby/red-arrow/.gitignore b/ruby/red-arrow/.gitignore index 779545d9026f1..68e4b5c7b5de0 100644 --- a/ruby/red-arrow/.gitignore +++ b/ruby/red-arrow/.gitignore @@ -15,4 +15,6 @@ # specific language governing permissions and limitations # under the License. +/.yardoc/ +/doc/reference/ /pkg/ diff --git a/ruby/red-arrow/.yardopts b/ruby/red-arrow/.yardopts new file mode 100644 index 0000000000000..67159b1dc2f3a --- /dev/null +++ b/ruby/red-arrow/.yardopts @@ -0,0 +1,6 @@ +--output-dir doc/reference +--markup markdown +--no-private +lib/**/*.rb +- +doc/text/* diff --git a/ruby/red-arrow/README.md b/ruby/red-arrow/README.md index a6798dd90551f..95ec396fae5b1 100644 --- a/ruby/red-arrow/README.md +++ b/ruby/red-arrow/README.md @@ -39,7 +39,7 @@ Note that the Apache Arrow GLib packages are "unofficial". "Official" packages w Install Red Arrow after you install Apache Arrow GLib: -```text +```console % gem install red-arrow ``` diff --git a/ruby/red-arrow/Rakefile b/ruby/red-arrow/Rakefile index 96851afb9f9f7..a3ece36b732ac 100644 --- a/ruby/red-arrow/Rakefile +++ b/ruby/red-arrow/Rakefile @@ -19,6 +19,7 @@ require "rubygems" require "bundler/gem_helper" +require "yard" base_dir = File.join(__dir__) @@ -37,3 +38,6 @@ task :test do end task default: :test + +YARD::Rake::YardocTask.new do |task| +end diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/lib/arrow/data-type.rb index dad74fb40dc83..03960e47debca 100644 --- a/ruby/red-arrow/lib/arrow/data-type.rb +++ b/ruby/red-arrow/lib/arrow/data-type.rb @@ -18,21 +18,117 @@ module Arrow class DataType class << self + # Creates a new suitable {Arrow::DataType}. + # + # @overload resolve(data_type) + # + # Returns the given data type itself. This is convenient to + # use this method as {Arrow::DataType} converter. 
+ # + # @param data_type [Arrow::DataType] The data type. + # + # @return [Arrow::DataType] The given data type itself. + # + # @overload resolve(name, *arguments) + # + # Creates a suitable data type from type name. For example, + # you can create {Arrow::BooleanDataType} from `:boolean`. + # + # @param name [String, Symbol] The type name of the data type. + # + # @param arguments [::Array] The additional information of the + # data type. + # + # For example, {Arrow::TimestampDataType} needs unit as + # additional information. + # + # @example Create a boolean data type + # Arrow::DataType.resolve(:boolean) + # + # @example Create a milliseconds unit timestamp data type + # Arrow::DataType.resolve(:timestamp, :milli) + # + # @overload resolve(description) + # + # Creates a suitable data type from data type description. + # + # Data type description is a raw `Hash`. Data type description + # must have `:type` value. `:type` is the type of the data type. + # + # If the type needs additional information, you need to + # specify it. See constructor document what information is + # needed. For example, {Arrow::ListDataType#initialize} needs + # `:field` value. + # + # @param description [Hash] The description of the data type. + # + # @option description [String, Symbol] :type The type name of + # the data type. + # + # @example Create a boolean data type + # Arrow::DataType.resolve(type: :boolean) + # + # @example Create a list data type + # Arrow::DataType.resolve(type: :list, + # field: {name: "visible", type: :boolean}) def resolve(data_type) case data_type when DataType data_type when String, Symbol - data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") - data_type_class_name = "#{data_type_name}DataType" - unless Arrow.const_defined?(data_type_class_name) - raise ArgumentError, "invalid data type: #{data_typeinspect}" + resolve_class(data_type).new + when ::Array + type, *arguments = data_type + resolve_class(type).new(*arguments) + when Hash + type = nil + description = {} + data_type.each do |key, value| + key = key.to_sym + case key + when :type + type = value + else + description[key] = value + end + end + if type.nil? + message = + "data type description must have :type value: #{data_type.inspect}" + raise ArgumentError, message + end + data_type_class = resolve_class(type) + if description.empty? + data_type_class.new + else + data_type_class.new(description) end - data_type_class = Arrow.const_get(data_type_class_name) - data_type_class.new else - raise ArgumentError, "invalid data type: #{data_type.inspect}" + message = + "data type must be " + + "Arrow::DataType, String, Symbol, [String, ...], [Symbol, ...] 
" + + "{type: String, ...} or {type: Symbol, ...}: #{data_type.inspect}" + raise ArgumentError, message + end + end + + private + def resolve_class(data_type) + data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") + data_type_class_name = "#{data_type_name}DataType" + unless Arrow.const_defined?(data_type_class_name) + available_types = [] + Arrow.constants.each do |name| + if name.to_s.end_with?("DataType") + available_types << name.to_s.gsub(/DataType\z/, "").downcase.to_sym + end + end + message = + "unknown type: #{data_type.inspect}: " + + "available types: #{available_types.inspect}" + raise ArgumentError, message end + Arrow.const_get(data_type_class_name) end end end diff --git a/ruby/red-arrow/lib/arrow/field.rb b/ruby/red-arrow/lib/arrow/field.rb index be5865fd5564c..8c7c8eaa005cb 100644 --- a/ruby/red-arrow/lib/arrow/field.rb +++ b/ruby/red-arrow/lib/arrow/field.rb @@ -19,16 +19,99 @@ module Arrow class Field alias_method :initialize_raw, :initialize private :initialize_raw - def initialize(name, data_type) - case data_type - when String, Symbol - data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt") - data_type_class_name = "#{data_type_name}DataType" - if Arrow.const_defined?(data_type_class_name) - data_type_class = Arrow.const_get(data_type_class_name) - data_type = data_type_class.new + + # Creates a new {Arrow::Field}. + # + # @overload initialize(name, data_type) + # + # @param name [String, Symbol] The name of the field. + # + # @param data_type [Arrow::DataType, Hash, String, Symbol] The + # data type of the field. + # + # You can specify data type as a description by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # @example Create a field with {Arrow::DataType}s + # Arrow::Field.new("visible", Arrow::BooleanDataType.new) + # + # @example Create a field with data type description + # Arrow::Field.new("visible", :boolean) + # + # @example Create a field with name as `Symbol` + # Arrow::Field.new(:visible, :boolean) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the field. + # + # Field description is a raw `Hash`. Field description must + # have `:name` and `:data_type` values. `:name` is the name of + # the field. `:data_type` is the data type of the field. You + # can use {Arrow::DataType} or data type description as + # `:data_type` value. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # There is a shortcut for convenience. If field description + # doesn't have `:data_type`, all keys except `:name` are + # processes as data type description. For example, the + # following field descrptions are the same: + # + # ```ruby + # {name: "visible", data_type: {type: :boolean}} + # {name: "visible", type: :boolean} # Shortcut version + # ``` + # + # @option description [String, Symbol] :name The name of the field. + # + # @option description [Arrow::DataType, Hash] :data_type The + # data type of the field. You can specify data type description + # by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. 
+ # + # @example Create a field with {Arrow::DataType}s + # Arrow::Field.new(name: "visible", + # data_type: Arrow::BooleanDataType.new) + # + # @example Create a field with data type description + # Arrow::Field.new(name: "visible", data_type: {type: :boolean} + # + # @example Create a field with shortcut form + # Arrow::Field.new(name: "visible", type: :boolean) + def initialize(*args) + n_args = args.size + case n_args + when 1 + description = args[0] + name = nil + data_type = nil + data_type_description = {} + description.each do |key, value| + key = key.to_sym + case key + when :name + name = value + when :data_type + data_type = DataType.resolve(value) + else + data_type_description[key] = value + end end + data_type ||= DataType.resolve(data_type_description) + when 2 + name = args[0] + data_type = DataType.resolve(args[1]) + else + message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + raise ArgumentError, message end + initialize_raw(name, data_type) end end diff --git a/ruby/red-arrow/lib/arrow/list-data-type.rb b/ruby/red-arrow/lib/arrow/list-data-type.rb new file mode 100644 index 0000000000000..c097da4e881e8 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/list-data-type.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class ListDataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::ListDataType}. + # + # @overload initialize(field) + # + # @param field [Arrow::Field, Hash] The field of the list data + # type. You can also specify field description by `Hash`. + # + # See {Arrow::Field.new} how to specify field description. + # + # @example Create a list data type with {Arrow::Field} + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::ListDataType.new(visible_field) + # + # @example Create a list data type with field description + # Arrow::ListDataType.new(name: "visible", type: :boolean) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the list data + # type. It must have `:field` value. + # + # @option description [Arrow::Field, Hash] :field The field of + # the list data type. You can also specify field description + # by `Hash`. + # + # See {Arrow::Field.new} how to specify field description. 
+ # + # @example Create a list data type with {Arrow::Field} + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::ListDataType.new(field: visible_field) + # + # @example Create a list data type with field description + # Arrow::ListDataType.new(field: {name: "visible", type: :boolean}) + def initialize(field) + if field.is_a?(Hash) and field.key?(:field) + description = field + field = description[:field] + end + if field.is_a?(Hash) + field_description = field + field = Field.new(field_description) + end + initialize_raw(field) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index 2092e461c1786..cea98e9a8578e 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -45,6 +45,7 @@ def require_libraries require "arrow/date64-array-builder" require "arrow/field" require "arrow/file-output-stream" + require "arrow/list-data-type" require "arrow/path-extension" require "arrow/record" require "arrow/record-batch" diff --git a/ruby/red-arrow/lib/arrow/schema.rb b/ruby/red-arrow/lib/arrow/schema.rb index 2e6bad29e6506..ecc3324b8a311 100644 --- a/ruby/red-arrow/lib/arrow/schema.rb +++ b/ruby/red-arrow/lib/arrow/schema.rb @@ -21,6 +21,77 @@ module Arrow class Schema include FieldContainable + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::Schema}. + # + # @overload initialize(fields) + # + # @param fields [::Array] The fields of the + # schema. You can mix {Arrow::Field} and field description in + # the fields. + # + # See {Arrow::Field.new} how to specify field description. + # + # @example Create a schema with {Arrow::Field}s + # visible_field = Arrow::Field.new("visible", :boolean) + # Arrow::Schema.new([visible_field]) + # + # @example Create a schema with field descriptions + # visible_field_description = { + # name: "visible", + # data_type: :boolean, + # } + # Arrow::Schema.new([visible_field_description]) + # + # @example Create a schema with {Arrow::Field}s and field descriptions + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::Schema.new(fields) + # + # @overload initialize(fields) + # + # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}] + # The pairs of field name and field data type of the schema. + # You can mix {Arrow::DataType} and data description for field + # data type. + # + # See {Arrow::DataType.new} how to specify data type description. 
+ # + # @example Create a schema with fields + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # :count => :int32, + # :tags => { + # type: :list, + # field: { + # name: "tag", + # type: :string, + # }, + # }, + # } + # Arrow::Schema.new(fields) + def initialize(fields) + case fields + when ::Array + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + when Hash + fields = fields.collect do |name, data_type| + Field.new(name, data_type) + end + end + initialize_raw(fields) + end + alias_method :[], :find_field end end diff --git a/ruby/red-arrow/lib/arrow/struct-data-type.rb b/ruby/red-arrow/lib/arrow/struct-data-type.rb index 7a59f1f620b81..ad810115d62ad 100644 --- a/ruby/red-arrow/lib/arrow/struct-data-type.rb +++ b/ruby/red-arrow/lib/arrow/struct-data-type.rb @@ -21,6 +21,110 @@ module Arrow class StructDataType include FieldContainable + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::StructDataType}. + # + # @overload initialize(fields) + # + # @param fields [::Array] The fields of the + # struct data type. You can also specify field description as + # a field. You can mix {Arrow::Field} and field description. + # + # See {Arrow::Field.new} how to specify field description. + # + # @example Create a struct data type with {Arrow::Field}s + # visible_field = Arrow::Field.new("visible", :boolean) + # count_field = Arrow::Field.new("count", :int32) + # Arrow::StructDataType.new([visible_field, count_field]) + # + # @example Create a struct data type with field descriptions + # field_descriptions = [ + # {name: "visible", type: :boolean}, + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(field_descriptions) + # + # @example Create a struct data type with {Arrow::Field} and field description + # fields = [ + # Arrow::Field.new("visible", :boolean), + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(fields) + # + # @overload initialize(fields) + # + # @param fields [Hash{String, Symbol => Arrow::DataType, Hash}] + # The pairs of field name and field data type of the struct + # data type. You can also specify data type description by + # `Hash`. You can mix {Arrow::DataType} and data type description. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # @example Create a struct data type with {Arrow::DataType}s + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => Arrow::Int32DataType.new, + # } + # Arrow::StructDataType.new(fields) + # + # @example Create a struct data type with data type descriptions + # fields = { + # "visible" => :boolean, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields) + # + # @example Create a struct data type with {Arrow::DataType} and data type description + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the struct data + # type. It must have `:fields` value. + # + # @option description + # [::Array, + # Hash{String, Symbol => Arrow::DataType, Hash, String, Symbol}] + # :fields The fields of the struct data type. 
+ # + # @example Create a struct data type with {Arrow::Field} and field description + # fields = [ + # Arrow::Field.new("visible", :boolean), + # {name: "count", type: :int32}, + # ] + # Arrow::StructDataType.new(fields: fields) + # + # @example Create a struct data type with {Arrow::DataType} and data type description + # fields = { + # "visible" => Arrow::BooleanDataType.new, + # "count" => {type: :int32}, + # } + # Arrow::StructDataType.new(fields: fields) + def initialize(fields) + if fields.is_a?(Hash) and fields.key?(:fields) + description = fields + fields = description[:fields] + end + if fields.is_a?(Hash) + fields = fields.collect do |name, data_type| + Field.new(name, data_type) + end + else + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + end + initialize_raw(fields) + end + alias_method :[], :find_field end end diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec index 9db755fc67ccc..8e79c75dcaff2 100644 --- a/ruby/red-arrow/red-arrow.gemspec +++ b/ruby/red-arrow/red-arrow.gemspec @@ -51,7 +51,9 @@ Gem::Specification.new do |spec| spec.add_development_dependency("bundler") spec.add_development_dependency("rake") + spec.add_development_dependency("redcarpet") spec.add_development_dependency("test-unit") + spec.add_development_dependency("yard") spec.metadata["msys2_mingw_dependencies"] = "apache-arrow" end diff --git a/ruby/red-arrow/test/test-data-type.rb b/ruby/red-arrow/test/test-data-type.rb new file mode 100644 index 0000000000000..c9dbfc6f11b6f --- /dev/null +++ b/ruby/red-arrow/test/test-data-type.rb @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class DataTypeTest < Test::Unit::TestCase + sub_test_case(".resolve") do + test("DataType") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve(Arrow::BooleanDataType.new)) + end + + test("String") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve("boolean")) + end + + test("Symbol") do + assert_equal(Arrow::BooleanDataType.new, + Arrow::DataType.resolve(:boolean)) + end + + test("Array") do + field = Arrow::Field.new(:visible, :boolean) + assert_equal(Arrow::ListDataType.new(field), + Arrow::DataType.resolve([:list, field])) + end + + test("Hash") do + field = Arrow::Field.new(:visible, :boolean) + assert_equal(Arrow::ListDataType.new(field), + Arrow::DataType.resolve(type: :list, field: field)) + end + end +end diff --git a/ruby/red-arrow/test/test-field.rb b/ruby/red-arrow/test/test-field.rb new file mode 100644 index 0000000000000..9be2068ea544b --- /dev/null +++ b/ruby/red-arrow/test/test-field.rb @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class FieldTest < Test::Unit::TestCase + sub_test_case(".new") do + test("String, Arrow::DataType") do + assert_equal("visible: bool", + Arrow::Field.new("visible", Arrow::BooleanDataType.new).to_s) + end + + test("Symbol, Arrow::DataType") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, Arrow::BooleanDataType.new).to_s) + end + + test("String, Symbol") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, :boolean).to_s) + end + + test("String, Hash") do + assert_equal("visible: bool", + Arrow::Field.new(:visible, type: :boolean).to_s) + end + + test("description: String") do + assert_equal("visible: bool", + Arrow::Field.new(name: "visible", + data_type: :boolean).to_s) + end + + test("description: Symbol") do + assert_equal("visible: bool", + Arrow::Field.new(name: :visible, + data_type: :boolean).to_s) + end + + test("description: shortcut") do + assert_equal("visible: bool", + Arrow::Field.new(name: :visible, + type: :boolean).to_s) + end + + test("Hash: shortcut: additional") do + description = { + name: :tags, + type: :list, + field: { + name: "tag", + type: :string, + }, + } + assert_equal("tags: list", + Arrow::Field.new(description).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-list-data-type.rb b/ruby/red-arrow/test/test-list-data-type.rb new file mode 100644 index 0000000000000..cca6ca3914b2b --- /dev/null +++ b/ruby/red-arrow/test/test-list-data-type.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class ListDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Arrow::Field") do + field = Arrow::Field.new(:tag, :string) + assert_equal("list", + Arrow::ListDataType.new(field).to_s) + end + + test("Hash") do + assert_equal("list", + Arrow::ListDataType.new(name: "tag", type: :string).to_s) + end + + test("field: Arrow::Field") do + field = Arrow::Field.new(:tag, :string) + assert_equal("list", + Arrow::ListDataType.new(field: field).to_s) + end + + test("field: Hash") do + field_description = {name: "tag", type: :string} + assert_equal("list", + Arrow::ListDataType.new(field: field_description).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-schema.rb b/ruby/red-arrow/test/test-schema.rb index 2f989cf19f2ec..6cfbbb117d94d 100644 --- a/ruby/red-arrow/test/test-schema.rb +++ b/ruby/red-arrow/test/test-schema.rb @@ -19,31 +19,85 @@ class SchemaTest < Test::Unit::TestCase def setup @count_field = Arrow::Field.new("count", :uint32) @visible_field = Arrow::Field.new("visible", :boolean) - @schema = Arrow::Schema.new([@count_field, @visible_field]) end - sub_test_case("#[]") do - test("[String]") do - assert_equal([@count_field, @visible_field], - [@schema["count"], @schema["visible"]]) + sub_test_case(".new") do + test("[Arrow::Field]") do + fields = [ + @count_field, + @visible_field, + ] + assert_equal("count: uint32\n" + + "visible: bool", + Arrow::Schema.new(fields).to_s) end - test("[Symbol]") do - assert_equal([@count_field, @visible_field], - [@schema[:count], @schema[:visible]]) + test("[Arrow::Field, Hash]") do + fields = [ + @count_field, + {name: "visible", type: :boolean}, + ] + assert_equal("count: uint32\n" + + "visible: bool", + Arrow::Schema.new(fields).to_s) end - test("[Integer]") do - assert_equal([@count_field, @visible_field], - [@schema[0], @schema[1]]) + test("{String, Symbol => Arrow::DataType}") do + fields = { + "count" => Arrow::UInt32DataType.new, + :visible => :boolean, + } + assert_equal("count: uint32\n" + + "visible: bool", + Arrow::Schema.new(fields).to_s) end - test("[invalid]") do - invalid = [] - message = "field name or index must be String, Symbol or Integer" - message << ": <#{invalid.inspect}>" - assert_raise(ArgumentError.new(message)) do - @schema[invalid] + test("{String, Symbol => Hash}") do + fields = { + "count" => {type: :uint32}, + :tags => { + type: :list, + field: { + name: "tag", + type: :string, + }, + }, + } + assert_equal("count: uint32\n" + + "tags: list", + Arrow::Schema.new(fields).to_s) + end + end + + sub_test_case("instance methods") do + def setup + super + @schema = Arrow::Schema.new([@count_field, @visible_field]) + end + + sub_test_case("#[]") do + test("[String]") do + assert_equal([@count_field, @visible_field], + [@schema["count"], @schema["visible"]]) + end + + test("[Symbol]") do + assert_equal([@count_field, @visible_field], + [@schema[:count], @schema[:visible]]) + end + + test("[Integer]") do + assert_equal([@count_field, @visible_field], + [@schema[0], @schema[1]]) + end + + test("[invalid]") do + invalid = [] + message = "field name or index must be String, Symbol or Integer" + message << ": <#{invalid.inspect}>" + assert_raise(ArgumentError.new(message)) do + @schema[invalid] + end end end end diff --git a/ruby/red-arrow/test/test-struct-data-type.rb b/ruby/red-arrow/test/test-struct-data-type.rb index c802c44731072..d106e38b1d841 100644 --- a/ruby/red-arrow/test/test-struct-data-type.rb +++ b/ruby/red-arrow/test/test-struct-data-type.rb @@ -19,31 +19,93 @@ class StructDataTypeTest < 
Test::Unit::TestCase def setup @count_field = Arrow::Field.new("count", :uint32) @visible_field = Arrow::Field.new("visible", :boolean) - @data_type = Arrow::StructDataType.new([@count_field, @visible_field]) end - sub_test_case("#[]") do - test("[String]") do - assert_equal([@count_field, @visible_field], - [@data_type["count"], @data_type["visible"]]) + sub_test_case(".new") do + test("[Arrow::Field]") do + fields = [ + @count_field, + @visible_field, + ] + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) end - test("[Symbol]") do - assert_equal([@count_field, @visible_field], - [@data_type[:count], @data_type[:visible]]) + test("[Hash]") do + fields = [ + {name: "count", data_type: :uint32}, + {name: "visible", data_type: :boolean}, + ] + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) end - test("[Integer]") do - assert_equal([@count_field, @visible_field], - [@data_type[0], @data_type[1]]) + test("[Arrow::Field, Hash]") do + fields = [ + @count_field, + {name: "visible", data_type: :boolean}, + ] + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) end - test("[invalid]") do - invalid = [] - message = "field name or index must be String, Symbol or Integer" - message << ": <#{invalid.inspect}>" - assert_raise(ArgumentError.new(message)) do - @data_type[invalid] + test("{Arrow::DataType}") do + fields = { + "count" => Arrow::UInt32DataType.new, + "visible" => Arrow::BooleanDataType.new, + } + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) + end + + test("{Hash}") do + fields = { + "count" => {type: :uint32}, + "visible" => {type: :boolean}, + } + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) + end + + test("{String, Symbol}") do + fields = { + "count" => "uint32", + "visible" => :boolean, + } + assert_equal("struct", + Arrow::StructDataType.new(fields).to_s) + end + end + + sub_test_case("instance methods") do + def setup + super + @data_type = Arrow::StructDataType.new([@count_field, @visible_field]) + end + + sub_test_case("#[]") do + test("[String]") do + assert_equal([@count_field, @visible_field], + [@data_type["count"], @data_type["visible"]]) + end + + test("[Symbol]") do + assert_equal([@count_field, @visible_field], + [@data_type[:count], @data_type[:visible]]) + end + + test("[Integer]") do + assert_equal([@count_field, @visible_field], + [@data_type[0], @data_type[1]]) + end + + test("[invalid]") do + invalid = [] + message = "field name or index must be String, Symbol or Integer" + message << ": <#{invalid.inspect}>" + assert_raise(ArgumentError.new(message)) do + @data_type[invalid] + end end end end From 6ca8fcdeccc54a80ce90711441a41ec6ffbd216b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jan 2019 16:35:11 +0100 Subject: [PATCH 008/203] ARROW-4148: [CI/Python] Disable ORC on nightly Alpine builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nightly Python Alpine builds were [failing](https://travis-ci.org/kszucs/crossbow/builds/474545492) because PYARROW_WITH_ORC is enabled by default, but the underlying cpp image doesn't build against ORC. 
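For context, the fix relies on pyarrow's convention of gating optional components behind `PYARROW_WITH_*` environment variables; the sketch below is a simplified stand-in for the real logic in `python/setup.py`, not a copy of it:

```python
import os

# Simplified: a component builds when its PYARROW_WITH_* variable is unset
# (falling back to the default) or set to a truthy value.
def component_enabled(name, default=True):
    value = os.environ.get("PYARROW_WITH_" + name.upper())
    if value is None:
        return default  # unset: ORC defaults to on, hence the Alpine failures
    return value not in ("0", "off", "false")

assert component_enabled("orc")           # the nightly failure mode
os.environ["PYARROW_WITH_ORC"] = "0"      # what this patch bakes into the image
assert not component_enabled("orc")
```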
Crossbow builds: - ~[kszucs/crossbow/build-391](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=391)~ - [kszucs/crossbow/build-393](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=393) [GREEN] Author: Krisztián Szűcs Closes #3297 from kszucs/ARROW-4148 and squashes the following commits: 16e162e1 install dependencies from requirements.txt as well 6cd864f9 disable PYARROW_WITH_ORC --- python/Dockerfile.alpine | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/Dockerfile.alpine b/python/Dockerfile.alpine index ba0f2eb23f549..96362197a0343 100644 --- a/python/Dockerfile.alpine +++ b/python/Dockerfile.alpine @@ -30,9 +30,13 @@ RUN export PYTHON_MAJOR=${PYTHON_VERSION:0:1} && \ ADD python/requirements.txt \ python/requirements-test.txt \ /arrow/python/ -RUN pip install -r /arrow/python/requirements-test.txt cython +RUN pip install \ + -r /arrow/python/requirements.txt \ + -r /arrow/python/requirements-test.txt \ + cython ENV ARROW_PYTHON=ON \ + PYARROW_WITH_ORC=0 \ PYARROW_WITH_PARQUET=0 # build and test From 7f1fbf83284b745ee9215f6722e114ee467bdeb8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 3 Jan 2019 21:52:57 +0100 Subject: [PATCH 009/203] ARROW-4009: [CI] Run Valgrind and C++ code coverage in different builds Also fix a couple ASAN / UBSAN issues (not all of them). Author: Antoine Pitrou Closes #3302 from pitrou/ARROW-4009-separate-codecov-valgrind and squashes the following commits: 7dacf9e1 Mark function inline 06372930 Use clang on non-coverage C++ job eca54b69 ARROW-4009: Run Valgrind and C++ code coverage in different builds --- .travis.yml | 30 +++- cpp/CMakeLists.txt | 8 + cpp/cmake_modules/san-config.cmake | 24 +-- cpp/src/arrow/array-test.cc | 13 +- cpp/src/arrow/array/builder_binary.h | 1 + cpp/src/arrow/array/builder_nested.h | 1 + cpp/src/arrow/buffer-builder.h | 205 +++++++++++++++++++++++++ cpp/src/arrow/buffer-test.cc | 1 + cpp/src/arrow/buffer.cc | 21 +-- cpp/src/arrow/buffer.h | 168 +------------------- cpp/src/arrow/io/readahead.cc | 2 + cpp/src/arrow/memory_pool.cc | 79 ++++++---- cpp/src/arrow/test-util.cc | 16 +- cpp/src/arrow/util/thread-pool-test.cc | 2 +- cpp/src/parquet/bloom_filter.h | 1 + 15 files changed, 324 insertions(+), 248 deletions(-) create mode 100644 cpp/src/arrow/buffer-builder.h diff --git a/.travis.yml b/.travis.yml index 059daeef8fd14..837b4cfef30db 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,13 +61,35 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh script: - $TRAVIS_BUILD_DIR/ci/travis_lint.sh - - name: "C++ unit tests, code coverage with gcc 4.8" + - name: "C++ unit tests w/ Valgrind, clang 6.0" + language: cpp + os: linux + env: + - ARROW_TRAVIS_VALGRIND=1 + - ARROW_TRAVIS_USE_TOOLCHAIN=1 + - ARROW_TRAVIS_PLASMA=1 + - ARROW_TRAVIS_ORC=1 + - ARROW_TRAVIS_PARQUET=1 + - ARROW_TRAVIS_GANDIVA=1 + - ARROW_BUILD_WARNING_LEVEL=CHECKIN + - CC="clang-6.0" + - CXX="clang++-6.0" + before_script: + - if [ $ARROW_CI_CPP_AFFECTED != "1" ]; then exit; fi + - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh + - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh + # If either C++ or Python changed, we must install the C++ libraries + - git submodule update --init + - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh + script: + - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 + # Separating Valgrind and C++ coverage makes individual jobs shorter + - name: "C++ unit tests w/ gcc 4.8, coverage" compiler: gcc language: cpp os: linux jdk: openjdk8 env: - 
- ARROW_TRAVIS_VALGRIND=1 - ARROW_TRAVIS_USE_TOOLCHAIN=1 - ARROW_TRAVIS_PLASMA=1 - ARROW_TRAVIS_ORC=1 @@ -87,7 +109,7 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1 - $TRAVIS_BUILD_DIR/ci/travis_script_gandiva_java.sh || travis_terminate 1 - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh || travis_terminate 1 - - name: "Python 2.7 and 3.6 unit tests, coverage with gcc 4.8" + - name: "Python 2.7 and 3.6 unit tests w/ Valgrind, gcc 4.8, coverage" compiler: gcc language: cpp os: linux @@ -98,10 +120,10 @@ matrix: - ARROW_TRAVIS_USE_TOOLCHAIN=1 - ARROW_TRAVIS_COVERAGE=1 - ARROW_TRAVIS_PYTHON_DOCS=1 - - ARROW_BUILD_WARNING_LEVEL=CHECKIN - ARROW_TRAVIS_PYTHON_JVM=1 - ARROW_TRAVIS_PYTHON_GANDIVA=1 - ARROW_TRAVIS_OPTIONAL_INSTALL=1 + - ARROW_BUILD_WARNING_LEVEL=CHECKIN # TODO(wesm): Run the benchmarks outside of Travis # - ARROW_TRAVIS_PYTHON_BENCHMARKS=1 before_script: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 006b406ba0762..aba1a59618bb0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -111,6 +111,14 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Run the test suite using valgrind --tool=memcheck" OFF) + option(ARROW_USE_ASAN + "Enable Address Sanitizer checks" + OFF) + + option(ARROW_USE_TSAN + "Enable Thread Sanitizer checks" + OFF) + option(ARROW_BUILD_TESTS "Build the Arrow googletest unit tests, default OFF" OFF) diff --git a/cpp/cmake_modules/san-config.cmake b/cpp/cmake_modules/san-config.cmake index f2de9cf1f7553..22a9b0c8098a0 100644 --- a/cpp/cmake_modules/san-config.cmake +++ b/cpp/cmake_modules/san-config.cmake @@ -22,19 +22,6 @@ if (${ARROW_USE_ASAN}) ("${COMPILER_FAMILY}" STREQUAL "gcc" AND "${COMPILER_VERSION}" VERSION_GREATER "4.8"))) message(SEND_ERROR "Cannot use ASAN without clang or gcc >= 4.8") endif() - - # If UBSAN is also enabled, and we're on clang < 3.5, ensure static linking is - # enabled. Otherwise, we run into https://llvm.org/bugs/show_bug.cgi?id=18211 - if("${ARROW_USE_UBSAN}" AND - "${COMPILER_FAMILY}" STREQUAL "clang" AND - "${COMPILER_VERSION}" VERSION_LESS "3.5") - if("${ARROW_LINK}" STREQUAL "a") - message("Using static linking for ASAN+UBSAN build") - set(ARROW_LINK "s") - elseif("${ARROW_LINK}" STREQUAL "d") - message(SEND_ERROR "Cannot use dynamic linking when ASAN and UBSAN are both enabled") - endif() - endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -DADDRESS_SANITIZER") endif() @@ -49,7 +36,7 @@ if (${ARROW_USE_UBSAN}) ("${COMPILER_FAMILY}" STREQUAL "gcc" AND "${COMPILER_VERSION}" VERSION_GREATER "4.9"))) message(SEND_ERROR "Cannot use UBSAN without clang or gcc >= 4.9") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all") endif () # Flag to enable thread sanitizer (clang or gcc 4.8) @@ -101,14 +88,7 @@ if ("${ARROW_USE_UBSAN}" OR "${ARROW_USE_ASAN}" OR "${ARROW_USE_TSAN}") # GCC 4.8 and 4.9 (latest as of this writing) don't allow you to specify a # sanitizer blacklist. if("${COMPILER_FAMILY}" STREQUAL "clang") - # Require clang 3.4 or newer; clang 3.3 has issues with TSAN and pthread - # symbol interception. - if("${COMPILER_VERSION}" VERSION_LESS "3.4") - message(SEND_ERROR "Must use clang 3.4 or newer to run a sanitizer build." - " Detected unsupported version ${COMPILER_VERSION}." 
- " Try using clang from $NATIVE_TOOLCHAIN/.") - endif() - add_definitions("-fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitize-blacklist.txt") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize-blacklist=${BUILD_SUPPORT_DIR}/sanitize-blacklist.txt") else() message(WARNING "GCC does not support specifying a sanitizer blacklist. Known sanitizer check failures will not be suppressed.") endif() diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index bdb7eda118d51..ccdaad58c681a 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -30,6 +30,7 @@ #include #include "arrow/array.h" +#include "arrow/buffer-builder.h" #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/ipc/test-common.h" @@ -761,22 +762,22 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendValuesLazyIter) { auto& draws = this->draws_; auto& valid_bytes = this->valid_bytes_; - auto doubler = [&draws](int64_t index) { return draws[index] * 2; }; - auto lazy_iter = internal::MakeLazyRange(doubler, size); + auto halve = [&draws](int64_t index) { return draws[index] / 2; }; + auto lazy_iter = internal::MakeLazyRange(halve, size); ASSERT_OK(this->builder_->AppendValues(lazy_iter.begin(), lazy_iter.end(), valid_bytes.begin())); - std::vector doubled; - transform(draws.begin(), draws.end(), back_inserter(doubled), - [](T in) { return in * 2; }); + std::vector halved; + transform(draws.begin(), draws.end(), back_inserter(halved), + [](T in) { return in / 2; }); std::shared_ptr result; FinishAndCheckPadding(this->builder_.get(), &result); std::shared_ptr expected; ASSERT_OK( - this->builder_->AppendValues(doubled.data(), doubled.size(), valid_bytes.data())); + this->builder_->AppendValues(halved.data(), halved.size(), valid_bytes.data())); FinishAndCheckPadding(this->builder_.get(), &expected); ASSERT_TRUE(expected->Equals(result)); diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 7c101bdffc5e4..324279daa4a6e 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -25,6 +25,7 @@ #include "arrow/array.h" #include "arrow/array/builder_base.h" +#include "arrow/buffer-builder.h" #include "arrow/status.h" #include "arrow/type_traits.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 863e6fef06f7d..19b0ad81b5a16 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -21,6 +21,7 @@ #include #include "arrow/array/builder_base.h" +#include "arrow/buffer-builder.h" namespace arrow { diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h new file mode 100644 index 0000000000000..dafa3ee611f04 --- /dev/null +++ b/cpp/src/arrow/buffer-builder.h @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_BUFFER_BUILDER_H +#define ARROW_BUFFER_BUILDER_H + +#include +#include +#include +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/status.h" +#include "arrow/util/bit-util.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// Buffer builder classes + +/// \class BufferBuilder +/// \brief A class for incrementally building a contiguous chunk of in-memory data +class ARROW_EXPORT BufferBuilder { + public: + explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) + : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {} + + /// \brief Resize the buffer to the nearest multiple of 64 bytes + /// + /// \param elements the new capacity of the of the builder. Will be rounded + /// up to a multiple of 64 bytes for padding + /// \param shrink_to_fit if new capacity is smaller than the existing size, + /// reallocate internal buffer. Set to false to avoid reallocations when + /// shrinking the builder. + /// \return Status + Status Resize(const int64_t elements, bool shrink_to_fit = true) { + // Resize(0) is a no-op + if (elements == 0) { + return Status::OK(); + } + int64_t old_capacity = capacity_; + + if (buffer_ == NULLPTR) { + ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, elements, &buffer_)); + } else { + ARROW_RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit)); + } + capacity_ = buffer_->capacity(); + data_ = buffer_->mutable_data(); + if (capacity_ > old_capacity) { + memset(data_ + old_capacity, 0, capacity_ - old_capacity); + } + return Status::OK(); + } + + /// \brief Ensure that builder can accommodate the additional number of bytes + /// without the need to perform allocations + /// + /// \param size number of additional bytes to make space for + /// \return Status + Status Reserve(const int64_t size) { return Resize(size_ + size, false); } + + /// \brief Append the given data to the buffer + /// + /// The buffer is automatically expanded if necessary. + Status Append(const void* data, int64_t length) { + if (capacity_ < length + size_) { + int64_t new_capacity = BitUtil::NextPower2(length + size_); + ARROW_RETURN_NOT_OK(Resize(new_capacity)); + } + UnsafeAppend(data, length); + return Status::OK(); + } + + /// \brief Append the given data to the buffer + /// + /// The buffer is automatically expanded if necessary. + template + Status Append(const std::array& data) { + constexpr auto nbytes = static_cast(NBYTES); + if (capacity_ < nbytes + size_) { + int64_t new_capacity = BitUtil::NextPower2(nbytes + size_); + ARROW_RETURN_NOT_OK(Resize(new_capacity)); + } + + if (nbytes > 0) { + std::copy(data.cbegin(), data.cend(), data_ + size_); + size_ += nbytes; + } + return Status::OK(); + } + + // Advance pointer and zero out memory + Status Advance(const int64_t length) { + if (capacity_ < length + size_) { + int64_t new_capacity = BitUtil::NextPower2(length + size_); + ARROW_RETURN_NOT_OK(Resize(new_capacity)); + } + if (length > 0) { + memset(data_ + size_, 0, static_cast(length)); + size_ += length; + } + return Status::OK(); + } + + // Unsafe methods don't check existing size + void UnsafeAppend(const void* data, int64_t length) { + if (length > 0) { + memcpy(data_ + size_, data, static_cast(length)); + size_ += length; + } + } + + /// \brief Return result of builder as a Buffer object. 
+ /// + /// The builder is reset and can be reused afterwards. + /// + /// \param[out] out the finalized Buffer object + /// \param shrink_to_fit if the buffer size is smaller than its capacity, + /// reallocate to fit more tightly in memory. Set to false to avoid + /// a reallocation, at the expense of potentially more memory consumption. + /// \return Status + Status Finish(std::shared_ptr* out, bool shrink_to_fit = true) { + ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit)); + *out = buffer_; + Reset(); + return Status::OK(); + } + + void Reset() { + buffer_ = NULLPTR; + capacity_ = size_ = 0; + } + + int64_t capacity() const { return capacity_; } + int64_t length() const { return size_; } + const uint8_t* data() const { return data_; } + + protected: + std::shared_ptr buffer_; + MemoryPool* pool_; + uint8_t* data_; + int64_t capacity_; + int64_t size_; +}; + +/// \brief A BufferBuilder subclass with convenience methods to append typed data +template +class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder { + public: + explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {} + + Status Append(T arithmetic_value) { + static_assert(std::is_arithmetic::value, + "Convenience buffer append only supports arithmetic types"); + return BufferBuilder::Append(reinterpret_cast(&arithmetic_value), + sizeof(T)); + } + + Status Append(const T* arithmetic_values, int64_t num_elements) { + static_assert(std::is_arithmetic::value, + "Convenience buffer append only supports arithmetic types"); + return BufferBuilder::Append(reinterpret_cast(arithmetic_values), + num_elements * sizeof(T)); + } + + void UnsafeAppend(T arithmetic_value) { + static_assert(std::is_arithmetic::value, + "Convenience buffer append only supports arithmetic types"); + BufferBuilder::UnsafeAppend(reinterpret_cast(&arithmetic_value), sizeof(T)); + } + + void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) { + static_assert(std::is_arithmetic::value, + "Convenience buffer append only supports arithmetic types"); + BufferBuilder::UnsafeAppend(reinterpret_cast(arithmetic_values), + num_elements * sizeof(T)); + } + + const T* data() const { return reinterpret_cast(data_); } + int64_t length() const { return size_ / sizeof(T); } + int64_t capacity() const { return capacity_ / sizeof(T); } +}; + +} // namespace arrow + +#endif // ARROW_BUFFER_BUILDER_H diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc index 4d16f7f9c277d..5d01515078c05 100644 --- a/cpp/src/arrow/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -26,6 +26,7 @@ #include +#include "arrow/buffer-builder.h" #include "arrow/buffer.h" #include "arrow/memory_pool.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 01bb0c34968d3..9534d2b94f3e4 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -126,25 +126,18 @@ class PoolBuffer : public ResizableBuffer { } Status Resize(const int64_t new_size, bool shrink_to_fit = true) override { - if (!shrink_to_fit || (new_size > size_)) { - RETURN_NOT_OK(Reserve(new_size)); - } else { - // Buffer is not growing, so shrink to the requested size without + if (mutable_data_ && shrink_to_fit && new_size <= size_) { + // Buffer is non-null and is not growing, so shrink to the requested size without // excess space. int64_t new_capacity = BitUtil::RoundUpToMultipleOf64(new_size); if (capacity_ != new_capacity) { // Buffer hasn't got yet the requested size. 
- if (new_size == 0) { - pool_->Free(mutable_data_, capacity_); - capacity_ = 0; - mutable_data_ = nullptr; - data_ = nullptr; - } else { - RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); - data_ = mutable_data_; - capacity_ = new_capacity; - } + RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_)); + data_ = mutable_data_; + capacity_ = new_capacity; } + } else { + RETURN_NOT_OK(Reserve(new_size)); } size_ = new_size; diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 6b2ad1bbefc7f..f0042897b027b 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -19,7 +19,6 @@ #define ARROW_BUFFER_H #include -#include #include #include #include @@ -29,7 +28,6 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" -#include "arrow/util/bit-util.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -110,7 +108,10 @@ class ARROW_EXPORT Buffer { #ifndef NDEBUG CheckMutable(); #endif - memset(mutable_data_ + size_, 0, static_cast(capacity_ - size_)); + // A zero-capacity buffer can have a null data pointer + if (capacity_ != 0) { + memset(mutable_data_ + size_, 0, static_cast(capacity_ - size_)); + } } /// \brief Construct a new buffer that owns its memory from a std::string @@ -391,167 +392,6 @@ Status AllocateEmptyBitmap(int64_t length, std::shared_ptr* out); /// @} -// ---------------------------------------------------------------------- -// Buffer builder classes - -/// \class BufferBuilder -/// \brief A class for incrementally building a contiguous chunk of in-memory data -class ARROW_EXPORT BufferBuilder { - public: - explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) - : pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {} - - /// \brief Resize the buffer to the nearest multiple of 64 bytes - /// - /// \param elements the new capacity of the of the builder. Will be rounded - /// up to a multiple of 64 bytes for padding - /// \param shrink_to_fit if new capacity is smaller than the existing size, - /// reallocate internal buffer. Set to false to avoid reallocations when - /// shrinking the builder. - /// \return Status - Status Resize(const int64_t elements, bool shrink_to_fit = true) { - // Resize(0) is a no-op - if (elements == 0) { - return Status::OK(); - } - int64_t old_capacity = capacity_; - - if (buffer_ == NULLPTR) { - ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, elements, &buffer_)); - } else { - ARROW_RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit)); - } - capacity_ = buffer_->capacity(); - data_ = buffer_->mutable_data(); - if (capacity_ > old_capacity) { - memset(data_ + old_capacity, 0, capacity_ - old_capacity); - } - return Status::OK(); - } - - /// \brief Ensure that builder can accommodate the additional number of bytes - /// without the need to perform allocations - /// - /// \param size number of additional bytes to make space for - /// \return Status - Status Reserve(const int64_t size) { return Resize(size_ + size, false); } - - /// \brief Append the given data to the buffer - /// - /// The buffer is automatically expanded if necessary. - Status Append(const void* data, int64_t length) { - if (capacity_ < length + size_) { - int64_t new_capacity = BitUtil::NextPower2(length + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - UnsafeAppend(data, length); - return Status::OK(); - } - - /// \brief Append the given data to the buffer - /// - /// The buffer is automatically expanded if necessary. 
- template - Status Append(const std::array& data) { - constexpr auto nbytes = static_cast(NBYTES); - if (capacity_ < nbytes + size_) { - int64_t new_capacity = BitUtil::NextPower2(nbytes + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - - std::copy(data.cbegin(), data.cend(), data_ + size_); - size_ += nbytes; - return Status::OK(); - } - - // Advance pointer and zero out memory - Status Advance(const int64_t length) { - if (capacity_ < length + size_) { - int64_t new_capacity = BitUtil::NextPower2(length + size_); - ARROW_RETURN_NOT_OK(Resize(new_capacity)); - } - memset(data_ + size_, 0, static_cast(length)); - size_ += length; - return Status::OK(); - } - - // Unsafe methods don't check existing size - void UnsafeAppend(const void* data, int64_t length) { - memcpy(data_ + size_, data, static_cast(length)); - size_ += length; - } - - /// \brief Return result of builder as a Buffer object. - /// - /// The builder is reset and can be reused afterwards. - /// - /// \param[out] out the finalized Buffer object - /// \param shrink_to_fit if the buffer size is smaller than its capacity, - /// reallocate to fit more tightly in memory. Set to false to avoid - /// a reallocation, at the expense of potentially more memory consumption. - /// \return Status - Status Finish(std::shared_ptr* out, bool shrink_to_fit = true) { - ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit)); - *out = buffer_; - Reset(); - return Status::OK(); - } - - void Reset() { - buffer_ = NULLPTR; - capacity_ = size_ = 0; - } - - int64_t capacity() const { return capacity_; } - int64_t length() const { return size_; } - const uint8_t* data() const { return data_; } - - protected: - std::shared_ptr buffer_; - MemoryPool* pool_; - uint8_t* data_; - int64_t capacity_; - int64_t size_; -}; - -/// \brief A BufferBuilder subclass with convenience methods to append typed data -template -class ARROW_EXPORT TypedBufferBuilder : public BufferBuilder { - public: - explicit TypedBufferBuilder(MemoryPool* pool) : BufferBuilder(pool) {} - - Status Append(T arithmetic_value) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - return BufferBuilder::Append(reinterpret_cast(&arithmetic_value), - sizeof(T)); - } - - Status Append(const T* arithmetic_values, int64_t num_elements) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - return BufferBuilder::Append(reinterpret_cast(arithmetic_values), - num_elements * sizeof(T)); - } - - void UnsafeAppend(T arithmetic_value) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - BufferBuilder::UnsafeAppend(reinterpret_cast(&arithmetic_value), sizeof(T)); - } - - void UnsafeAppend(const T* arithmetic_values, int64_t num_elements) { - static_assert(std::is_arithmetic::value, - "Convenience buffer append only supports arithmetic types"); - BufferBuilder::UnsafeAppend(reinterpret_cast(arithmetic_values), - num_elements * sizeof(T)); - } - - const T* data() const { return reinterpret_cast(data_); } - int64_t length() const { return size_ / sizeof(T); } - int64_t capacity() const { return capacity_ / sizeof(T); } -}; - } // namespace arrow #endif // ARROW_BUFFER_H diff --git a/cpp/src/arrow/io/readahead.cc b/cpp/src/arrow/io/readahead.cc index 89db6a66e8c8d..4222f87a5ca3b 100644 --- a/cpp/src/arrow/io/readahead.cc +++ b/cpp/src/arrow/io/readahead.cc @@ -162,11 +162,13 @@ class ReadaheadSpooler::Impl { int64_t bytes_read; 
RETURN_NOT_OK(AllocateResizableBuffer( pool_, read_size_ + buf->left_padding + buf->right_padding, &buffer)); + DCHECK_NE(buffer->mutable_data(), nullptr); RETURN_NOT_OK( raw_->Read(read_size_, &bytes_read, buffer->mutable_data() + buf->left_padding)); if (bytes_read < read_size_) { // Got a short read RETURN_NOT_OK(buffer->Resize(bytes_read + buf->left_padding + buf->right_padding)); + DCHECK_NE(buffer->mutable_data(), nullptr); } // Zero padding areas memset(buffer->mutable_data(), 0, buf->left_padding); diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index fb5beacf0f863..abf36fcac15e1 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -40,6 +40,14 @@ namespace arrow { constexpr size_t kAlignment = 64; namespace { + +#ifdef ARROW_JEMALLOC +inline size_t FixAllocationSize(int64_t size) { + // mallocx() and rallocx() don't support 0-sized allocations + return std::max(static_cast(size), kAlignment); +} +#endif + // Allocate memory according to the alignment requirements for Arrow // (as of May 2016 64 bytes) Status AllocateAligned(int64_t size, uint8_t** out) { @@ -58,8 +66,8 @@ Status AllocateAligned(int64_t size, uint8_t** out) { return Status::OutOfMemory("malloc of size ", size, " failed"); } #elif defined(ARROW_JEMALLOC) - *out = reinterpret_cast(mallocx( - std::max(static_cast(size), kAlignment), MALLOCX_ALIGN(kAlignment))); + *out = reinterpret_cast( + mallocx(FixAllocationSize(size), MALLOCX_ALIGN(kAlignment))); if (*out == NULL) { return Status::OutOfMemory("malloc of size ", size, " failed"); } @@ -76,6 +84,42 @@ Status AllocateAligned(int64_t size, uint8_t** out) { #endif return Status::OK(); } + +Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) { +#ifdef ARROW_JEMALLOC + uint8_t* previous_ptr = *ptr; + if (new_size < 0) { + return Status::Invalid("negative realloc size"); + } + if (static_cast(new_size) >= std::numeric_limits::max()) { + return Status::CapacityError("realloc overflows size_t"); + } + *ptr = reinterpret_cast( + rallocx(*ptr, FixAllocationSize(new_size), MALLOCX_ALIGN(kAlignment))); + if (*ptr == NULL) { + *ptr = previous_ptr; + return Status::OutOfMemory("realloc of size ", new_size, " failed"); + } +#else + // Note: We cannot use realloc() here as it doesn't guarantee alignment. + + // Allocate new chunk + uint8_t* out = nullptr; + RETURN_NOT_OK(AllocateAligned(new_size, &out)); + DCHECK(out); + // Copy contents and release old memory chunk + memcpy(out, *ptr, static_cast(std::min(new_size, old_size))); +#ifdef _WIN32 + _aligned_free(*ptr); +#else + std::free(*ptr); +#endif // defined(_MSC_VER) + *ptr = out; +#endif // defined(ARROW_JEMALLOC) + + return Status::OK(); +} + } // namespace MemoryPool::MemoryPool() {} @@ -99,36 +143,7 @@ class DefaultMemoryPool : public MemoryPool { } Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override { -#ifdef ARROW_JEMALLOC - uint8_t* previous_ptr = *ptr; - if (new_size < 0) { - return Status::Invalid("negative realloc size"); - } - if (static_cast(new_size) >= std::numeric_limits::max()) { - return Status::CapacityError("realloc overflows size_t"); - } - *ptr = reinterpret_cast( - rallocx(*ptr, static_cast(new_size), MALLOCX_ALIGN(kAlignment))); - if (*ptr == NULL) { - *ptr = previous_ptr; - return Status::OutOfMemory("realloc of size ", new_size, " failed"); - } -#else - // Note: We cannot use realloc() here as it doesn't guarantee alignment. 
- - // Allocate new chunk - uint8_t* out = nullptr; - RETURN_NOT_OK(AllocateAligned(new_size, &out)); - DCHECK(out); - // Copy contents and release old memory chunk - memcpy(out, *ptr, static_cast(std::min(new_size, old_size))); -#ifdef _WIN32 - _aligned_free(*ptr); -#else - std::free(*ptr); -#endif // defined(_MSC_VER) - *ptr = out; -#endif // defined(ARROW_JEMALLOC) + RETURN_NOT_OK(ReallocateAligned(old_size, new_size, ptr)); stats_.UpdateAllocatedBytes(new_size - old_size); return Status::OK(); diff --git a/cpp/src/arrow/test-util.cc b/cpp/src/arrow/test-util.cc index 8c5f36417f881..617c53978f619 100644 --- a/cpp/src/arrow/test-util.cc +++ b/cpp/src/arrow/test-util.cc @@ -303,17 +303,23 @@ void AssertZeroPadded(const Array& array) { for (const auto& buffer : array.data()->buffers) { if (buffer) { const int64_t padding = buffer->capacity() - buffer->size(); - std::vector zeros(padding); - ASSERT_EQ(0, memcmp(buffer->data() + buffer->size(), zeros.data(), padding)); + if (padding > 0) { + std::vector zeros(padding); + ASSERT_EQ(0, memcmp(buffer->data() + buffer->size(), zeros.data(), padding)); + } } } } void TestInitialized(const Array& array) { for (const auto& buffer : array.data()->buffers) { - if (buffer) { - std::vector zeros(buffer->capacity()); - throw_away = memcmp(buffer->data(), zeros.data(), buffer->size()); + if (buffer && buffer->capacity() > 0) { + int total = 0; + auto data = buffer->data(); + for (int64_t i = 0; i < buffer->size(); ++i) { + total ^= data[i]; + } + throw_away = total; } } } diff --git a/cpp/src/arrow/util/thread-pool-test.cc b/cpp/src/arrow/util/thread-pool-test.cc index 6d7b9e230f080..22a8db21fd280 100644 --- a/cpp/src/arrow/util/thread-pool-test.cc +++ b/cpp/src/arrow/util/thread-pool-test.cc @@ -298,7 +298,7 @@ TEST_F(TestThreadPool, Submit) { // Test fork safety on Unix -#if !(defined(_WIN32) || defined(ARROW_VALGRIND)) +#if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)) TEST_F(TestThreadPool, ForkSafety) { pid_t child_pid; int child_status; diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 918780e04971a..0078051b49735 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -21,6 +21,7 @@ #include #include +#include "arrow/util/bit-util.h" #include "arrow/util/logging.h" #include "parquet/exception.h" #include "parquet/hasher.h" From 1ff79785e62855d003f4b5f0c054cbfd155160c1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 4 Jan 2019 11:40:45 -0600 Subject: [PATCH 010/203] ARROW-4150: [C++] Ensure allocated buffers have non-null data pointer We would originally give 0-size buffers a null data pointer, but passing a null pointer to certain library functions such as memcpy() yields undefined behaviour. Also, a null pointer is a common indication that an error or bug occurred. 
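In essence, the fix (shown in the memory_pool.cc diff below) reserves a static, suitably aligned one-byte area and hands out its address whenever zero bytes are requested, so callers always receive a valid, aligned pointer. A minimal standalone sketch of the idea, using POSIX `posix_memalign()` for brevity (an assumption for illustration; the actual patch dispatches to `_aligned_malloc`, jemalloc, or `std::free` depending on platform and build flags):

```cpp
#include <cstdint>
#include <cstdlib>

constexpr size_t kAlignment = 64;  // Arrow's required buffer alignment

// Static aligned storage; its address stands in for every zero-size allocation.
alignas(kAlignment) static uint8_t zero_size_area[1];

uint8_t* AllocateAligned(size_t size) {
  if (size == 0) {
    return zero_size_area;  // non-null and aligned, but not heap memory
  }
  void* out = nullptr;
  if (posix_memalign(&out, kAlignment, size) != 0) {
    return nullptr;  // allocation failure
  }
  return static_cast<uint8_t*>(out);
}

void FreeAligned(uint8_t* ptr) {
  if (ptr != zero_size_area) {  // the sentinel must never reach free()
    std::free(ptr);
  }
}
```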
Author: Antoine Pitrou Closes #3309 from pitrou/ARROW-4150-non-null-buffer-data and squashes the following commits: d9f1b03bf ARROW-4150: Ensure allocated buffers have non-null data pointer --- cpp/src/arrow/buffer-test.cc | 59 ++++++++++++++++++++++++++++++++++++ cpp/src/arrow/memory_pool.cc | 51 +++++++++++++++++++++---------- 2 files changed, 94 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc index 5d01515078c05..7c54e136195f3 100644 --- a/cpp/src/arrow/buffer-test.cc +++ b/cpp/src/arrow/buffer-test.cc @@ -177,6 +177,65 @@ TEST(TestBuffer, SliceMutableBuffer) { ASSERT_TRUE(slice->Equals(expected)); } +template +void TestZeroSizeAllocateBuffer(MemoryPool* pool, AllocateFunction&& allocate_func) { + auto allocated_bytes = pool->bytes_allocated(); + { + std::shared_ptr buffer; + + ASSERT_OK(allocate_func(pool, 0, &buffer)); + ASSERT_EQ(buffer->size(), 0); + // Even 0-sized buffers should not have a null data pointer + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes); + } + ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); +} + +TEST(TestAllocateBuffer, ZeroSize) { + MemoryPool* pool = default_memory_pool(); + auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr* out) { + return AllocateBuffer(pool, size, out); + }; + TestZeroSizeAllocateBuffer(pool, allocate_func); +} + +TEST(TestAllocateResizableBuffer, ZeroSize) { + MemoryPool* pool = default_memory_pool(); + auto allocate_func = [](MemoryPool* pool, int64_t size, std::shared_ptr* out) { + std::shared_ptr res; + RETURN_NOT_OK(AllocateResizableBuffer(pool, size, &res)); + *out = res; + return Status::OK(); + }; + TestZeroSizeAllocateBuffer(pool, allocate_func); +} + +TEST(TestAllocateResizableBuffer, ZeroResize) { + MemoryPool* pool = default_memory_pool(); + auto allocated_bytes = pool->bytes_allocated(); + { + std::shared_ptr buffer; + + ASSERT_OK(AllocateResizableBuffer(pool, 1000, &buffer)); + ASSERT_EQ(buffer->size(), 1000); + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes + 1000); + + ASSERT_OK(buffer->Resize(0)); + ASSERT_NE(buffer->data(), nullptr); + ASSERT_EQ(buffer->mutable_data(), buffer->data()); + + ASSERT_GE(pool->bytes_allocated(), allocated_bytes); + ASSERT_LT(pool->bytes_allocated(), allocated_bytes + 1000); + } + ASSERT_EQ(pool->bytes_allocated(), allocated_bytes); +} + TEST(TestBufferBuilder, ResizeReserve) { const std::string data = "some data"; auto data_ptr = data.c_str(); diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index abf36fcac15e1..3e0366a19da41 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -41,12 +41,9 @@ constexpr size_t kAlignment = 64; namespace { -#ifdef ARROW_JEMALLOC -inline size_t FixAllocationSize(int64_t size) { - // mallocx() and rallocx() don't support 0-sized allocations - return std::max(static_cast(size), kAlignment); -} -#endif +// A static piece of memory for 0-size allocations, so as to return +// an aligned non-null pointer. 
+alignas(kAlignment) static uint8_t zero_size_area[1];

 // Allocate memory according to the alignment requirements for Arrow
 // (as of May 2016 64 bytes)
@@ -55,6 +52,10 @@ Status AllocateAligned(int64_t size, uint8_t** out) {
   if (size < 0) {
     return Status::Invalid("negative malloc size");
   }
+  if (size == 0) {
+    *out = zero_size_area;
+    return Status::OK();
+  }
   if (static_cast<size_t>(size) >= std::numeric_limits<size_t>::max()) {
     return Status::CapacityError("malloc size overflows size_t");
   }
@@ -67,7 +68,7 @@
   }
 #elif defined(ARROW_JEMALLOC)
   *out = reinterpret_cast<uint8_t*>(
-      mallocx(FixAllocationSize(size), MALLOCX_ALIGN(kAlignment)));
+      mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
   if (*out == NULL) {
     return Status::OutOfMemory("malloc of size ", size, " failed");
   }
@@ -85,9 +86,32 @@ Status AllocateAligned(int64_t size, uint8_t** out) {
   return Status::OK();
 }

+void DeallocateAligned(uint8_t* ptr, int64_t size) {
+  if (ptr == zero_size_area) {
+    DCHECK_EQ(size, 0);
+  } else {
+#ifdef _WIN32
+    _aligned_free(ptr);
+#elif defined(ARROW_JEMALLOC)
+    dallocx(ptr, MALLOCX_ALIGN(kAlignment));
+#else
+    std::free(ptr);
+#endif
+  }
+}
+
 Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
-#ifdef ARROW_JEMALLOC
   uint8_t* previous_ptr = *ptr;
+  if (previous_ptr == zero_size_area) {
+    DCHECK_EQ(old_size, 0);
+    return AllocateAligned(new_size, ptr);
+  }
+  if (new_size == 0) {
+    DeallocateAligned(previous_ptr, old_size);
+    *ptr = zero_size_area;
+    return Status::OK();
+  }
+#ifdef ARROW_JEMALLOC
   if (new_size < 0) {
     return Status::Invalid("negative realloc size");
   }
@@ -95,7 +119,7 @@ Status ReallocateAligned(int64_t old_size, int64_t new_size, uint8_t** ptr) {
     return Status::CapacityError("realloc overflows size_t");
   }
   *ptr = reinterpret_cast<uint8_t*>(
-      rallocx(*ptr, FixAllocationSize(new_size), MALLOCX_ALIGN(kAlignment)));
+      rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
   if (*ptr == NULL) {
     *ptr = previous_ptr;
     return Status::OutOfMemory("realloc of size ", new_size, " failed");
@@ -152,13 +176,8 @@ class DefaultMemoryPool : public MemoryPool {
   int64_t bytes_allocated() const override { return stats_.bytes_allocated(); }

   void Free(uint8_t* buffer, int64_t size) override {
-#ifdef _WIN32
-    _aligned_free(buffer);
-#elif defined(ARROW_JEMALLOC)
-    dallocx(buffer, MALLOCX_ALIGN(kAlignment));
-#else
-    std::free(buffer);
-#endif
+    DeallocateAligned(buffer, size);
+
     stats_.UpdateAllocatedBytes(-size);
   }

From 95f6ecfb9115659af3577693589ce4f9ae10eea3 Mon Sep 17 00:00:00 2001
From: Kouhei Sutou
Date: Fri, 4 Jan 2019 11:43:21 -0600
Subject: [PATCH 011/203] ARROW-4152: [GLib] Remove an example to show Torch
 integration

Torch is no longer in active development.
Author: Kouhei Sutou Closes #3303 from kou/glib-remove-torch-example and squashes the following commits: b29a96390 Remove an example to show Torch integration --- .travis.yml | 1 - c_glib/example/lua/Makefile.am | 1 - c_glib/example/lua/README.md | 5 - c_glib/example/lua/stream-to-torch-tensor.lua | 101 ------------------ ci/travis_before_script_c_glib.sh | 18 +--- ci/travis_script_c_glib.sh | 17 +-- 6 files changed, 6 insertions(+), 137 deletions(-) delete mode 100644 c_glib/example/lua/stream-to-torch-tensor.lua diff --git a/.travis.yml b/.travis.yml index 837b4cfef30db..f14f7e4785948 100644 --- a/.travis.yml +++ b/.travis.yml @@ -256,7 +256,6 @@ matrix: - ARROW_TRAVIS_USE_VENDORED_BOOST=1 - ARROW_TRAVIS_PARQUET=1 - ARROW_TRAVIS_PLASMA=1 - - BUILD_TORCH_EXAMPLE=no - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9" before_script: - if [ $ARROW_CI_RUBY_AFFECTED != "1" ]; then exit; fi diff --git a/c_glib/example/lua/Makefile.am b/c_glib/example/lua/Makefile.am index 86bdbed8a0228..9019d24741c1a 100644 --- a/c_glib/example/lua/Makefile.am +++ b/c_glib/example/lua/Makefile.am @@ -20,6 +20,5 @@ dist_lua_example_DATA = \ README.md \ read-batch.lua \ read-stream.lua \ - stream-to-torch-tensor.lua \ write-batch.lua \ write-stream.lua diff --git a/c_glib/example/lua/README.md b/c_glib/example/lua/README.md index e7e3351fef148..7d388d46acb33 100644 --- a/c_glib/example/lua/README.md +++ b/c_glib/example/lua/README.md @@ -48,8 +48,3 @@ Here are example codes in this directory: * `read-stream.lua`: It shows how to read Arrow array from file in stream mode. - - * `stream-to-torch-tensor.lua`: It shows how to read Arrow array - from file in stream mode and convert it to - [Torch](http://torch.ch/)'s - [`Tensor` object](http://torch7.readthedocs.io/en/rtd/tensor/index.html). diff --git a/c_glib/example/lua/stream-to-torch-tensor.lua b/c_glib/example/lua/stream-to-torch-tensor.lua deleted file mode 100644 index fc765e3c96872..0000000000000 --- a/c_glib/example/lua/stream-to-torch-tensor.lua +++ /dev/null @@ -1,101 +0,0 @@ --- Licensed to the Apache Software Foundation (ASF) under one --- or more contributor license agreements. See the NOTICE file --- distributed with this work for additional information --- regarding copyright ownership. The ASF licenses this file --- to you under the Apache License, Version 2.0 (the --- "License"); you may not use this file except in compliance --- with the License. You may obtain a copy of the License at --- --- http://www.apache.org/licenses/LICENSE-2.0 --- --- Unless required by applicable law or agreed to in writing, --- software distributed under the License is distributed on an --- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY --- KIND, either express or implied. See the License for the --- specific language governing permissions and limitations --- under the License. 
- -local lgi = require 'lgi' -local Arrow = lgi.Arrow - -local torch = require 'torch' - -Arrow.Array.torch_types = function(self) - return nil -end - -Arrow.Array.to_torch = function(self) - local types = self:torch_types() - if not types then - return nil - end - - local storage_type = types[1] - local tensor_type = types[2] - - local size = self:get_length() - local storage = storage_type(size) - if not storage then - return nil - end - - for i = 1, size do - storage[i] = self:get_value(i - 1) - end - return tensor_type(storage) -end - -Arrow.UInt8Array.torch_types = function(self) - return {torch.ByteStorage, torch.ByteTensor} -end - -Arrow.Int8Array.torch_types = function(self) - return {torch.CharStorage, torch.CharTensor} -end - -Arrow.Int16Array.torch_types = function(self) - return {torch.ShortStorage, torch.ShortTensor} -end - -Arrow.Int32Array.torch_types = function(self) - return {torch.IntStorage, torch.IntTensor} -end - -Arrow.Int64Array.torch_types = function(self) - return {torch.LongStorage, torch.LongTensor} -end - -Arrow.FloatArray.torch_types = function(self) - return {torch.FloatStorage, torch.FloatTensor} -end - -Arrow.DoubleArray.torch_types = function(self) - return {torch.DoubleStorage, torch.DoubleTensor} -end - - -local input_path = arg[1] or "/tmp/stream.arrow"; - -local input = Arrow.MemoryMappedInputStream.new(input_path) -local reader = Arrow.RecordBatchStreamReader.new(input) - -local i = 0 -while true do - local record_batch = reader:read_next_record_batch() - if not record_batch then - break - end - - print(string.rep("=", 40)) - print("record-batch["..i.."]:") - for j = 0, record_batch:get_n_columns() - 1 do - local column = record_batch:get_column(j) - local column_name = record_batch:get_column_name(j) - print(" "..column_name..":") - print(column:to_torch()) - end - - i = i + 1 -end - -input:close() diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh index 7cd1c2a064396..e8dd0cdc80d2e 100755 --- a/ci/travis_before_script_c_glib.sh +++ b/ci/travis_before_script_c_glib.sh @@ -44,22 +44,8 @@ gem install test-unit gobject-introspection if [ $TRAVIS_OS_NAME = "osx" ]; then sudo env PKG_CONFIG_PATH=$PKG_CONFIG_PATH luarocks install lgi else - if [ $BUILD_TORCH_EXAMPLE = "yes" ]; then - git clone \ - --quiet \ - --depth 1 \ - --recursive \ - https://github.com/torch/distro.git ~/torch - pushd ~/torch - ./install-deps > /dev/null - echo "yes" | ./install.sh > /dev/null - . ~/torch/install/bin/torch-activate - popd - luarocks install lgi - else - sudo apt install -y -qq luarocks - sudo luarocks install lgi - fi + sudo apt install -y -qq luarocks + sudo luarocks install lgi fi pushd $ARROW_C_GLIB_DIR diff --git a/ci/travis_script_c_glib.sh b/ci/travis_script_c_glib.sh index adecc5c742967..c42a047ddf445 100755 --- a/ci/travis_script_c_glib.sh +++ b/ci/travis_script_c_glib.sh @@ -32,19 +32,10 @@ arrow_c_glib_run_test() export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$arrow_c_glib_lib_dir/pkgconfig pushd example/lua - if [ "$BUILD_TORCH_EXAMPLE" = "yes" ]; then - . 
~/torch/install/bin/torch-activate
-        luajit write-batch.lua
-        luajit read-batch.lua
-        luajit write-stream.lua
-        luajit read-stream.lua
-        luajit stream-to-torch-tensor.lua
-    else
-        lua write-batch.lua
-        lua read-batch.lua
-        lua write-stream.lua
-        lua read-stream.lua
-    fi
+    lua write-batch.lua
+    lua read-batch.lua
+    lua write-stream.lua
+    lua read-stream.lua
     popd
 }

From 1e9a23612d258cd51a20b9eccf7a13bd5be52007 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?=
Date: Fri, 4 Jan 2019 11:53:57 -0600
Subject: [PATCH 012/203] ARROW-4149: [CI/C++] Parquet test misses ZSTD
 compression codec in CMake 3.2 nightly builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parquet Zstd tests were enabled regardless of `ARROW_WITH_ZSTD`, which can be set to [OFF](https://github.com/apache/arrow/blob/master/cpp/CMakeLists.txt#L271) depending on CMake's version.

Crossbow build:
- ~[kszucs/crossbow/build-392](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-392)~
- [kszucs/crossbow/build-395](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-395)

Author: Krisztián Szűcs

Closes #3299 from kszucs/ARROW-4149 and squashes the following commits:

525ef76f1 lint
b29bda570 disable more tests
54e6437fe only run Zstd tests if ARROW_WITH_ZSTD is set
---
 cpp/src/arrow/io/compressed-test.cc      | 16 ++++++++++------
 cpp/src/parquet/CMakeLists.txt           |  5 +++++
 cpp/src/parquet/column_writer-test.cc    | 14 +++++++++-----
 cpp/src/parquet/file-deserialize-test.cc |  8 +++++---
 cpp/src/parquet/file-serialize-test.cc   |  2 ++
 5 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/io/compressed-test.cc b/cpp/src/arrow/io/compressed-test.cc
index 507302f384c0b..a099fbb316a65 100644
--- a/cpp/src/arrow/io/compressed-test.cc
+++ b/cpp/src/arrow/io/compressed-test.cc
@@ -199,12 +199,14 @@ TEST_P(CompressedInputStreamTest, InvalidData) {
 INSTANTIATE_TEST_CASE_P(TestGZipInputStream, CompressedInputStreamTest,
                         ::testing::Values(Compression::GZIP));

-INSTANTIATE_TEST_CASE_P(TestZSTDInputStream, CompressedInputStreamTest,
-                        ::testing::Values(Compression::ZSTD));
-
 INSTANTIATE_TEST_CASE_P(TestBrotliInputStream, CompressedInputStreamTest,
                         ::testing::Values(Compression::BROTLI));

+#ifdef ARROW_WITH_ZSTD
+INSTANTIATE_TEST_CASE_P(TestZSTDInputStream, CompressedInputStreamTest,
+                        ::testing::Values(Compression::ZSTD));
+#endif
+
 class CompressedOutputStreamTest : public ::testing::TestWithParam<Compression::type> {
  protected:
   Compression::type GetCompression() { return GetParam(); }
@@ -235,11 +237,13 @@ TEST_P(CompressedOutputStreamTest, RandomData) {
 INSTANTIATE_TEST_CASE_P(TestGZipOutputStream, CompressedOutputStreamTest,
                         ::testing::Values(Compression::GZIP));

-INSTANTIATE_TEST_CASE_P(TestZSTDOutputStream, CompressedOutputStreamTest,
-                        ::testing::Values(Compression::ZSTD));
-
 INSTANTIATE_TEST_CASE_P(TestBrotliOutputStream, CompressedOutputStreamTest,
                         ::testing::Values(Compression::BROTLI));

+#ifdef ARROW_WITH_ZSTD
+INSTANTIATE_TEST_CASE_P(TestZSTDOutputStream, CompressedOutputStreamTest,
+                        ::testing::Values(Compression::ZSTD));
+#endif
+
 }  // namespace io
 }  // namespace arrow
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 4eb8f68a2ba98..f6796726fce90 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -282,3 +282,8 @@ ADD_ARROW_BENCHMARK(encoding-benchmark
                     PREFIX "parquet"
                     LABELS "parquet-benchmarks"
                     EXTRA_LINK_LIBS ${PARQUET_BENCHMARK_LINK_LIBRARIES})
+
+# Required for tests, the
ExternalProject for zstd does not build on CMake < 3.7 +if (ARROW_WITH_ZSTD) + add_definitions(-DARROW_WITH_ZSTD) +endif() diff --git a/cpp/src/parquet/column_writer-test.cc b/cpp/src/parquet/column_writer-test.cc index 4416e3d18e9ad..28a18b1008ac8 100644 --- a/cpp/src/parquet/column_writer-test.cc +++ b/cpp/src/parquet/column_writer-test.cc @@ -349,11 +349,6 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithLz4Compression) { LARGE_SIZE); } -TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCompression) { - this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, false, - LARGE_SIZE); -} - TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStats) { this->TestRequiredWithSettings(Encoding::PLAIN, Compression::UNCOMPRESSED, false, true, LARGE_SIZE); @@ -379,10 +374,19 @@ TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndLz4Compression) { LARGE_SIZE); } +// The ExternalProject for zstd does not build on CMake < 3.7, so we do not +// require it here +#ifdef ARROW_WITH_ZSTD +TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithZstdCompression) { + this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, false, + LARGE_SIZE); +} + TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStatsAndZstdCompression) { this->TestRequiredWithSettings(Encoding::PLAIN, Compression::ZSTD, false, true, LARGE_SIZE); } +#endif TYPED_TEST(TestPrimitiveWriter, Optional) { // Optional and non-repeated, with definition levels diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc index 17dfe387fd6e0..f1c17240439fb 100644 --- a/cpp/src/parquet/file-deserialize-test.cc +++ b/cpp/src/parquet/file-deserialize-test.cc @@ -176,9 +176,11 @@ TEST_F(TestPageSerde, TestFailLargePageHeaders) { } TEST_F(TestPageSerde, Compression) { - Compression::type codec_types[5] = {Compression::GZIP, Compression::SNAPPY, - Compression::BROTLI, Compression::LZ4, - Compression::ZSTD}; + std::vector codec_types = {Compression::GZIP, Compression::SNAPPY, + Compression::BROTLI, Compression::LZ4}; +#ifdef ARROW_WITH_ZSTD + codec_types.push_back(Compression::ZSTD); +#endif const int32_t num_rows = 32; // dummy value data_page_header_.num_values = num_rows; diff --git a/cpp/src/parquet/file-serialize-test.cc b/cpp/src/parquet/file-serialize-test.cc index 750faa20e2454..88dd657603184 100644 --- a/cpp/src/parquet/file-serialize-test.cc +++ b/cpp/src/parquet/file-serialize-test.cc @@ -301,9 +301,11 @@ TYPED_TEST(TestSerialize, SmallFileLz4) { ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::LZ4)); } +#ifdef ARROW_WITH_ZSTD TYPED_TEST(TestSerialize, SmallFileZstd) { ASSERT_NO_FATAL_FAILURE(this->FileSerializeTest(Compression::ZSTD)); } +#endif } // namespace test From cc9e228dd4a9b3403d52de07f134603a824b3354 Mon Sep 17 00:00:00 2001 From: "minmin.fmm" Date: Fri, 4 Jan 2019 12:03:44 -0600 Subject: [PATCH 013/203] ARROW-4122: [C++] Initialize class members based on codebase static analysis Author: minmin.fmm Closes #3267 from micafan/fix_cpp_uninit_ctor and squashes the following commits: 71a86c5e3 fix lint error 270a992c5 fix cpp uninit ctor --- cpp/src/arrow/array/builder_dict.cc | 3 ++- cpp/src/arrow/io/compressed.cc | 2 +- cpp/src/arrow/io/hdfs.cc | 2 +- cpp/src/arrow/ipc/feather-internal.h | 2 +- cpp/src/arrow/ipc/feather.cc | 1 + cpp/src/arrow/ipc/reader.cc | 4 +++- cpp/src/arrow/python/arrow_to_pandas.cc | 5 ++++- cpp/src/arrow/util/bit-stream-utils.h | 7 ++++++- cpp/src/arrow/util/compression_zlib.cc | 2 +- cpp/src/plasma/client.cc | 2 +- 10 files changed, 
21 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/array/builder_dict.cc b/cpp/src/arrow/array/builder_dict.cc index e534c3cadb14b..89939597f1e8b 100644 --- a/cpp/src/arrow/array/builder_dict.cc +++ b/cpp/src/arrow/array/builder_dict.cc @@ -161,7 +161,7 @@ DictionaryBuilder::~DictionaryBuilder() {} template DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) { + : ArrayBuilder(type, pool), delta_offset_(0), byte_width_(-1), values_builder_(pool) { DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; } @@ -175,6 +175,7 @@ template <> DictionaryBuilder::DictionaryBuilder( const std::shared_ptr& type, MemoryPool* pool) : ArrayBuilder(type, pool), + delta_offset_(0), byte_width_(checked_cast(*type).byte_width()) {} template diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index e5fd6b4adf4c7..1311dbc246634 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -44,7 +44,7 @@ namespace io { class CompressedOutputStream::Impl { public: Impl(MemoryPool* pool, Codec* codec, const std::shared_ptr& raw) - : pool_(pool), raw_(raw), codec_(codec), is_open_(true) {} + : pool_(pool), raw_(raw), codec_(codec), is_open_(true), compressed_pos_(0) {} ~Impl() { DCHECK(Close().ok()); } diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 3e9b804ca233c..0a50d3dcdcd90 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -336,7 +336,7 @@ static void SetPathInfo(const hdfsFileInfo* input, HdfsPathInfo* out) { // Private implementation class HadoopFileSystem::HadoopFileSystemImpl { public: - HadoopFileSystemImpl() {} + HadoopFileSystemImpl() : driver_(NULLPTR), port_(0), fs_(NULLPTR) {} Status Connect(const HdfsConnectionConfig* config) { if (config->driver == HdfsDriver::LIBHDFS3) { diff --git a/cpp/src/arrow/ipc/feather-internal.h b/cpp/src/arrow/ipc/feather-internal.h index 90512dd117238..2aa04b2db72ba 100644 --- a/cpp/src/arrow/ipc/feather-internal.h +++ b/cpp/src/arrow/ipc/feather-internal.h @@ -119,7 +119,7 @@ class ARROW_EXPORT TableBuilder { class ARROW_EXPORT TableMetadata { public: - TableMetadata() {} + TableMetadata() : table_(NULLPTR) {} ~TableMetadata() = default; Status Open(const std::shared_ptr& buffer) { diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc index b0ab62c678c72..d28bf7512999a 100644 --- a/cpp/src/arrow/ipc/feather.cc +++ b/cpp/src/arrow/ipc/feather.cc @@ -180,6 +180,7 @@ ColumnBuilder::ColumnBuilder(TableBuilder* parent, const std::string& name) fbb_ = &parent->fbb(); name_ = name; type_ = ColumnType::PRIMITIVE; + meta_time_.unit = TimeUnit::SECOND; } flatbuffers::Offset ColumnBuilder::CreateColumnMetadata() { diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index b2c26767be4e9..59a322a64338a 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -499,7 +499,9 @@ Status RecordBatchStreamReader::ReadNext(std::shared_ptr* batch) { class RecordBatchFileReader::RecordBatchFileReaderImpl { public: - RecordBatchFileReaderImpl() { dictionary_memo_ = std::make_shared(); } + RecordBatchFileReaderImpl() : file_(NULLPTR), footer_offset_(0), footer_(NULLPTR) { + dictionary_memo_ = std::make_shared(); + } Status ReadFooter() { int magic_size = static_cast(strlen(kArrowMagicBytes)); diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 
b532bfb705acd..8aa0bf74b7b27 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -1064,7 +1064,10 @@ class CategoricalBlock : public PandasBlock { public: explicit CategoricalBlock(const PandasOptions& options, MemoryPool* pool, int64_t num_rows) - : PandasBlock(options, num_rows, 1), pool_(pool), needs_copy_(false) {} + : PandasBlock(options, num_rows, 1), + pool_(pool), + ordered_(false), + needs_copy_(false) {} Status Allocate() override { return Status::NotImplemented( diff --git a/cpp/src/arrow/util/bit-stream-utils.h b/cpp/src/arrow/util/bit-stream-utils.h index ff215e488b4a3..ae62a7ff1e2b3 100644 --- a/cpp/src/arrow/util/bit-stream-utils.h +++ b/cpp/src/arrow/util/bit-stream-utils.h @@ -110,7 +110,12 @@ class BitReader { memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); } - BitReader() : buffer_(NULL), max_bytes_(0) {} + BitReader() + : buffer_(NULL), + max_bytes_(0), + buffered_values_(0), + byte_offset_(0), + bit_offset_(0) {} void Reset(const uint8_t* buffer, int buffer_len) { buffer_ = buffer; diff --git a/cpp/src/arrow/util/compression_zlib.cc b/cpp/src/arrow/util/compression_zlib.cc index dfda317e3bf36..736b0ab4f1524 100644 --- a/cpp/src/arrow/util/compression_zlib.cc +++ b/cpp/src/arrow/util/compression_zlib.cc @@ -85,7 +85,7 @@ static Status ZlibErrorPrefix(const char* prefix_msg, const char* msg) { class GZipDecompressor : public Decompressor { public: - GZipDecompressor() : initialized_(false) {} + GZipDecompressor() : initialized_(false), finished_(false) {} ~GZipDecompressor() override { if (initialized_) { diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc index 8d153585c3d4e..f08d6efd71ee7 100644 --- a/cpp/src/plasma/client.cc +++ b/cpp/src/plasma/client.cc @@ -261,7 +261,7 @@ class PlasmaClient::Impl : public std::enable_shared_from_thisRelease(object_id_)); } -PlasmaClient::Impl::Impl() { +PlasmaClient::Impl::Impl() : store_conn_(0), store_capacity_(0) { #ifdef PLASMA_CUDA DCHECK_OK(CudaDeviceManager::GetInstance(&manager_)); #endif From c569a4c5684938230bf99e6b20b401322760089d Mon Sep 17 00:00:00 2001 From: Antonio Cavallo Date: Fri, 4 Jan 2019 12:13:54 -0600 Subject: [PATCH 014/203] ARROW-4127: [Documentation][Python] Add instructions to build with Docker Author: Antonio Cavallo Closes #3281 from cav71/documentation and squashes the following commits: a1c5dab21 Add Docker documentation build instructions --- docs/source/building.rst | 21 +++++++++++++++++++++ docs/source/python/development.rst | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/docs/source/building.rst b/docs/source/building.rst index 0fb4486db89c3..dfa857498cf80 100644 --- a/docs/source/building.rst +++ b/docs/source/building.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. _building-docs: + Building the Documentation ========================== @@ -69,3 +71,22 @@ After these steps are completed, the documentation is rendered in HTML format in ``docs/_build/html``. In particular, you can point your browser at ``docs/_build/html/index.html`` to read the docs and review any changes you made. + + +.. _building-docker: + +Building with Docker +-------------------- + +You can use Docker to build the documentation: + +.. 
code-block:: shell + + docker-compose build cpp + docker-compose build python + docker-compose build docs + docker-compose run docs + +The final output is located under [#]_:: + + docs/_build/html diff --git a/docs/source/python/development.rst b/docs/source/python/development.rst index ba8cfef721441..0bc1c62b4af18 100644 --- a/docs/source/python/development.rst +++ b/docs/source/python/development.rst @@ -364,3 +364,8 @@ Getting ``python-test.exe`` to run is a bit tricky because your set PYTHONHOME=%CONDA_PREFIX% Now ``python-test.exe`` or simply ``ctest`` (to run all tests) should work. + +Building the Documentation +========================== + +See :ref:`building-docs` for instructions to build the HTML documentation. From 7405406928ac0e3ab03bf2091173563ed54d2a07 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Fri, 4 Jan 2019 19:15:35 +0100 Subject: [PATCH 015/203] ARROW-4156: [C++] Don't use object libs with Xcode Author: Uwe L. Korn Closes #3308 from xhochy/ARROW-4156 and squashes the following commits: 1c76769d ARROW-4156: Don't use object libs with Xcode --- cpp/cmake_modules/BuildUtils.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 7c1db679bf23e..77db28e2aab28 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -119,9 +119,11 @@ function(ADD_ARROW_LIB LIB_NAME) set(BUILD_STATIC ${ARROW_BUILD_STATIC}) endif() - if(MSVC) + if(MSVC OR (CMAKE_GENERATOR STREQUAL Xcode)) # MSVC needs to compile C++ separately for each library kind (shared and static) # because of dllexport declarations + # The Xcode generator doesn't reliably work with Xcode as target names are not + # guessed correctly. set(LIB_DEPS ${ARG_SOURCES}) set(EXTRA_DEPS ${ARG_DEPENDENCIES}) From 161d00fbeeb2f1992da8d8ac0e96fb14de51b646 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 4 Jan 2019 12:36:15 -0600 Subject: [PATCH 016/203] ARROW-4157: [C++] Fix clang documentation warnings on Ubuntu 18.04 I also added an option `ARROW_USE_CCACHE` to turn ccache on and off Author: Wes McKinney Closes #3310 from wesm/doc-fixes and squashes the following commits: e6c2f203f Fix clang documentation warnings on Ubuntu 18.04 --- cpp/CMakeLists.txt | 18 ++++-- cpp/src/arrow/gpu/cuda_context.h | 16 ++--- cpp/src/arrow/gpu/cuda_memory.h | 2 +- cpp/src/arrow/python/serialize.cc | 100 +++++++++++++++--------------- 4 files changed, 71 insertions(+), 65 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index aba1a59618bb0..3d2b698b8ff25 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -81,12 +81,6 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR INFER_FOUND) set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() -find_program(CCACHE_FOUND ccache) -if(CCACHE_FOUND) - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) -endif(CCACHE_FOUND) - # ---------------------------------------------------------------------- # cmake options @@ -115,6 +109,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Enable Address Sanitizer checks" OFF) + option(ARROW_USE_CCACHE + "Use ccache when compiling (if available)" + ON) + option(ARROW_USE_TSAN "Enable Thread Sanitizer checks" OFF) @@ -349,6 +347,14 @@ that have not been built" OFF) endif() +if (ARROW_USE_CCACHE) + find_program(CCACHE_FOUND ccache) + if(CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE 
${CCACHE_FOUND}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) + endif(CCACHE_FOUND) +endif() + if (ARROW_OPTIONAL_INSTALL) # Don't make the "install" target depend on the "all" target set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true) diff --git a/cpp/src/arrow/gpu/cuda_context.h b/cpp/src/arrow/gpu/cuda_context.h index 9a67cea8975d1..300106214b488 100644 --- a/cpp/src/arrow/gpu/cuda_context.h +++ b/cpp/src/arrow/gpu/cuda_context.h @@ -37,23 +37,23 @@ class ARROW_EXPORT CudaDeviceManager { static Status GetInstance(CudaDeviceManager** manager); /// \brief Get the CUDA driver context for a particular device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[out] out cached context - Status GetContext(int gpu_number, std::shared_ptr* ctx); + Status GetContext(int device_number, std::shared_ptr* out); /// \brief Get the shared CUDA driver context for a particular device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[in] handle CUDA context handler created by another library /// \param[out] out shared context Status GetSharedContext(int device_number, void* handle, std::shared_ptr* out); /// \brief Allocate host memory with fast access to given GPU device - /// \param[in] device_number + /// \param[in] device_number the CUDA device /// \param[in] nbytes number of bytes /// \param[out] out the allocated buffer Status AllocateHost(int device_number, int64_t nbytes, - std::shared_ptr* buffer); + std::shared_ptr* out); Status FreeHost(void* data, int64_t nbytes); @@ -98,15 +98,15 @@ class ARROW_EXPORT CudaContext : public std::enable_shared_from_this* buffer); + std::shared_ptr* out); /// \brief Close memory mapped with IPC buffer /// \param[in] buffer a CudaBuffer referencing /// \return Status - Status CloseIpcBuffer(CudaBuffer* buf); + Status CloseIpcBuffer(CudaBuffer* buffer); /// \brief Block until the all device tasks are completed. Status Synchronize(void); diff --git a/cpp/src/arrow/gpu/cuda_memory.h b/cpp/src/arrow/gpu/cuda_memory.h index c8f80837cd9df..64fa02d789325 100644 --- a/cpp/src/arrow/gpu/cuda_memory.h +++ b/cpp/src/arrow/gpu/cuda_memory.h @@ -207,7 +207,7 @@ class ARROW_EXPORT CudaBufferWriter : public io::WritableFile { }; /// \brief Allocate CUDA-accessible memory on CPU host -/// \param[in] device_number +/// \param[in] device_number device to expose host memory /// \param[in] size number of bytes /// \param[out] out the allocated buffer /// \return Status diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc index ca94369be5157..38ab238e9a2e1 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/cpp/src/arrow/python/serialize.cc @@ -55,8 +55,8 @@ using internal::checked_cast; namespace py { -/// A Sequence is a heterogeneous collections of elements. It can contain -/// scalar Python types, lists, tuples, dictionaries and tensors. +// A Sequence is a heterogeneous collections of elements. It can contain +// scalar Python types, lists, tuples, dictionaries and tensors. 
class SequenceBuilder { public: explicit SequenceBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT) @@ -81,7 +81,7 @@ class SequenceBuilder { dict_offsets_({0}), set_offsets_({0}) {} - /// Appending a none to the sequence + // Appending a none to the sequence Status AppendNone() { RETURN_NOT_OK(offsets_.Append(0)); RETURN_NOT_OK(types_.Append(0)); @@ -106,90 +106,90 @@ class SequenceBuilder { return out->Append(val); } - /// Appending a boolean to the sequence + // Appending a boolean to the sequence Status AppendBool(const bool data) { return AppendPrimitive(data, &bool_tag_, &bools_); } - /// Appending a python 2 int64_t to the sequence + // Appending a python 2 int64_t to the sequence Status AppendPy2Int64(const int64_t data) { return AppendPrimitive(data, &py2_int_tag_, &py2_ints_); } - /// Appending an int64_t to the sequence + // Appending an int64_t to the sequence Status AppendInt64(const int64_t data) { return AppendPrimitive(data, &int_tag_, &ints_); } - /// Append a list of bytes to the sequence + // Append a list of bytes to the sequence Status AppendBytes(const uint8_t* data, int32_t length) { RETURN_NOT_OK(Update(bytes_.length(), &bytes_tag_)); return bytes_.Append(data, length); } - /// Appending a string to the sequence + // Appending a string to the sequence Status AppendString(const char* data, int32_t length) { RETURN_NOT_OK(Update(strings_.length(), &string_tag_)); return strings_.Append(data, length); } - /// Appending a half_float to the sequence + // Appending a half_float to the sequence Status AppendHalfFloat(const npy_half data) { return AppendPrimitive(data, &half_float_tag_, &half_floats_); } - /// Appending a float to the sequence + // Appending a float to the sequence Status AppendFloat(const float data) { return AppendPrimitive(data, &float_tag_, &floats_); } - /// Appending a double to the sequence + // Appending a double to the sequence Status AppendDouble(const double data) { return AppendPrimitive(data, &double_tag_, &doubles_); } - /// Appending a Date64 timestamp to the sequence + // Appending a Date64 timestamp to the sequence Status AppendDate64(const int64_t timestamp) { return AppendPrimitive(timestamp, &date64_tag_, &date64s_); } - /// Appending a tensor to the sequence - /// - /// \param tensor_index Index of the tensor in the object. + // Appending a tensor to the sequence + // + // \param tensor_index Index of the tensor in the object. Status AppendTensor(const int32_t tensor_index) { RETURN_NOT_OK(Update(tensor_indices_.length(), &tensor_tag_)); return tensor_indices_.Append(tensor_index); } - /// Appending a numpy ndarray to the sequence - /// - /// \param tensor_index Index of the tensor in the object. + // Appending a numpy ndarray to the sequence + // + // \param tensor_index Index of the tensor in the object. Status AppendNdarray(const int32_t ndarray_index) { RETURN_NOT_OK(Update(ndarray_indices_.length(), &ndarray_tag_)); return ndarray_indices_.Append(ndarray_index); } - /// Appending a buffer to the sequence - /// - /// \param buffer_index Indes of the buffer in the object. + // Appending a buffer to the sequence + // + // \param buffer_index Indes of the buffer in the object. Status AppendBuffer(const int32_t buffer_index) { RETURN_NOT_OK(Update(buffer_indices_.length(), &buffer_tag_)); return buffer_indices_.Append(buffer_index); } - /// Add a sublist to the sequence. The data contained in the sublist will be - /// specified in the "Finish" method. 
- /// - /// To construct l = [[11, 22], 33, [44, 55]] you would for example run - /// list = ListBuilder(); - /// list.AppendList(2); - /// list.Append(33); - /// list.AppendList(2); - /// list.Finish([11, 22, 44, 55]); - /// list.Finish(); + // Add a sublist to the sequence. The data contained in the sublist will be + // specified in the "Finish" method. + // + // To construct l = [[11, 22], 33, [44, 55]] you would for example run + // list = ListBuilder(); + // list.AppendList(2); + // list.Append(33); + // list.AppendList(2); + // list.Finish([11, 22, 44, 55]); + // list.Finish(); - /// \param size - /// The size of the sublist + // \param size + // The size of the sublist Status AppendList(Py_ssize_t size) { int32_t offset; RETURN_NOT_OK(internal::CastSize(list_offsets_.back() + size, &offset)); @@ -256,8 +256,8 @@ class SequenceBuilder { return Status::OK(); } - /// Finish building the sequence and return the result. - /// Input arrays may be nullptr + // Finish building the sequence and return the result. + // Input arrays may be nullptr Status Finish(const Array* list_data, const Array* tuple_data, const Array* dict_data, const Array* set_data, std::shared_ptr* out) { fields_.resize(num_tags_); @@ -356,28 +356,28 @@ class SequenceBuilder { std::vector type_ids_; }; -/// Constructing dictionaries of key/value pairs. Sequences of -/// keys and values are built separately using a pair of -/// SequenceBuilders. The resulting Arrow representation -/// can be obtained via the Finish method. +// Constructing dictionaries of key/value pairs. Sequences of +// keys and values are built separately using a pair of +// SequenceBuilders. The resulting Arrow representation +// can be obtained via the Finish method. class DictBuilder { public: explicit DictBuilder(MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) {} - /// Builder for the keys of the dictionary + // Builder for the keys of the dictionary SequenceBuilder& keys() { return keys_; } - /// Builder for the values of the dictionary + // Builder for the values of the dictionary SequenceBuilder& vals() { return vals_; } - /// Construct an Arrow StructArray representing the dictionary. - /// Contains a field "keys" for the keys and "vals" for the values. - /// \param val_list_data - /// List containing the data from nested lists in the value - /// list of the dictionary - /// - /// \param val_dict_data - /// List containing the data from nested dictionaries in the - /// value list of the dictionary + // Construct an Arrow StructArray representing the dictionary. + // Contains a field "keys" for the keys and "vals" for the values. 
+ // \param val_list_data + // List containing the data from nested lists in the value + // list of the dictionary + // + // \param val_dict_data + // List containing the data from nested dictionaries in the + // value list of the dictionary Status Finish(const Array* key_tuple_data, const Array* key_dict_data, const Array* val_list_data, const Array* val_tuple_data, const Array* val_dict_data, const Array* val_set_data, From c322aecd82c93f96a6d8b8852c8336a750ebfbb1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 4 Jan 2019 13:38:28 -0600 Subject: [PATCH 017/203] ARROW-4158: Allow committers to set ARROW_GITHUB_API_TOKEN for merge script, better debugging output Before this the error message printed in rate limit scenario was simply `url` (from the `KeyError`) Author: Wes McKinney Closes #3311 from wesm/ARROW-4158 and squashes the following commits: ca4b16e04 Better debugging output from merge PR script, add option to use GitHub API token for GET requests --- dev/README.md | 9 +++++++-- dev/merge_arrow_pr.py | 43 ++++++++++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/dev/README.md b/dev/README.md index 98aeef6d9a4d8..ead36d3747e76 100644 --- a/dev/README.md +++ b/dev/README.md @@ -28,17 +28,22 @@ https://gitbox.apache.org/setup/ to be able to push to GitHub as the main remote. * How to merge a Pull request: -have an apache and apache-github remote setup + ``` -git remote add apache-github https://github.com/apache/arrow.git git remote add apache git@github.com:apache/arrow.git ``` + run the following command + ``` dev/merge_arrow_pr.py ``` +This uses the GitHub REST API; if you encounter rate limit issues, you may set +a `ARROW_GITHUB_API_TOKEN` environment variable to use a Personal Access Token. + Note: + * The directory name of your Arrow git clone must be called arrow * Without jira-python installed you'll have to close the JIRA manually diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 8539d5d3401fd..3d6ca31476ee3 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -24,8 +24,17 @@ # This utility assumes you already have a local Arrow git clone and that you # have added remotes corresponding to both (i) the Github Apache Arrow mirror # and (ii) the apache git repo. 
+#
+# There are several pieces of authorization possibly needed via environment
+# variables
+#
+# JIRA_USERNAME: your Apache JIRA id
+# JIRA_PASSWORD: your Apache JIRA password
+# ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid
+# rate limiting)

 import os
+import pprint
 import re
 import subprocess
 import sys
@@ -48,12 +57,10 @@
 BRANCH_PREFIX = "PR_TOOL"
 JIRA_API_BASE = "https://issues.apache.org/jira"

-def get_json(url):
-    req = requests.get(url)
+def get_json(url, headers=None):
+    req = requests.get(url, headers=headers)
     return req.json()

-
 def run_cmd(cmd):
     if isinstance(cmd, six.string_types):
         cmd = cmd.split(' ')
@@ -192,8 +199,15 @@ def __init__(self, project_name):
         self.github_api = ("https://api.github.com/repos/apache/{0}"
                            .format(project_name))

+        token = os.environ.get('ARROW_GITHUB_API_TOKEN', None)
+        if token:
+            self.headers = {'Authorization': 'token {0}'.format(token)}
+        else:
+            self.headers = None
+
     def get_pr_data(self, number):
-        return get_json("%s/pulls/%s" % (self.github_api, number))
+        return get_json("%s/pulls/%s" % (self.github_api, number),
+                        headers=self.headers)


 class CommandInput(object):
@@ -225,13 +239,16 @@ def __init__(self, cmd, github_api, git_remote, jira_con, number):
         self.con = jira_con
         self.number = number
         self._pr_data = github_api.get_pr_data(number)
-        self.url = self._pr_data["url"]
-        self.title = self._pr_data["title"]
-
-        self.body = self._pr_data["body"]
-        self.target_ref = self._pr_data["base"]["ref"]
-        self.user_login = self._pr_data["user"]["login"]
-        self.base_ref = self._pr_data["head"]["ref"]
+        try:
+            self.url = self._pr_data["url"]
+            self.title = self._pr_data["title"]
+            self.body = self._pr_data["body"]
+            self.target_ref = self._pr_data["base"]["ref"]
+            self.user_login = self._pr_data["user"]["login"]
+            self.base_ref = self._pr_data["head"]["ref"]
+        except KeyError:
+            pprint.pprint(self._pr_data)
+            raise

         self.description = "%s/%s" % (self.user_login, self.base_ref)
         self.jira_issue = self._get_jira()
@@ -435,4 +452,4 @@ def get_version_json(version_str):
     try:
         cli()
     except Exception as e:
-        print(e.args[0])
+        raise

From 4057b5f2f1402026c5853e53a038db8371650fbd Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Fri, 4 Jan 2019 15:17:13 -0600
Subject: [PATCH 018/203] PARQUET-690: [C++] Reuse Thrift resources when
 serializing metadata structures

This patch should yield fewer memory allocations on the Parquet write path, using the same approach as Apache Impala. Previously we allocated a new buffer for each Thrift object serialization; since a ColumnChunk generally contains many data page headers, this was wasteful.
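To make the pattern concrete, here is a hedged sketch of how a page writer can reuse one serializer across many headers (the `ThriftSerializer` class itself appears in the thrift.h hunk below; `pages`, `sink`, and `MakePageHeader()` are hypothetical stand-ins for the writer's page list, its `parquet::OutputStream*`, and header construction):

```cpp
// One ThriftSerializer per page writer: its internal TMemoryBuffer is reset
// and reused for every header instead of allocating a fresh transport each time.
ThriftSerializer serializer;  // default 1024-byte initial buffer

int64_t total_header_bytes = 0;
for (const auto& page : pages) {
  format::PageHeader header = MakePageHeader(page);  // hypothetical helper
  // Serialize() writes the compact-protocol bytes to the sink and returns the
  // serialized size, reusing the internal buffer across iterations.
  total_header_bytes += serializer.Serialize(&header, sink);
}
```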
Author: Wes McKinney

Closes #3268 from wesm/PARQUET-690 and squashes the following commits:

a5303f826 Fix lint issues
47de8356c Reuse Thrift resources when serializing metadata structures
---
 cpp/src/parquet/column_writer.cc         |  9 ++-
 cpp/src/parquet/file-deserialize-test.cc |  4 +-
 cpp/src/parquet/metadata.cc              |  6 +-
 cpp/src/parquet/thrift.h                 | 93 +++++++++++++++---------
 4 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index 37fce9c036b31..dfb65f1969777 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -141,6 +141,7 @@ class SerializedPageWriter : public PageWriter {
         total_uncompressed_size_(0),
         total_compressed_size_(0) {
     compressor_ = GetCodecFromArrow(codec);
+    thrift_serializer_.reset(new ThriftSerializer);
   }

   int64_t WriteDictionaryPage(const DictionaryPage& page) override {
@@ -171,8 +172,7 @@
     if (dictionary_page_offset_ == 0) {
       dictionary_page_offset_ = start_pos;
     }
-    int64_t header_size =
-        SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
+    int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_);
     sink_->Write(compressed_data->data(), compressed_data->size());

     total_uncompressed_size_ += uncompressed_size + header_size;
@@ -237,8 +237,7 @@
       data_page_offset_ = start_pos;
     }

-    int64_t header_size =
-        SerializeThriftMsg(&page_header, sizeof(format::PageHeader), sink_);
+    int64_t header_size = thrift_serializer_->Serialize(&page_header, sink_);
     sink_->Write(compressed_data->data(), compressed_data->size());

     total_uncompressed_size_ += uncompressed_size + header_size;
@@ -270,6 +269,8 @@
   int64_t total_uncompressed_size_;
   int64_t total_compressed_size_;

+  std::unique_ptr<ThriftSerializer> thrift_serializer_;
+
   // Compression codec to use.
std::unique_ptr<::arrow::util::Codec> compressor_; }; diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc index f1c17240439fb..4db338b4bcb54 100644 --- a/cpp/src/parquet/file-deserialize-test.cc +++ b/cpp/src/parquet/file-deserialize-test.cc @@ -85,8 +85,8 @@ class TestPageSerde : public ::testing::Test { page_header_.compressed_page_size = compressed_size; page_header_.type = format::PageType::DATA_PAGE; - ASSERT_NO_THROW( - SerializeThriftMsg(&page_header_, max_serialized_len, out_stream_.get())); + ThriftSerializer serializer; + ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get())); } void ResetStream() { out_stream_.reset(new InMemoryOutputStream); } diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index f05918d9fd7f0..cc0bfec6321cd 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -361,7 +361,8 @@ class FileMetaData::FileMetaDataImpl { const ApplicationVersion& writer_version() const { return writer_version_; } void WriteTo(OutputStream* dst) const { - SerializeThriftMsg(metadata_.get(), 1024, dst); + ThriftSerializer serializer; + serializer.Serialize(metadata_.get(), dst); } std::unique_ptr RowGroup(int i) { @@ -667,7 +668,8 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { } void WriteTo(OutputStream* sink) { - SerializeThriftMsg(column_chunk_, sizeof(format::ColumnChunk), sink); + ThriftSerializer serializer; + serializer.Serialize(column_chunk_, sink); } const ColumnDescriptor* descr() const { return column_; } diff --git a/cpp/src/parquet/thrift.h b/cpp/src/parquet/thrift.h index 9c665acfac4ff..1afd9bf436550 100644 --- a/cpp/src/parquet/thrift.h +++ b/cpp/src/parquet/thrift.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef PARQUET_THRIFT_UTIL_H -#define PARQUET_THRIFT_UTIL_H +#pragma once #include "arrow/util/windows_compatibility.h" @@ -28,6 +27,7 @@ #else #include #endif +#include // TCompactProtocol requires some #defines to work right. #define SIGNED_RIGHT_SHIFT_IS 1 @@ -105,18 +105,18 @@ static inline format::CompressionCodec::type ToThrift(Compression::type type) { // ---------------------------------------------------------------------- // Thrift struct serialization / deserialization utilities +using ThriftBuffer = apache::thrift::transport::TMemoryBuffer; + // Deserialize a thrift message from buf/len. buf/len must at least contain // all the bytes needed to store the thrift message. On return, len will be // set to the actual length of the header. template inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deserialized_msg) { // Deserialize msg bytes into c++ thrift msg using memory transport. - shared_ptr tmem_transport( - new apache::thrift::transport::TMemoryBuffer(const_cast(buf), *len)); - apache::thrift::protocol::TCompactProtocolFactoryT< - apache::thrift::transport::TMemoryBuffer> - tproto_factory; - shared_ptr tproto = + shared_ptr tmem_transport( + new ThriftBuffer(const_cast(buf), *len)); + apache::thrift::protocol::TCompactProtocolFactoryT tproto_factory; + shared_ptr tproto = // tproto_factory.getProtocol(tmem_transport); try { deserialized_msg->read(tproto.get()); @@ -129,34 +129,57 @@ inline void DeserializeThriftMsg(const uint8_t* buf, uint32_t* len, T* deseriali *len = *len - bytes_left; } -// Serialize obj into a buffer. The result is returned as a string. 
-// The arguments are the object to be serialized and
-// the expected size of the serialized object
-template <class T>
-inline int64_t SerializeThriftMsg(T* obj, uint32_t len, OutputStream* out) {
-  shared_ptr<apache::thrift::transport::TMemoryBuffer> mem_buffer(
-      new apache::thrift::transport::TMemoryBuffer(len));
-  apache::thrift::protocol::TCompactProtocolFactoryT<
-      apache::thrift::transport::TMemoryBuffer>
-      tproto_factory;
-  shared_ptr<apache::thrift::protocol::TProtocol> tproto =
-      tproto_factory.getProtocol(mem_buffer);
-  try {
-    mem_buffer->resetBuffer();
-    obj->write(tproto.get());
-  } catch (std::exception& e) {
-    std::stringstream ss;
-    ss << "Couldn't serialize thrift: " << e.what() << "\n";
-    throw ParquetException(ss.str());
+/// Utility class to serialize thrift objects to a binary format. This object
+/// should be reused if possible to reuse the underlying memory.
+/// Note: thrift will encode NULLs into the serialized buffer so it is not valid
+/// to treat it as a string.
+class ThriftSerializer {
+ public:
+  explicit ThriftSerializer(int initial_buffer_size = 1024)
+      : mem_buffer_(new ThriftBuffer(initial_buffer_size)) {
+    apache::thrift::protocol::TCompactProtocolFactoryT<ThriftBuffer> factory;
+    protocol_ = factory.getProtocol(mem_buffer_);
   }
 
-  uint8_t* out_buffer;
-  uint32_t out_length;
-  mem_buffer->getBuffer(&out_buffer, &out_length);
-  out->Write(out_buffer, out_length);
-  return out_length;
-}
+  /// Serialize obj into a memory buffer. The result is returned in buffer/len. The
+  /// memory returned is owned by this object and will be invalid when another object
+  /// is serialized.
+  template <class T>
+  void SerializeToBuffer(const T* obj, uint32_t* len, uint8_t** buffer) {
+    SerializeObject(obj);
+    mem_buffer_->getBuffer(buffer, len);
+  }
 
-}  // namespace parquet
+  template <class T>
+  void SerializeToString(const T* obj, std::string* result) {
+    SerializeObject(obj);
+    *result = mem_buffer_->getBufferAsString();
+  }
+
+  template <class T>
+  int64_t Serialize(const T* obj, OutputStream* out) {
+    uint8_t* out_buffer;
+    uint32_t out_length;
+    SerializeToBuffer(obj, &out_length, &out_buffer);
+    out->Write(out_buffer, out_length);
+    return static_cast<int64_t>(out_length);
+  }
 
-#endif  // PARQUET_THRIFT_UTIL_H
+ private:
+  template <class T>
+  void SerializeObject(const T* obj) {
+    try {
+      mem_buffer_->resetBuffer();
+      obj->write(protocol_.get());
+    } catch (std::exception& e) {
+      std::stringstream ss;
+      ss << "Couldn't serialize thrift: " << e.what() << "\n";
+      throw ParquetException(ss.str());
+    }
+  }
+
+  shared_ptr<ThriftBuffer> mem_buffer_;
+  shared_ptr<apache::thrift::protocol::TProtocol> protocol_;
+};
+
+}  // namespace parquet
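For illustration, the reuse pattern this class enables looks like the sketch below. It is a hypothetical `WriteHeaders` helper, not code from the patch; it assumes the `parquet` namespace, the generated `format::PageHeader` type, and the `parquet/thrift.h` include path used above:

```cpp
#include <vector>

#include "parquet/thrift.h"  // ThriftSerializer, per this patch

namespace parquet {

// One serializer (and thus one underlying TMemoryBuffer) handles every
// header: Serialize() calls resetBuffer() internally, so the allocation is
// reused instead of a fresh buffer being created for each page header.
void WriteHeaders(const std::vector<format::PageHeader>& headers,
                  OutputStream* sink) {
  ThriftSerializer serializer;  // buffer allocated once
  for (const auto& header : headers) {
    serializer.Serialize(&header, sink);
  }
}

}  // namespace parquet
```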
From fba4f32001386b2ed593a69ec6d546a104eb45ba Mon Sep 17 00:00:00 2001
From: Romain Francois
Date: Fri, 4 Jan 2019 15:36:41 -0600
Subject: [PATCH 019/203] ARROW-3760: [R] Support Arrow CSV reader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The main entry point is the `csv_read()` function; all it does is create a
`csv::TableReader` with the `csv_table_reader()` generic and then call
`$Read()` on it.

As in #2947 for the feather format, `csv_table_reader()` is generic, with
these methods:
- arrow::io::InputStream: calls the TableReader constructor with the other
  options
- character and fs_path: depending on the `mmap` option (TRUE by default),
  opens the file with `mmap_open()` or `file_open()` and then calls the
  other method.

``` r
library(arrow)

tf <- tempfile()
readr::write_csv(iris, tf)

tab1 <- csv_read(tf)
tab1
#> arrow::Table
as_tibble(tab1)
#> # A tibble: 150 x 5
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>           <dbl>       <dbl>        <dbl>       <dbl> <chr>
#>  1          5.1         3.5          1.4         0.2 setosa
#>  2          4.9         3            1.4         0.2 setosa
#>  3          4.7         3.2          1.3         0.2 setosa
#>  4          4.6         3.1          1.5         0.2 setosa
#>  5          5           3.6          1.4         0.2 setosa
#>  6          5.4         3.9          1.7         0.4 setosa
#>  7          4.6         3.4          1.4         0.3 setosa
#>  8          5           3.4          1.5         0.2 setosa
#>  9          4.4         2.9          1.4         0.2 setosa
#> 10          4.9         3.1          1.5         0.1 setosa
#> # … with 140 more rows
```

Created on 2018-11-13 by the [reprex package](https://reprex.tidyverse.org) (v0.2.1.9000)

Author: Romain Francois

Closes #2949 from romainfrancois/ARROW-3760/csv_reader and squashes the following commits:

951e9f58b  s/csv_read/read_csv_arrow/
7770ec54c  not using readr:: at this point
bb13a76e0  rebase
83b51621a  s/file_open/ReadableFile/
959020c91  No need to special use mmap for file path method
6e740037d  going through CharacterVector makes sure this is a character vector
258550143  line breaks for readability
0ab839783  linting
09187e63b  Expose arrow::csv::TableReader, functions csv_table_reader() + csv_read()
---
 r/DESCRIPTION                      |   1 +
 r/NAMESPACE                        |  11 ++
 r/R/RcppExports.R                  |  20 ++++
 r/R/csv.R                          | 182 +++++++++++++++++++++++++++++
 r/man/csv_convert_options.Rd       |  14 +++
 r/man/csv_parse_options.Rd         |  33 ++++++
 r/man/csv_read_options.Rd          |  16 +++
 r/man/csv_table_reader.Rd          |  24 ++++
 r/man/read_csv_arrow.Rd            |  14 +++
 r/src/RcppExports.cpp              |  63 ++++++++++
 r/src/arrow_types.h                |   1 +
 r/src/csv.cpp                      |  76 ++++++++++++
 r/tests/testthat/test-arrow-csv-.R |  33 ++++++
 13 files changed, 488 insertions(+)
 create mode 100644 r/R/csv.R
 create mode 100644 r/man/csv_convert_options.Rd
 create mode 100644 r/man/csv_parse_options.Rd
 create mode 100644 r/man/csv_read_options.Rd
 create mode 100644 r/man/csv_table_reader.Rd
 create mode 100644 r/man/read_csv_arrow.Rd
 create mode 100644 r/src/csv.cpp
 create mode 100644 r/tests/testthat/test-arrow-csv-.R

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 45e0f83dcbd0a..a2632973134b9 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -55,6 +55,7 @@ Collate:
     'array.R'
     'buffer.R'
     'compute.R'
+    'csv.R'
     'dictionary.R'
     'feather.R'
     'io.R'
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 65d60d846f4cb..8846defbd8e65 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -39,6 +39,11 @@ S3method(buffer,default)
 S3method(buffer,integer)
 S3method(buffer,numeric)
 S3method(buffer,raw)
+S3method(csv_table_reader,"arrow::csv::TableReader")
+S3method(csv_table_reader,"arrow::io::InputStream")
+S3method(csv_table_reader,character)
+S3method(csv_table_reader,default)
+S3method(csv_table_reader,fs_path)
 S3method(length,"arrow::Array")
 S3method(names,"arrow::RecordBatch")
 S3method(print,"arrow-enum")
@@ -92,6 +97,10 @@ export(boolean)
 export(buffer)
 export(cast_options)
 export(chunked_array)
+export(csv_convert_options)
+export(csv_parse_options)
+export(csv_read_options)
+export(csv_table_reader)
 export(date32)
 export(date64)
 export(decimal)
@@ -111,6 +120,7 @@ export(mmap_open)
 export(null)
 export(print.integer64)
 export(read_arrow)
+export(read_csv_arrow)
 export(read_feather)
 export(read_message)
 export(read_record_batch)
@@ -141,6 +151,7 @@ importFrom(glue,glue)
 importFrom(purrr,map)
 importFrom(purrr,map2)
 importFrom(purrr,map_int)
+importFrom(rlang,abort)
 importFrom(rlang,dots_n)
 importFrom(rlang,list2)
 importFrom(rlang,warn)
diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R
index 0310eab2027b9..55b9ab33ebf98 100644
--- a/r/R/RcppExports.R
+++ b/r/R/RcppExports.R
@@ -193,6 +193,26 @@ Table__cast <-
function(table, schema, options) { .Call(`_arrow_Table__cast`, table, schema, options) } +csv___ReadOptions__initialize <- function(options) { + .Call(`_arrow_csv___ReadOptions__initialize`, options) +} + +csv___ParseOptions__initialize <- function(options) { + .Call(`_arrow_csv___ParseOptions__initialize`, options) +} + +csv___ConvertOptions__initialize <- function(options) { + .Call(`_arrow_csv___ConvertOptions__initialize`, options) +} + +csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) { + .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options) +} + +csv___TableReader__Read <- function(table_reader) { + .Call(`_arrow_csv___TableReader__Read`, table_reader) +} + shared_ptr_is_null <- function(xp) { .Call(`_arrow_shared_ptr_is_null`, xp) } diff --git a/r/R/csv.R b/r/R/csv.R new file mode 100644 index 0000000000000..bad87559c05e5 --- /dev/null +++ b/r/R/csv.R @@ -0,0 +1,182 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' @include R6.R + +`arrow::csv::TableReader` <- R6Class("arrow::csv::TableReader", inherit = `arrow::Object`, + public = list( + Read = function() shared_ptr(`arrow::Table`, csv___TableReader__Read(self)) + ) +) + +`arrow::csv::ReadOptions` <- R6Class("arrow::csv::ReadOptions", inherit = `arrow::Object`) +`arrow::csv::ParseOptions` <- R6Class("arrow::csv::ParseOptions", inherit = `arrow::Object`) +`arrow::csv::ConvertOptions` <- R6Class("arrow::csv::ConvertOptions", inherit = `arrow::Object`) + +#' read options for the csv reader +#' +#' @param use_threads Whether to use the global CPU thread pool +#' @param block_size Block size we request from the IO layer; also determines the size of chunks when use_threads is `TRUE` +#' +#' @export +csv_read_options <- function(use_threads = TRUE, block_size = 1048576L) { + shared_ptr(`arrow::csv::ReadOptions`, csv___ReadOptions__initialize( + list( + use_threads = use_threads, + block_size = block_size + ) + )) +} + +#' Parsing options +#' +#' @param delimiter Field delimiter +#' @param quoting Whether quoting is used +#' @param quote_char Quoting character (if `quoting` is `TRUE`) +#' @param double_quote Whether a quote inside a value is double-quoted +#' @param escaping Whether escaping is used +#' @param escape_char Escaping character (if `escaping` is `TRUE`) +#' @param newlines_in_values Whether values are allowed to contain CR (`0x0d``) and LF (`0x0a``) characters +#' @param ignore_empty_lines Whether empty lines are ignored. 
If false, an empty line represents +#' @param header_rows Number of header rows to skip (including the first row containing column names) +#' +#' @export +csv_parse_options <- function( + delimiter = ",", quoting = TRUE, quote_char = '"', + double_quote = TRUE, escaping = FALSE, escape_char = '\\', + newlines_in_values = FALSE, ignore_empty_lines = TRUE, + header_rows = 1L +){ + shared_ptr(`arrow::csv::ParseOptions`, csv___ParseOptions__initialize( + list( + delimiter = delimiter, + quoting = quoting, + quote_char = quote_char, + double_quote = double_quote, + escaping = escaping, + escape_char = escape_char, + newlines_in_values = newlines_in_values, + ignore_empty_lines = ignore_empty_lines, + header_rows = header_rows + ) + )) +} + +#' Conversion Options for the csv reader +#' +#' @param check_utf8 Whether to check UTF8 validity of string columns +#' +#' @export +csv_convert_options <- function(check_utf8 = TRUE){ + shared_ptr(`arrow::csv::ConvertOptions`, csv___ConvertOptions__initialize( + list( + check_utf8 = check_utf8 + ) + )) +} + +#' CSV table reader +#' +#' @param file file +#' @param read_options, see [csv_read_options()] +#' @param parse_options, see [csv_parse_options()] +#' @param convert_options, see [csv_convert_options()] +#' @param ... additional parameters. +#' +#' @export +csv_table_reader <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +){ + UseMethod("csv_table_reader") +} + +#' @importFrom rlang abort +#' @export +csv_table_reader.default <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +) { + abort("unsupported") +} + +#' @export +`csv_table_reader.character` <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +){ + csv_table_reader(fs::path_abs(file), + read_options = read_options, + parse_options = parse_options, + convert_options = convert_options, + ... + ) +} + +#' @export +`csv_table_reader.fs_path` <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +){ + csv_table_reader(ReadableFile(file), + read_options = read_options, + parse_options = parse_options, + convert_options = convert_options, + ... + ) +} + +#' @export +`csv_table_reader.arrow::io::InputStream` <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +){ + shared_ptr(`arrow::csv::TableReader`, + csv___TableReader__Make(file, read_options, parse_options, convert_options) + ) +} + +#' @export +`csv_table_reader.arrow::csv::TableReader` <- function(file, + read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), + ... +){ + file +} + +#' Read csv file into an arrow::Table +#' +#' Use arrow::csv::TableReader from [csv_table_reader()] +#' +#' @param ... Used to construct an arrow::csv::TableReader +#' @export +read_csv_arrow <- function(...) 
{ + csv_table_reader(...)$Read() +} + diff --git a/r/man/csv_convert_options.Rd b/r/man/csv_convert_options.Rd new file mode 100644 index 0000000000000..323c6e01970ca --- /dev/null +++ b/r/man/csv_convert_options.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/csv.R +\name{csv_convert_options} +\alias{csv_convert_options} +\title{Conversion Options for the csv reader} +\usage{ +csv_convert_options(check_utf8 = TRUE) +} +\arguments{ +\item{check_utf8}{Whether to check UTF8 validity of string columns} +} +\description{ +Conversion Options for the csv reader +} diff --git a/r/man/csv_parse_options.Rd b/r/man/csv_parse_options.Rd new file mode 100644 index 0000000000000..9540771437f75 --- /dev/null +++ b/r/man/csv_parse_options.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/csv.R +\name{csv_parse_options} +\alias{csv_parse_options} +\title{Parsing options} +\usage{ +csv_parse_options(delimiter = ",", quoting = TRUE, + quote_char = "\\"", double_quote = TRUE, escaping = FALSE, + escape_char = "\\\\", newlines_in_values = FALSE, + ignore_empty_lines = TRUE, header_rows = 1L) +} +\arguments{ +\item{delimiter}{Field delimiter} + +\item{quoting}{Whether quoting is used} + +\item{quote_char}{Quoting character (if \code{quoting} is \code{TRUE})} + +\item{double_quote}{Whether a quote inside a value is double-quoted} + +\item{escaping}{Whether escaping is used} + +\item{escape_char}{Escaping character (if \code{escaping} is \code{TRUE})} + +\item{newlines_in_values}{Whether values are allowed to contain CR (\code{0x0d``) and LF (}0x0a``) characters} + +\item{ignore_empty_lines}{Whether empty lines are ignored. If false, an empty line represents} + +\item{header_rows}{Number of header rows to skip (including the first row containing column names)} +} +\description{ +Parsing options +} diff --git a/r/man/csv_read_options.Rd b/r/man/csv_read_options.Rd new file mode 100644 index 0000000000000..3fa2d8ccbf2f2 --- /dev/null +++ b/r/man/csv_read_options.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/csv.R +\name{csv_read_options} +\alias{csv_read_options} +\title{read options for the csv reader} +\usage{ +csv_read_options(use_threads = TRUE, block_size = 1048576L) +} +\arguments{ +\item{use_threads}{Whether to use the global CPU thread pool} + +\item{block_size}{Block size we request from the IO layer; also determines the size of chunks when use_threads is \code{TRUE}} +} +\description{ +read options for the csv reader +} diff --git a/r/man/csv_table_reader.Rd b/r/man/csv_table_reader.Rd new file mode 100644 index 0000000000000..029cd0b5923c2 --- /dev/null +++ b/r/man/csv_table_reader.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/csv.R +\name{csv_table_reader} +\alias{csv_table_reader} +\title{CSV table reader} +\usage{ +csv_table_reader(file, read_options = csv_read_options(), + parse_options = csv_parse_options(), + convert_options = csv_convert_options(), ...) 
+} +\arguments{ +\item{file}{file} + +\item{read_options, }{see \code{\link[=csv_read_options]{csv_read_options()}}} + +\item{parse_options, }{see \code{\link[=csv_parse_options]{csv_parse_options()}}} + +\item{convert_options, }{see \code{\link[=csv_convert_options]{csv_convert_options()}}} + +\item{...}{additional parameters.} +} +\description{ +CSV table reader +} diff --git a/r/man/read_csv_arrow.Rd b/r/man/read_csv_arrow.Rd new file mode 100644 index 0000000000000..4cdca91246b5b --- /dev/null +++ b/r/man/read_csv_arrow.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/csv.R +\name{read_csv_arrow} +\alias{read_csv_arrow} +\title{Read csv file into an arrow::Table} +\usage{ +read_csv_arrow(...) +} +\arguments{ +\item{...}{Used to construct an arrow::csv::TableReader} +} +\description{ +Use arrow::csv::TableReader from \code{\link[=csv_table_reader]{csv_table_reader()}} +} diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index e5a784eb70c23..c752afba1c258 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -558,6 +558,64 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// csv___ReadOptions__initialize +std::shared_ptr csv___ReadOptions__initialize(List_ options); +RcppExport SEXP _arrow_csv___ReadOptions__initialize(SEXP optionsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP); + rcpp_result_gen = Rcpp::wrap(csv___ReadOptions__initialize(options)); + return rcpp_result_gen; +END_RCPP +} +// csv___ParseOptions__initialize +std::shared_ptr csv___ParseOptions__initialize(List_ options); +RcppExport SEXP _arrow_csv___ParseOptions__initialize(SEXP optionsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP); + rcpp_result_gen = Rcpp::wrap(csv___ParseOptions__initialize(options)); + return rcpp_result_gen; +END_RCPP +} +// csv___ConvertOptions__initialize +std::shared_ptr csv___ConvertOptions__initialize(List_ options); +RcppExport SEXP _arrow_csv___ConvertOptions__initialize(SEXP optionsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< List_ >::type options(optionsSEXP); + rcpp_result_gen = Rcpp::wrap(csv___ConvertOptions__initialize(options)); + return rcpp_result_gen; +END_RCPP +} +// csv___TableReader__Make +std::shared_ptr csv___TableReader__Make(const std::shared_ptr& input, const std::shared_ptr& read_options, const std::shared_ptr& parse_options, const std::shared_ptr& convert_options); +RcppExport SEXP _arrow_csv___TableReader__Make(SEXP inputSEXP, SEXP read_optionsSEXP, SEXP parse_optionsSEXP, SEXP convert_optionsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type input(inputSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type read_options(read_optionsSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type parse_options(parse_optionsSEXP); + Rcpp::traits::input_parameter< const std::shared_ptr& >::type convert_options(convert_optionsSEXP); + rcpp_result_gen = Rcpp::wrap(csv___TableReader__Make(input, read_options, parse_options, convert_options)); + return rcpp_result_gen; +END_RCPP +} +// csv___TableReader__Read +std::shared_ptr csv___TableReader__Read(const std::shared_ptr& table_reader); +RcppExport 
SEXP _arrow_csv___TableReader__Read(SEXP table_readerSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type table_reader(table_readerSEXP); + rcpp_result_gen = Rcpp::wrap(csv___TableReader__Read(table_reader)); + return rcpp_result_gen; +END_RCPP +} // shared_ptr_is_null bool shared_ptr_is_null(SEXP xp); RcppExport SEXP _arrow_shared_ptr_is_null(SEXP xpSEXP) { @@ -2200,6 +2258,11 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_ChunkedArray__cast", (DL_FUNC) &_arrow_ChunkedArray__cast, 3}, {"_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, {"_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, + {"_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, + {"_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, + {"_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, + {"_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, + {"_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, {"_arrow_shared_ptr_is_null", (DL_FUNC) &_arrow_shared_ptr_is_null, 1}, {"_arrow_unique_ptr_is_null", (DL_FUNC) &_arrow_unique_ptr_is_null, 1}, {"_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index dba7a91c21e33..6fef7997dbfa7 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -22,6 +22,7 @@ #undef Free #include #include +#include #include #include #include diff --git a/r/src/csv.cpp b/r/src/csv.cpp new file mode 100644 index 0000000000000..0e1d09fb65e8b --- /dev/null +++ b/r/src/csv.cpp @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include "arrow_types.h"
+
+using namespace Rcpp;
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ReadOptions> csv___ReadOptions__initialize(List_ options) {
+  auto res =
+      std::make_shared<arrow::csv::ReadOptions>(arrow::csv::ReadOptions::Defaults());
+  res->use_threads = options["use_threads"];
+  res->block_size = options["block_size"];
+  return res;
+}
+
+inline char get_char(CharacterVector x) { return CHAR(STRING_ELT(x, 0))[0]; }
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ParseOptions> csv___ParseOptions__initialize(List_ options) {
+  auto res =
+      std::make_shared<arrow::csv::ParseOptions>(arrow::csv::ParseOptions::Defaults());
+  res->delimiter = get_char(options["delimiter"]);
+  res->quoting = options["quoting"];
+  res->quote_char = get_char(options["quote_char"]);
+  res->double_quote = options["double_quote"];
+  res->escape_char = get_char(options["escape_char"]);
+  res->newlines_in_values = options["newlines_in_values"];
+  res->header_rows = options["header_rows"];
+  res->ignore_empty_lines = options["ignore_empty_lines"];
+  return res;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::ConvertOptions> csv___ConvertOptions__initialize(
+    List_ options) {
+  auto res = std::make_shared<arrow::csv::ConvertOptions>(
+      arrow::csv::ConvertOptions::Defaults());
+  res->check_utf8 = options["check_utf8"];
+  return res;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::csv::TableReader> csv___TableReader__Make(
+    const std::shared_ptr<arrow::io::InputStream>& input,
+    const std::shared_ptr<arrow::csv::ReadOptions>& read_options,
+    const std::shared_ptr<arrow::csv::ParseOptions>& parse_options,
+    const std::shared_ptr<arrow::csv::ConvertOptions>& convert_options) {
+  std::shared_ptr<arrow::csv::TableReader> table_reader;
+  STOP_IF_NOT_OK(arrow::csv::TableReader::Make(arrow::default_memory_pool(), input,
+                                               *read_options, *parse_options,
+                                               *convert_options, &table_reader));
+  return table_reader;
+}
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Table> csv___TableReader__Read(
+    const std::shared_ptr<arrow::csv::TableReader>& table_reader) {
+  std::shared_ptr<arrow::Table> table;
+  STOP_IF_NOT_OK(table_reader->Read(&table));
+  return table;
+}
diff --git a/r/tests/testthat/test-arrow-csv-.R b/r/tests/testthat/test-arrow-csv-.R
new file mode 100644
index 0000000000000..2afd0622821ae
--- /dev/null
+++ b/r/tests/testthat/test-arrow-csv-.R
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+context("arrow::csv::TableReader")
+
+test_that("Can read csv file", {
+  tf <- local_tempfile()
+  write.csv(iris, tf, row.names = FALSE, quote = FALSE)
+
+  tab1 <- read_csv_arrow(tf)
+  tab2 <- read_csv_arrow(mmap_open(tf))
+  tab3 <- read_csv_arrow(ReadableFile(tf))
+
+  iris$Species <- as.character(iris$Species)
+  tab0 <- table(iris)
+  expect_equal(tab0, tab1)
+  expect_equal(tab0, tab2)
+  expect_equal(tab0, tab3)
+})
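For reference, the Rcpp bindings above reduce to the following C++ flow. This is a rough sketch based on the binding code in `r/src/csv.cpp`; the include paths and the `ARROW_RETURN_NOT_OK` convenience macro are assumptions for the example, not taken from the patch:

```cpp
#include <memory>
#include <string>

#include "arrow/csv/reader.h"  // arrow::csv::TableReader (approximate path)
#include "arrow/io/file.h"
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/table.h"

// Mirrors read_csv_arrow(): open an input stream, make a TableReader with
// default options, then Read() the whole file into an arrow::Table.
arrow::Status ReadCsvFile(const std::string& path,
                          std::shared_ptr<arrow::Table>* out) {
  std::shared_ptr<arrow::io::ReadableFile> input;
  ARROW_RETURN_NOT_OK(arrow::io::ReadableFile::Open(path, &input));

  std::shared_ptr<arrow::csv::TableReader> reader;
  ARROW_RETURN_NOT_OK(arrow::csv::TableReader::Make(
      arrow::default_memory_pool(), input, arrow::csv::ReadOptions::Defaults(),
      arrow::csv::ParseOptions::Defaults(),
      arrow::csv::ConvertOptions::Defaults(), &reader));

  return reader->Read(out);
}
```

`read_csv_arrow()` is just this flow behind the S3 dispatch described in the commit message above.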
Korn" Date: Sat, 5 Jan 2019 06:15:43 +0100 Subject: [PATCH 020/203] =?UTF-8?q?[Documentation]=C2=A0Fix=20syntax=20err?= =?UTF-8?q?or=20in=20building.rst=20(#3313)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/building.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/building.rst b/docs/source/building.rst index dfa857498cf80..2a0e9fb6bf9c7 100644 --- a/docs/source/building.rst +++ b/docs/source/building.rst @@ -87,6 +87,4 @@ You can use Docker to build the documentation: docker-compose build docs docker-compose run docs -The final output is located under [#]_:: - - docs/_build/html +The final output is located under ``docs/_build/html``. From a4f4808e274e46ce71b08188071d3e2db230c82e Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sat, 5 Jan 2019 18:55:26 +0900 Subject: [PATCH 021/203] ARROW-4153: [GLib] Add builder_append_value() for consistency Because we use builder_append_values() for multiple values. builder_append() is deprecated. Author: Kouhei Sutou Closes #3304 from kou/glib-builder-append-value and squashes the following commits: e93c0419 Add builder_append_value() for consistency --- c_glib/arrow-cuda-glib/cuda.cpp | 2 +- c_glib/arrow-glib/array-builder.cpp | 626 ++++++++++++++++++++++++-- c_glib/arrow-glib/array-builder.h | 160 +++++++ c_glib/arrow-glib/codec.cpp | 2 +- c_glib/arrow-glib/orc-file-reader.cpp | 3 +- c_glib/example/build.c | 6 +- c_glib/test/helper/buildable.rb | 8 +- c_glib/test/test-array.rb | 18 +- c_glib/test/test-binary-array.rb | 10 +- c_glib/test/test-boolean-array.rb | 14 +- c_glib/test/test-date32-array.rb | 14 +- c_glib/test/test-date64-array.rb | 14 +- c_glib/test/test-decimal-array.rb | 4 +- c_glib/test/test-double-array.rb | 14 +- c_glib/test/test-float-array.rb | 14 +- c_glib/test/test-int16-array.rb | 14 +- c_glib/test/test-int32-array.rb | 14 +- c_glib/test/test-int64-array.rb | 14 +- c_glib/test/test-int8-array.rb | 14 +- c_glib/test/test-list-array.rb | 14 +- c_glib/test/test-string-array.rb | 6 +- c_glib/test/test-struct-array.rb | 12 +- c_glib/test/test-uint16-array.rb | 14 +- c_glib/test/test-uint32-array.rb | 14 +- c_glib/test/test-uint64-array.rb | 14 +- c_glib/test/test-uint8-array.rb | 14 +- 26 files changed, 873 insertions(+), 180 deletions(-) diff --git a/c_glib/arrow-cuda-glib/cuda.cpp b/c_glib/arrow-cuda-glib/cuda.cpp index 3f82f8fa806cb..9679cc0ff7fd8 100644 --- a/c_glib/arrow-cuda-glib/cuda.cpp +++ b/c_glib/arrow-cuda-glib/cuda.cpp @@ -648,7 +648,7 @@ garrow_cuda_ipc_memory_handle_new(const guint8 *data, * * Returns: (transfer full): A newly created #GArrowBuffer on success, * %NULL on error. The buffer has serialized @handle. The serialized - * @handle can be deserialized by garrow_gpu_cuda_ipc_memory_handle_new() + * @handle can be deserialized by garrow_cuda_ipc_memory_handle_new() * in other process. 
* * Since: 0.8.0 diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index a5c75790de939..4b61bfaf7fab9 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -29,10 +29,10 @@ template gboolean -garrow_array_builder_append(GArrowArrayBuilder *builder, - VALUE value, - GError **error, - const gchar *context) +garrow_array_builder_append_value(GArrowArrayBuilder *builder, + VALUE value, + GError **error, + const gchar *context) { auto arrow_builder = static_cast(garrow_array_builder_get_raw(builder)); @@ -446,17 +446,38 @@ garrow_boolean_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_boolean_array_builder_append_value() instead. */ gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error) { - return garrow_array_builder_append + return garrow_boolean_array_builder_append_value(builder, value, error); +} + +/** + * garrow_boolean_array_builder_append_value: + * @builder: A #GArrowBooleanArrayBuilder. + * @value: A boolean value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), static_cast(value), error, - "[boolean-array-builder][append]"); + "[boolean-array-builder][append-value]"); } /** @@ -583,17 +604,38 @@ garrow_int_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.6.0 + * + * Deprecated: 0.12.0: + * Use garrow_int_array_builder_append_value() instead. */ gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_int_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int_array_builder_append_value: + * @builder: A #GArrowIntArrayBuilder. + * @value: A int value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int-array-builder][append]"); + "[int-array-builder][append-value]"); } /** @@ -718,17 +760,38 @@ garrow_uint_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.8.0 + * + * Deprecated: 0.12.0: + * Use garrow_uint_array_builder_append_value() instead. */ gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, guint64 value, GError **error) { - return garrow_array_builder_append + return garrow_uint_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint_array_builder_append_value: + * @builder: A #GArrowUIntArrayBuilder. + * @value: A unsigned int value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, + guint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint-array-builder][append]"); + "[uint-array-builder][append-value]"); } /** @@ -848,17 +911,38 @@ garrow_int8_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int8_array_builder_append_value() instead. */ gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error) { - return garrow_array_builder_append + return garrow_int8_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int8_array_builder_append_value: + * @builder: A #GArrowInt8ArrayBuilder. + * @value: A int8 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int8-array-builder][append]"); + "[int8-array-builder][append-value]"); } /** @@ -976,17 +1060,38 @@ garrow_uint8_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint8_array_builder_append_value() instead. */ gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error) { - return garrow_array_builder_append + return garrow_uint8_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint8_array_builder_append_value: + * @builder: A #GArrowUInt8ArrayBuilder. + * @value: An uint8 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint8-array-builder][append]"); + "[uint8-array-builder][append-value]"); } /** @@ -1104,17 +1209,38 @@ garrow_int16_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int16_array_builder_append_value() instead. */ gboolean garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error) { - return garrow_array_builder_append + return garrow_int16_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int16_array_builder_append_value: + * @builder: A #GArrowInt16ArrayBuilder. + * @value: A int16 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int16-array-builder][append]"); + "[int16-array-builder][append-value]"); } /** @@ -1232,17 +1358,38 @@ garrow_uint16_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint16_array_builder_append_value() instead. */ gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error) { - return garrow_array_builder_append + return garrow_uint16_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint16_array_builder_append_value: + * @builder: A #GArrowUInt16ArrayBuilder. + * @value: An uint16 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint16-array-builder][append]"); + "[uint16-array-builder][append-value]"); } /** @@ -1360,17 +1507,38 @@ garrow_int32_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int32_array_builder_append_value() instead. */ gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_int32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int32_array_builder_append_value: + * @builder: A #GArrowInt32ArrayBuilder. + * @value: A int32 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int32-array-builder][append]"); + "[int32-array-builder][append-value]"); } /** @@ -1488,17 +1656,38 @@ garrow_uint32_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint32_array_builder_append_value() instead. */ gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error) { - return garrow_array_builder_append + return garrow_uint32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint32_array_builder_append_value: + * @builder: A #GArrowUInt32ArrayBuilder. + * @value: An uint32 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint32-array-builder][append]"); + "[uint32-array-builder][append-value]"); } /** @@ -1616,17 +1805,38 @@ garrow_int64_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_int64_array_builder_append_value() instead. */ gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_int64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_int64_array_builder_append_value: + * @builder: A #GArrowInt64ArrayBuilder. + * @value: A int64 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[int64-array-builder][append]"); + "[int64-array-builder][append-value]"); } /** @@ -1744,17 +1954,38 @@ garrow_uint64_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_uint64_array_builder_append_value() instead. */ gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error) { - return garrow_array_builder_append + return garrow_uint64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_uint64_array_builder_append_value: + * @builder: A #GArrowUInt64ArrayBuilder. + * @value: An uint64 value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[uint64-array-builder][append]"); + "[uint64-array-builder][append-value]"); } /** @@ -1872,17 +2103,38 @@ garrow_float_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_float_array_builder_append_value() instead. */ gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, gfloat value, GError **error) { - return garrow_array_builder_append + return garrow_float_array_builder_append_value(builder, value, error); +} + +/** + * garrow_float_array_builder_append_value: + * @builder: A #GArrowFloatArrayBuilder. + * @value: A float value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[float-array-builder][append]"); + "[float-array-builder][append-value]"); } /** @@ -2000,17 +2252,38 @@ garrow_double_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_double_array_builder_append_value() instead. */ gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error) { - return garrow_array_builder_append + return garrow_double_array_builder_append_value(builder, value, error); +} + +/** + * garrow_double_array_builder_append_value: + * @builder: A #GArrowDoubleArrayBuilder. + * @value: A double value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[double-array-builder][append]"); + "[double-array-builder][append-value]"); } /** @@ -2129,19 +2402,44 @@ garrow_binary_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_binary_array_builder_append_value() instead. */ gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, const guint8 *value, gint32 length, GError **error) +{ + return garrow_binary_array_builder_append_value(builder, value, length, error); +} + +/** + * garrow_binary_array_builder_append_value: + * @builder: A #GArrowBinaryArrayBuilder. + * @value: (array length=length): A binary value. + * @length: A value length. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error) { auto arrow_builder = static_cast( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(value, length); - return garrow_error_check(error, status, "[binary-array-builder][append]"); + return garrow_error_check(error, + status, + "[binary-array-builder][append-value]"); } /** @@ -2197,11 +2495,32 @@ garrow_string_array_builder_new(void) * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. + * + * Deprecated: 0.12.0: + * Use garrow_string_array_builder_append_value() instead. */ gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, const gchar *value, GError **error) +{ + return garrow_string_array_builder_append_value(builder, value, error); +} + +/** + * garrow_string_array_builder_append_value: + * @builder: A #GArrowStringArrayBuilder. + * @value: A string value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error) { auto arrow_builder = static_cast( @@ -2209,7 +2528,9 @@ garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, auto status = arrow_builder->Append(value, static_cast(strlen(value))); - return garrow_error_check(error, status, "[string-array-builder][append]"); + return garrow_error_check(error, + status, + "[string-array-builder][append-value]"); } /** @@ -2290,17 +2611,38 @@ garrow_date32_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date32_array_builder_append_value() instead. */ gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_date32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date32_array_builder_append_value: + * @builder: A #GArrowDate32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[date32-array-builder][append]"); + "[date32-array-builder][append-value]"); } /** @@ -2425,17 +2767,38 @@ garrow_date64_array_builder_new(void) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_date64_array_builder_append_value() instead. */ gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_date64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_date64_array_builder_append_value: + * @builder: A #GArrowDate64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[date64-array-builder][append]"); + "[date64-array-builder][append-value]"); } /** @@ -2562,17 +2925,38 @@ garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_timestamp_array_builder_append_value() instead. */ gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_timestamp_array_builder_append_value(builder, value, error); +} + +/** + * garrow_timestamp_array_builder_append_value: + * @builder: A #GArrowTimestampArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[timestamp-array-builder][append]"); + "[timestamp-array-builder][append-value]"); } /** @@ -2699,17 +3083,38 @@ garrow_time32_array_builder_new(GArrowTime32DataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time32_array_builder_append_value() instead. */ gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error) { - return garrow_array_builder_append + return garrow_time32_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time32_array_builder_append_value: + * @builder: A #GArrowTime32ArrayBuilder. + * @value: The number of days since UNIX epoch in signed 32bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[time32-array-builder][append]"); + "[time32-array-builder][append-value]"); } /** @@ -2836,17 +3241,38 @@ garrow_time64_array_builder_new(GArrowTime64DataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.7.0 + * + * Deprecated: 0.12.0: + * Use garrow_time64_array_builder_append_value() instead. */ gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error) { - return garrow_array_builder_append + return garrow_time64_array_builder_append_value(builder, value, error); +} + +/** + * garrow_time64_array_builder_append_value: + * @builder: A #GArrowTime64ArrayBuilder. + * @value: The number of milliseconds since UNIX epoch in signed 64bit integer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error) +{ + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), value, error, - "[time64-array-builder][append]"); + "[time64-array-builder][append-value]"); } /** @@ -3047,17 +3473,72 @@ garrow_list_array_builder_new(GArrowListDataType *data_type, * g_object_unref(array); * } * ]| + * + * Deprecated: 0.12.0: + * Use garrow_list_array_builder_append_value() instead. */ gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error) +{ + return garrow_list_array_builder_append_value(builder, error); +} + +/** + * garrow_list_array_builder_append_value: + * @builder: A #GArrowListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new list element. To append a new list element, you + * need to call this function then append list element values to + * `value_builder`. `value_builder` is the #GArrowArrayBuilder + * specified to constructor. You can get `value_builder` by + * garrow_list_array_builder_get_value_builder(). 
+ * + * |[ + * GArrowInt8ArrayBuilder *value_builder; + * GArrowListArrayBuilder *builder; + * + * value_builder = garrow_int8_array_builder_new(); + * builder = garrow_list_array_builder_new(value_builder, NULL); + * + * // Start 0th list element: [1, 0, -1] + * garrow_list_array_builder_append(builder, NULL); + * garrow_int8_array_builder_append(value_builder, 1); + * garrow_int8_array_builder_append(value_builder, 0); + * garrow_int8_array_builder_append(value_builder, -1); + * + * // Start 1st list element: [-29, 29] + * garrow_list_array_builder_append(builder, NULL); + * garrow_int8_array_builder_append(value_builder, -29); + * garrow_int8_array_builder_append(value_builder, 29); + * + * { + * // [[1, 0, -1], [-29, 29]] + * GArrowArray *array = garrow_array_builder_finish(builder); + * // Now, builder is needless. + * g_object_unref(builder); + * g_object_unref(value_builder); + * + * // Use array... + * g_object_unref(array); + * } + * ]| + * + * Since: 0.12.0 + */ +gboolean +garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, + GError **error) { auto arrow_builder = static_cast( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(); - return garrow_error_check(error, status, "[list-array-builder][append]"); + return garrow_error_check(error, status, "[list-array-builder][append-value]"); } /** @@ -3195,17 +3676,49 @@ garrow_struct_array_builder_new(GArrowStructDataType *data_type, * |[ * // TODO * ]| + * + * Deprecated: 0.12.0: + * Use garrow_struct_array_builder_append_value() instead. */ gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error) +{ + return garrow_struct_array_builder_append_value(builder, error); +} + +/** + * garrow_struct_array_builder_append_value: + * @builder: A #GArrowStructArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new struct element. To append a new struct element, + * you need to call this function then append struct element field + * values to all `field_builder`s. `field_value`s are the + * #GArrowArrayBuilder specified to constructor. You can get + * `field_builder` by garrow_struct_array_builder_get_field_builder() + * or garrow_struct_array_builder_get_field_builders(). + * + * |[ + * // TODO + * ]| + * + * Since: 0.12.0 + */ +gboolean +garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, + GError **error) { auto arrow_builder = static_cast( garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); auto status = arrow_builder->Append(); - return garrow_error_check(error, status, "[struct-array-builder][append]"); + return garrow_error_check(error, + status, + "[struct-array-builder][append-value]"); } /** @@ -3315,18 +3828,39 @@ garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type) * Returns: %TRUE on success, %FALSE if there was an error. * * Since: 0.10.0 + * + * Deprecated: 0.12.0: + * Use garrow_decimal128_array_builder_append_value() instead. */ gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, GArrowDecimal128 *value, GError **error) +{ + return garrow_decimal128_array_builder_append_value(builder, value, error); +} + +/** + * garrow_decimal128_array_builder_append_value: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @value: A decimal value. + * @error: (nullable): Return location for a #GError or %NULL. 
+ * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error) { auto arrow_decimal = garrow_decimal128_get_raw(value); - return garrow_array_builder_append + return garrow_array_builder_append_value (GARROW_ARRAY_BUILDER(builder), *arrow_decimal, error, - "[decimal128-array-builder][append]"); + "[decimal128-array-builder][append-value]"); } G_END_DECLS diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index db340b70ab77c..1ddc0266f4993 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -90,9 +90,16 @@ GType garrow_boolean_array_builder_get_type(void) G_GNUC_CONST; GArrowBooleanArrayBuilder *garrow_boolean_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, + gboolean value, + GError **error); gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, const gboolean *values, gint64 values_length, @@ -150,9 +157,16 @@ GType garrow_int_array_builder_get_type(void) G_GNUC_CONST; GArrowIntArrayBuilder *garrow_int_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -179,9 +193,16 @@ struct _GArrowUIntArrayBuilderClass GArrowUIntArrayBuilder *garrow_uint_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, guint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, + guint64 value, + GError **error); gboolean garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, const guint64 *values, gint64 values_length, @@ -239,9 +260,16 @@ GType garrow_int8_array_builder_get_type(void) G_GNUC_CONST; GArrowInt8ArrayBuilder *garrow_int8_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, + gint8 value, + GError **error); gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, const gint8 *values, gint64 values_length, @@ -299,9 +327,16 @@ GType garrow_uint8_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt8ArrayBuilder *garrow_uint8_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) gboolean 
garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, + guint8 value, + GError **error); gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, const guint8 *values, gint64 values_length, @@ -359,9 +394,16 @@ GType garrow_int16_array_builder_get_type(void) G_GNUC_CONST; GArrowInt16ArrayBuilder *garrow_int16_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int16_array_builder_append_value) gboolean garrow_int16_array_builder_append(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, + gint16 value, + GError **error); gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, const gint16 *values, gint64 values_length, @@ -419,9 +461,16 @@ GType garrow_uint16_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt16ArrayBuilder *garrow_uint16_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, + guint16 value, + GError **error); gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, const guint16 *values, gint64 values_length, @@ -479,9 +528,16 @@ GType garrow_int32_array_builder_get_type(void) G_GNUC_CONST; GArrowInt32ArrayBuilder *garrow_int32_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value) gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -539,9 +595,16 @@ GType garrow_uint32_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt32ArrayBuilder *garrow_uint32_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value) gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, + guint32 value, + GError **error); gboolean garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, const guint32 *values, gint64 values_length, @@ -599,9 +662,16 @@ GType garrow_int64_array_builder_get_type(void) G_GNUC_CONST; GArrowInt64ArrayBuilder *garrow_int64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value) gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean 
garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -659,9 +729,16 @@ GType garrow_uint64_array_builder_get_type(void) G_GNUC_CONST; GArrowUInt64ArrayBuilder *garrow_uint64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value) gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, + guint64 value, + GError **error); gboolean garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, const guint64 *values, gint64 values_length, @@ -719,9 +796,16 @@ GType garrow_float_array_builder_get_type(void) G_GNUC_CONST; GArrowFloatArrayBuilder *garrow_float_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value) gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, gfloat value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, + gfloat value, + GError **error); gboolean garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, const gfloat *values, gint64 values_length, @@ -779,9 +863,16 @@ GType garrow_double_array_builder_get_type(void) G_GNUC_CONST; GArrowDoubleArrayBuilder *garrow_double_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value) gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, + gdouble value, + GError **error); gboolean garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, const gdouble *values, gint64 values_length, @@ -839,10 +930,18 @@ GType garrow_binary_array_builder_get_type(void) G_GNUC_CONST; GArrowBinaryArrayBuilder *garrow_binary_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value) gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, const guint8 *value, gint32 length, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_binary_array_builder_append_value(GArrowBinaryArrayBuilder *builder, + const guint8 *value, + gint32 length, + GError **error); gboolean garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder, GError **error); @@ -891,9 +990,16 @@ GType garrow_string_array_builder_get_type(void) G_GNUC_CONST; GArrowStringArrayBuilder *garrow_string_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value) gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, const gchar *value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_string_array_builder_append_value(GArrowStringArrayBuilder *builder, + const gchar *value, + GError **error); gboolean garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder, const gchar **values, gint64 values_length, @@ -946,9 +1052,16 @@ GType garrow_date32_array_builder_get_type(void) G_GNUC_CONST; GArrowDate32ArrayBuilder *garrow_date32_array_builder_new(void); 
+#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value) gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -1006,9 +1119,16 @@ GType garrow_date64_array_builder_get_type(void) G_GNUC_CONST; GArrowDate64ArrayBuilder *garrow_date64_array_builder_new(void); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value) gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1067,9 +1187,16 @@ GType garrow_timestamp_array_builder_get_type(void) G_GNUC_CONST; GArrowTimestampArrayBuilder * garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value) gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1127,9 +1254,16 @@ GType garrow_time32_array_builder_get_type(void) G_GNUC_CONST; GArrowTime32ArrayBuilder *garrow_time32_array_builder_new(GArrowTime32DataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value) gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, + gint32 value, + GError **error); gboolean garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, const gint32 *values, gint64 values_length, @@ -1187,9 +1321,16 @@ GType garrow_time64_array_builder_get_type(void) G_GNUC_CONST; GArrowTime64ArrayBuilder *garrow_time64_array_builder_new(GArrowTime64DataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value) gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, + gint64 value, + GError **error); gboolean garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, const gint64 *values, gint64 values_length, @@ -1248,8 +1389,14 @@ GType garrow_list_array_builder_get_type(void) G_GNUC_CONST; GArrowListArrayBuilder *garrow_list_array_builder_new(GArrowListDataType *data_type, GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value) gboolean 
garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, + GError **error); gboolean garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, GError **error); @@ -1301,8 +1448,14 @@ GType garrow_struct_array_builder_get_type(void) G_GNUC_CONST; GArrowStructArrayBuilder *garrow_struct_array_builder_new(GArrowStructDataType *data_type, GError **error); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value) gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, + GError **error); gboolean garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, GError **error); @@ -1324,8 +1477,15 @@ struct _GArrowDecimal128ArrayBuilderClass GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type); +#ifndef GARROW_DISABLE_DEPRECATED +GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value) gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, GArrowDecimal128 *value, GError **error); +#endif +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, + GArrowDecimal128 *value, + GError **error); G_END_DECLS diff --git a/c_glib/arrow-glib/codec.cpp b/c_glib/arrow-glib/codec.cpp index 45863878e9c7e..7f06fabde74e8 100644 --- a/c_glib/arrow-glib/codec.cpp +++ b/c_glib/arrow-glib/codec.cpp @@ -119,7 +119,7 @@ garrow_codec_class_init(GArrowCodecClass *klass) /** * garrow_codec_new: - * @type: A #GArrowCodompressionType. + * @type: A #GArrowCompressionType. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: A newly created #GArrowCodec on success, %NULL on error. diff --git a/c_glib/arrow-glib/orc-file-reader.cpp b/c_glib/arrow-glib/orc-file-reader.cpp index bde3cfc8fa04f..31905a2f9fea1 100644 --- a/c_glib/arrow-glib/orc-file-reader.cpp +++ b/c_glib/arrow-glib/orc-file-reader.cpp @@ -199,8 +199,7 @@ garrow_orc_file_reader_new(GArrowSeekableInputStream *input, * Since: 0.10.0 * * Deprecated: 0.12.0: - * Use garrow_orc_file_reader_set_field_indices() instead. - * + * Use garrow_orc_file_reader_set_field_indices() instead. 
*/ void garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, diff --git a/c_glib/example/build.c b/c_glib/example/build.c index 8c6cf74d74815..9b2d58d2b2bba 100644 --- a/c_glib/example/build.c +++ b/c_glib/example/build.c @@ -33,13 +33,13 @@ main(int argc, char **argv) builder = garrow_int32_array_builder_new(); if (success) { - success = garrow_int32_array_builder_append(builder, 29, &error); + success = garrow_int32_array_builder_append_value(builder, 29, &error); } if (success) { - success = garrow_int32_array_builder_append(builder, 2929, &error); + success = garrow_int32_array_builder_append_value(builder, 2929, &error); } if (success) { - success = garrow_int32_array_builder_append(builder, 292929, &error); + success = garrow_int32_array_builder_append_value(builder, 292929, &error); } if (!success) { g_print("failed to append: %s\n", error->message); diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb index d6d1ff89b6a3e..f3ae709512eeb 100644 --- a/c_glib/test/helper/buildable.rb +++ b/c_glib/test/helper/buildable.rb @@ -135,20 +135,20 @@ def append_to_builder(builder, value) data_type = builder.value_data_type case data_type when Arrow::ListDataType - builder.append + builder.append_value value_builder = builder.value_builder value.each do |v| append_to_builder(value_builder, v) end when Arrow::StructDataType - builder.append + builder.append_value value.each do |name, v| field_index = data_type.get_field_index(name) field_builder = builder.get_field_builder(field_index) append_to_builder(field_builder, v) end else - builder.append(value) + builder.append_value(value) end end end @@ -179,7 +179,7 @@ def build_array(builder, values) if value.nil? builder.append_null else - builder.append(value) + builder.append_value(value) end end builder.finish diff --git a/c_glib/test/test-array.rb b/c_glib/test/test-array.rb index 12fba7346c36f..3befde3c7a9bb 100644 --- a/c_glib/test/test-array.rb +++ b/c_glib/test/test-array.rb @@ -42,7 +42,7 @@ def test_equal_range def test_is_null builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal([true, false], array.length.times.collect {|i| array.null?(i)}) @@ -51,7 +51,7 @@ def test_is_null def test_is_valid builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal([false, true], array.length.times.collect {|i| array.valid?(i)}) @@ -59,7 +59,7 @@ def test_is_valid def test_length builder = Arrow::BooleanArrayBuilder.new - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal(1, array.length) end @@ -75,10 +75,10 @@ def test_n_nulls def test_null_bitmap builder = Arrow::BooleanArrayBuilder.new builder.append_null - builder.append(true) - builder.append(false) + builder.append_value(true) + builder.append_value(false) builder.append_null - builder.append(false) + builder.append_value(false) array = builder.finish assert_equal(0b10110, array.null_bitmap.data.to_s.unpack("c*")[0]) end @@ -97,9 +97,9 @@ def test_value_type def test_slice builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish sub_array = array.slice(1, 2) assert_equal([false, true], diff --git a/c_glib/test/test-binary-array.rb b/c_glib/test/test-binary-array.rb index 
2dfd9cfbaaf14..0dcaf4eef60c5 100644 --- a/c_glib/test/test-binary-array.rb +++ b/c_glib/test/test-binary-array.rb @@ -32,7 +32,7 @@ def test_new def test_value data = "\x00\x01\x02" builder = Arrow::BinaryArrayBuilder.new - builder.append(data) + builder.append_value(data) array = builder.finish assert_equal(data, array.get_value(0).to_s) end @@ -41,8 +41,8 @@ def test_buffer data1 = "\x00\x01\x02" data2 = "\x03\x04\x05" builder = Arrow::BinaryArrayBuilder.new - builder.append(data1) - builder.append(data2) + builder.append_value(data1) + builder.append_value(data2) array = builder.finish assert_equal(data1 + data2, array.buffer.data.to_s) end @@ -51,8 +51,8 @@ def test_offsets_buffer data1 = "\x00\x01" data2 = "\x02\x03\x04" builder = Arrow::BinaryArrayBuilder.new - builder.append(data1) - builder.append(data2) + builder.append_value(data1) + builder.append_value(data2) array = builder.finish byte_per_offset = 4 assert_equal([0, 2, 5].pack("l*"), diff --git a/c_glib/test/test-boolean-array.rb b/c_glib/test/test-boolean-array.rb index ac07ec995ea32..e8c7e5efe2fc5 100644 --- a/c_glib/test/test-boolean-array.rb +++ b/c_glib/test/test-boolean-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish assert_equal([0b101].pack("C*"), array.buffer.data.to_s) end def test_value builder = Arrow::BooleanArrayBuilder.new - builder.append(true) + builder.append_value(true) array = builder.finish assert_equal(true, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 3, 1) builder = Arrow::BooleanArrayBuilder.new - builder.append(true) - builder.append(false) - builder.append(true) + builder.append_value(true) + builder.append_value(false) + builder.append_value(true) array = builder.finish assert_equal([true, false, true], array.values) end diff --git a/c_glib/test/test-date32-array.rb b/c_glib/test/test-date32-array.rb index f1425693f381e..09ef78650bd59 100644 --- a/c_glib/test/test-date32-array.rb +++ b/c_glib/test/test-date32-array.rb @@ -34,9 +34,9 @@ def test_buffer after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch].pack("l*"), array.buffer.data.to_s) @@ -46,7 +46,7 @@ def test_value after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(after_epoch) + builder.append_value(after_epoch) array = builder.finish assert_equal(after_epoch, array.get_value(0)) end @@ -56,9 +56,9 @@ def test_values after_epoch = 17406 # 2017-08-28 builder = Arrow::Date32ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch], array.values) end diff --git a/c_glib/test/test-date64-array.rb b/c_glib/test/test-date64-array.rb index 1ea9f5a6a0545..4d9f189196fc8 100644 --- a/c_glib/test/test-date64-array.rb +++ b/c_glib/test/test-date64-array.rb @@ -34,9 +34,9 @@ def test_buffer after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = 
Arrow::Date64ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch].pack("q*"), array.buffer.data.to_s) @@ -46,7 +46,7 @@ def test_value after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = Arrow::Date64ArrayBuilder.new - builder.append(after_epoch) + builder.append_value(after_epoch) array = builder.finish assert_equal(after_epoch, array.get_value(0)) end @@ -56,9 +56,9 @@ def test_values after_epoch = 1503878400000 # 2017-08-28T00:00:00Z builder = Arrow::Date64ArrayBuilder.new - builder.append(0) - builder.append(after_epoch) - builder.append(before_epoch) + builder.append_value(0) + builder.append_value(after_epoch) + builder.append_value(before_epoch) array = builder.finish assert_equal([0, after_epoch, before_epoch], array.values) end diff --git a/c_glib/test/test-decimal-array.rb b/c_glib/test/test-decimal-array.rb index a65e10037659a..a5eb28253d95f 100644 --- a/c_glib/test/test-decimal-array.rb +++ b/c_glib/test/test-decimal-array.rb @@ -20,7 +20,7 @@ def test_format_value data_type = Arrow::DecimalDataType.new(8,2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") - builder.append(decimal) + builder.append_value(decimal) array = builder.finish assert_equal("234234.45", array.format_value(0)) end @@ -29,7 +29,7 @@ def test_value data_type = Arrow::DecimalDataType.new(8,2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") - builder.append(decimal) + builder.append_value(decimal) array = builder.finish assert_equal("234234.45", array.get_value(0).to_string_scale(array.value_data_type.scale)) diff --git a/c_glib/test/test-double-array.rb b/c_glib/test/test-double-array.rb index 1213a5dfe53d6..020ed8f079960 100644 --- a/c_glib/test/test-double-array.rb +++ b/c_glib/test/test-double-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::DoubleArrayBuilder.new - builder.append(-1.1) - builder.append(2.2) - builder.append(-4.4) + builder.append_value(-1.1) + builder.append_value(2.2) + builder.append_value(-4.4) array = builder.finish assert_equal([-1.1, 2.2, -4.4].pack("d*"), array.buffer.data.to_s) end def test_value builder = Arrow::DoubleArrayBuilder.new - builder.append(1.5) + builder.append_value(1.5) array = builder.finish assert_in_delta(1.5, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::DoubleArrayBuilder.new - builder.append(1.5) - builder.append(3) - builder.append(4.5) + builder.append_value(1.5) + builder.append_value(3) + builder.append_value(4.5) array = builder.finish assert_equal([1.5, 3.0, 4.5], array.values) end diff --git a/c_glib/test/test-float-array.rb b/c_glib/test/test-float-array.rb index c8e1b4d864c08..c2a71a0dd39db 100644 --- a/c_glib/test/test-float-array.rb +++ b/c_glib/test/test-float-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::FloatArrayBuilder.new - builder.append(-1.1) - builder.append(2.2) - builder.append(-4.4) + builder.append_value(-1.1) + builder.append_value(2.2) + builder.append_value(-4.4) array = builder.finish assert_equal([-1.1, 2.2, -4.4].pack("f*"), array.buffer.data.to_s) end def test_value builder = Arrow::FloatArrayBuilder.new - builder.append(1.5) + builder.append_value(1.5) array = builder.finish 
assert_in_delta(1.5, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::FloatArrayBuilder.new - builder.append(1.5) - builder.append(3) - builder.append(4.5) + builder.append_value(1.5) + builder.append_value(3) + builder.append_value(4.5) array = builder.finish assert_equal([1.5, 3.0, 4.5], array.values) end diff --git a/c_glib/test/test-int16-array.rb b/c_glib/test/test-int16-array.rb index 13646e0d5b818..e0efb68019b24 100644 --- a/c_glib/test/test-int16-array.rb +++ b/c_glib/test/test-int16-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("s*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::Int16ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int32-array.rb b/c_glib/test/test-int32-array.rb index d1579a8eba881..9827e532bf154 100644 --- a/c_glib/test/test-int32-array.rb +++ b/c_glib/test/test-int32-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("l*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int32ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int64-array.rb b/c_glib/test/test-int64-array.rb index 5d9c37a55c084..39a74d34e23fa 100644 --- a/c_glib/test/test-int64-array.rb +++ b/c_glib/test/test-int64-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("q*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int64ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-int8-array.rb b/c_glib/test/test-int8-array.rb index e17c10c53611e..46fe591a575c2 100644 --- a/c_glib/test/test-int8-array.rb +++ b/c_glib/test/test-int8-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = 
Arrow::Int8ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4].pack("c*"), array.buffer.data.to_s) end def test_value builder = Arrow::Int8ArrayBuilder.new - builder.append(-1) + builder.append_value(-1) array = builder.finish assert_equal(-1, array.get_value(0)) end def test_values builder = Arrow::Int8ArrayBuilder.new - builder.append(-1) - builder.append(2) - builder.append(-4) + builder.append_value(-1) + builder.append_value(2) + builder.append_value(-4) array = builder.finish assert_equal([-1, 2, -4], array.values) end diff --git a/c_glib/test/test-list-array.rb b/c_glib/test/test-list-array.rb index 14f84067ac525..271d32236acbd 100644 --- a/c_glib/test/test-list-array.rb +++ b/c_glib/test/test-list-array.rb @@ -38,14 +38,14 @@ def test_value builder = Arrow::ListArrayBuilder.new(data_type) value_builder = builder.value_builder - builder.append - value_builder.append(-29) - value_builder.append(29) + builder.append_value + value_builder.append_value(-29) + value_builder.append_value(29) - builder.append - value_builder.append(-1) - value_builder.append(0) - value_builder.append(1) + builder.append_value + value_builder.append_value(-1) + value_builder.append_value(0) + value_builder.append_value(1) array = builder.finish value = array.get_value(1) diff --git a/c_glib/test/test-string-array.rb b/c_glib/test/test-string-array.rb index a9edb0ae49152..61459edbb8059 100644 --- a/c_glib/test/test-string-array.rb +++ b/c_glib/test/test-string-array.rb @@ -31,15 +31,15 @@ def test_new def test_value builder = Arrow::StringArrayBuilder.new - builder.append("Hello") + builder.append_value("Hello") array = builder.finish assert_equal("Hello", array.get_string(0)) end def test_buffer builder = Arrow::StringArrayBuilder.new - builder.append("Hello") - builder.append("World") + builder.append_value("Hello") + builder.append_value("World") array = builder.finish assert_equal("HelloWorld", array.buffer.data.to_s) end diff --git a/c_glib/test/test-struct-array.rb b/c_glib/test/test-struct-array.rb index 78760a9b30984..af7e299d8b7ce 100644 --- a/c_glib/test/test-struct-array.rb +++ b/c_glib/test/test-struct-array.rb @@ -58,13 +58,13 @@ def test_flatten data_type = Arrow::StructDataType.new(fields) builder = Arrow::StructArrayBuilder.new(data_type) - builder.append - builder.get_field_builder(0).append(-29) - builder.get_field_builder(1).append(true) + builder.append_value + builder.get_field_builder(0).append_value(-29) + builder.get_field_builder(1).append_value(true) - builder.append - builder.field_builders[0].append(2) - builder.field_builders[1].append(false) + builder.append_value + builder.field_builders[0].append_value(2) + builder.field_builders[1].append_value(false) array = builder.finish values = array.length.times.collect do |i| diff --git a/c_glib/test/test-uint16-array.rb b/c_glib/test/test-uint16-array.rb index 1362c8e7ff507..baa6934e4f4e2 100644 --- a/c_glib/test/test-uint16-array.rb +++ b/c_glib/test/test-uint16-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("S*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) + builder.append_value(1) 
array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt16ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint32-array.rb b/c_glib/test/test-uint32-array.rb index 01b3edb353ff2..b9efb4cf00403 100644 --- a/c_glib/test/test-uint32-array.rb +++ b/c_glib/test/test-uint32-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("L*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt32ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint64-array.rb b/c_glib/test/test-uint64-array.rb index a002af269293c..b4275cefdd9b8 100644 --- a/c_glib/test/test-uint64-array.rb +++ b/c_glib/test/test-uint64-array.rb @@ -29,16 +29,16 @@ def test_new def test_buffer builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("Q*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end @@ -46,9 +46,9 @@ def test_value def test_values require_gi_bindings(3, 1, 7) builder = Arrow::UInt64ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end diff --git a/c_glib/test/test-uint8-array.rb b/c_glib/test/test-uint8-array.rb index 9137e53be70e5..08dfb3064cccb 100644 --- a/c_glib/test/test-uint8-array.rb +++ b/c_glib/test/test-uint8-array.rb @@ -28,25 +28,25 @@ def test_new def test_buffer builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4].pack("C*"), array.buffer.data.to_s) end def test_value builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) + builder.append_value(1) array = builder.finish assert_equal(1, array.get_value(0)) end def test_values builder = Arrow::UInt8ArrayBuilder.new - builder.append(1) - builder.append(2) - builder.append(4) + builder.append_value(1) + builder.append_value(2) + builder.append_value(4) array = builder.finish assert_equal([1, 2, 4], array.values) end From 857deae933478970b4fc0ff55fab61f32a5c6e4f Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sat, 5 Jan 2019 20:23:22 +0900 Subject: [PATCH 022/203] ARROW-4154: [GLib] Add GArrowDecimal128DataType garrow_decimal_data_type_new() is 
deprecated. Author: Kouhei Sutou Closes #3305 from kou/glib-decimal128-data-type and squashes the following commits: b51b7a19 Use decimal128 4823eea6 Add GArrowDecimal128DataType --- c_glib/arrow-glib/Makefile.am | 6 +- c_glib/arrow-glib/array-builder.cpp | 6 +- c_glib/arrow-glib/array-builder.h | 5 +- c_glib/arrow-glib/basic-array.cpp | 4 +- c_glib/arrow-glib/basic-data-type.cpp | 61 +++++++++++++++---- c_glib/arrow-glib/basic-data-type.h | 28 +++++++-- .../{decimal.cpp => decimal128.cpp} | 2 +- c_glib/arrow-glib/{decimal.h => decimal128.h} | 0 .../{decimal.hpp => decimal128.hpp} | 2 +- c_glib/arrow-glib/meson.build | 6 +- c_glib/arrow-glib/orc-file-reader.h | 2 + c_glib/doc/arrow-glib/arrow-glib-docs.xml | 2 +- ...imal-array.rb => test-decimal128-array.rb} | 6 +- ...a-type.rb => test-decimal128-data-type.rb} | 10 +-- .../{test-decimal.rb => test-decimal128.rb} | 0 15 files changed, 99 insertions(+), 41 deletions(-) rename c_glib/arrow-glib/{decimal.cpp => decimal128.cpp} (99%) rename c_glib/arrow-glib/{decimal.h => decimal128.h} (100%) rename c_glib/arrow-glib/{decimal.hpp => decimal128.hpp} (96%) rename c_glib/test/{test-decimal-array.rb => test-decimal128-array.rb} (89%) rename c_glib/test/{test-decimal-data-type.rb => test-decimal128-data-type.rb} (80%) rename c_glib/test/{test-decimal.rb => test-decimal128.rb} (100%) diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am index bf97168eb81d7..a296595571438 100644 --- a/c_glib/arrow-glib/Makefile.am +++ b/c_glib/arrow-glib/Makefile.am @@ -59,7 +59,7 @@ libarrow_glib_la_headers = \ composite-array.h \ composite-data-type.h \ data-type.h \ - decimal.h \ + decimal128.h \ error.h \ field.h \ gobject-type.h \ @@ -110,7 +110,7 @@ libarrow_glib_la_sources = \ column.cpp \ composite-array.cpp \ composite-data-type.cpp \ - decimal.cpp \ + decimal128.cpp \ error.cpp \ field.cpp \ record-batch.cpp \ @@ -155,7 +155,7 @@ libarrow_glib_la_cpp_headers = \ codec.hpp \ column.hpp \ data-type.hpp \ - decimal.hpp \ + decimal128.hpp \ error.hpp \ field.hpp \ record-batch.hpp \ diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 4b61bfaf7fab9..5f2d4119ce6a2 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -23,9 +23,9 @@ #include #include +#include #include #include -#include template gboolean @@ -3803,14 +3803,14 @@ garrow_decimal128_array_builder_class_init(GArrowDecimal128ArrayBuilderClass *kl /** * garrow_decimal128_array_builder_new: - * @data_type: #GArrowDecimalDataType for the decimal. + * @data_type: #GArrowDecimal128DataType for the decimal. * * Returns: A newly created #GArrowDecimal128ArrayBuilder. 
 *
 * Since: 0.10.0
 */
 GArrowDecimal128ArrayBuilder *
-garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type)
+garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type)
 {
   auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
   auto builder = garrow_array_builder_new(arrow_data_type,
diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h
index 1ddc0266f4993..b2ad6f4bfd3fd 100644
--- a/c_glib/arrow-glib/array-builder.h
+++ b/c_glib/arrow-glib/array-builder.h
@@ -20,8 +20,7 @@
 #pragma once
 
 #include
-#include
-#include
+#include
 
 G_BEGIN_DECLS
 
@@ -1475,7 +1474,7 @@ struct _GArrowDecimal128ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
-GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimalDataType *data_type);
+GArrowDecimal128ArrayBuilder *garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type);
 
 #ifndef GARROW_DISABLE_DEPRECATED
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value)
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index fef43a0285e25..9aebd9cb8957a 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -22,12 +22,12 @@
 #endif
 
 #include
+#include
 #include
 #include
-#include
+#include
 #include
 #include
-#include
 #include
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index cd3aa97679b5d..2a599963ee3aa 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -85,7 +85,9 @@ G_BEGIN_DECLS
  * #GArrowTime64DataType is a class for the number of microseconds or
  * nanoseconds since midnight in 64-bit signed integer data type.
  *
- * #GArrowDecimalDataType is a class for 128-bit decimal data type.
+ * #GArrowDecimalDataType is a base class for decimal data types.
+ *
+ * #GArrowDecimal128DataType is a class for the 128-bit decimal data type.
  */
 
 typedef struct GArrowDataTypePrivate_ {
@@ -1040,9 +1042,9 @@ garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error)
 }
 
 
-G_DEFINE_TYPE(GArrowDecimalDataType,
-              garrow_decimal_data_type,
-              GARROW_TYPE_DATA_TYPE)
+G_DEFINE_ABSTRACT_TYPE(GArrowDecimalDataType,
+                       garrow_decimal_data_type,
+                       GARROW_TYPE_DATA_TYPE)
 
 static void
 garrow_decimal_data_type_init(GArrowDecimalDataType *object)
@@ -1062,18 +1064,16 @@ garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass)
  *
  * Returns: The newly created decimal data type.
  *
  * Since: 0.10.0
+ *
+ * Deprecated: 0.12.0:
+ *   Use garrow_decimal128_data_type_new() instead.
 */
 GArrowDecimalDataType *
 garrow_decimal_data_type_new(gint32 precision,
                              gint32 scale)
 {
-  auto arrow_data_type = arrow::decimal(precision, scale);
-
-  GArrowDecimalDataType *data_type =
-    GARROW_DECIMAL_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL_DATA_TYPE,
-                                          "data-type", &arrow_data_type,
-                                          NULL));
-  return data_type;
+  auto decimal128_data_type = garrow_decimal128_data_type_new(precision, scale);
+  return GARROW_DECIMAL_DATA_TYPE(decimal128_data_type);
 }
 
 /**
@@ -1112,6 +1112,43 @@ garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type)
   return arrow_decimal_type->scale();
 }
 
+
+G_DEFINE_TYPE(GArrowDecimal128DataType,
+              garrow_decimal128_data_type,
+              GARROW_TYPE_DECIMAL_DATA_TYPE)
+
+static void
+garrow_decimal128_data_type_init(GArrowDecimal128DataType *object)
+{
+}
+
+static void
+garrow_decimal128_data_type_class_init(GArrowDecimal128DataTypeClass *klass)
+{
+}
+
+/**
+ * garrow_decimal128_data_type_new:
+ * @precision: The precision of decimal data.
+ * @scale: The scale of decimal data.
+ *
+ * Returns: The newly created 128-bit decimal data type.
+ *
+ * Since: 0.12.0
+ */
+GArrowDecimal128DataType *
+garrow_decimal128_data_type_new(gint32 precision,
+                                gint32 scale)
+{
+  auto arrow_data_type = arrow::decimal(precision, scale);
+
+  auto data_type =
+    GARROW_DECIMAL128_DATA_TYPE(g_object_new(GARROW_TYPE_DECIMAL128_DATA_TYPE,
+                                             "data-type", &arrow_data_type,
+                                             NULL));
+  return data_type;
+}
+
 G_END_DECLS
 
 GArrowDataType *
 garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
 {
@@ -1199,7 +1236,7 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
     type = GARROW_TYPE_DICTIONARY_DATA_TYPE;
     break;
   case arrow::Type::type::DECIMAL:
-    type = GARROW_TYPE_DECIMAL_DATA_TYPE;
+    type = GARROW_TYPE_DECIMAL128_DATA_TYPE;
     break;
   default:
     type = GARROW_TYPE_DATA_TYPE;
diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h
index 45fddba34d4bc..ef41f1dbcfa0b 100644
--- a/c_glib/arrow-glib/basic-data-type.h
+++ b/c_glib/arrow-glib/basic-data-type.h
@@ -19,9 +19,9 @@
 #pragma once
 
-#include
+#include
 #include
-#include
+#include
 
 G_BEGIN_DECLS
 
@@ -651,6 +651,7 @@ GArrowTime64DataType *garrow_time64_data_type_new (GArrowTimeUnit unit,
 
 #define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type())
+/* TODO: Derived from GArrowFixedSizeBinaryDataType.
 */
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType,
                          garrow_decimal_data_type,
                          GARROW,
                          DECIMAL_DATA_TYPE,
                          GArrowDataType)
@@ -661,9 +662,28 @@ struct _GArrowDecimalDataTypeClass
 {
   GArrowDataTypeClass parent_class;
 };
 
-GArrowDecimalDataType *garrow_decimal_data_type_new (gint32 precision,
-                                                     gint32 scale);
+#ifndef GARROW_DISABLE_DEPRECATED
+GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_data_type_new)
+GArrowDecimalDataType *
+garrow_decimal_data_type_new(gint32 precision, gint32 scale);
+#endif
 
 gint32 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type);
 gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type);
+
+#define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType,
+                         garrow_decimal128_data_type,
+                         GARROW,
+                         DECIMAL128_DATA_TYPE,
+                         GArrowDecimalDataType)
+struct _GArrowDecimal128DataTypeClass
+{
+  GArrowDecimalDataTypeClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_0_12
+GArrowDecimal128DataType *
+garrow_decimal128_data_type_new(gint32 precision, gint32 scale);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/decimal.cpp b/c_glib/arrow-glib/decimal128.cpp
similarity index 99%
rename from c_glib/arrow-glib/decimal.cpp
rename to c_glib/arrow-glib/decimal128.cpp
index 34eb417a96105..e30eb7ee58638 100644
--- a/c_glib/arrow-glib/decimal.cpp
+++ b/c_glib/arrow-glib/decimal128.cpp
@@ -21,7 +21,7 @@
 # include
 #endif
 
-#include
+#include
 #include
 
 G_BEGIN_DECLS
diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal128.h
similarity index 100%
rename from c_glib/arrow-glib/decimal.h
rename to c_glib/arrow-glib/decimal128.h
diff --git a/c_glib/arrow-glib/decimal.hpp b/c_glib/arrow-glib/decimal128.hpp
similarity index 96%
rename from c_glib/arrow-glib/decimal.hpp
rename to c_glib/arrow-glib/decimal128.hpp
index ce56cfe0bd062..84bf47e409f50 100644
--- a/c_glib/arrow-glib/decimal.hpp
+++ b/c_glib/arrow-glib/decimal128.hpp
@@ -23,7 +23,7 @@
 
 #include
 
-#include
+#include
 
 GArrowDecimal128 *garrow_decimal128_new_raw(std::shared_ptr<arrow::Decimal128> *arrow_decimal128);
 std::shared_ptr<arrow::Decimal128> garrow_decimal128_get_raw(GArrowDecimal128 *decimal);
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index d962ec103175e..14126bee8d784 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -27,7 +27,7 @@ sources = files(
   'column.cpp',
   'composite-array.cpp',
   'composite-data-type.cpp',
-  'decimal.cpp',
+  'decimal128.cpp',
   'error.cpp',
   'field.cpp',
   'record-batch.cpp',
@@ -77,7 +77,7 @@ c_headers = files(
   'composite-array.h',
   'composite-data-type.h',
   'data-type.h',
-  'decimal.h',
+  'decimal128.h',
   'error.h',
   'field.h',
   'gobject-type.h',
@@ -128,7 +128,7 @@ cpp_headers = files(
   'codec.hpp',
   'column.hpp',
   'data-type.hpp',
-  'decimal.hpp',
+  'decimal128.hpp',
   'error.hpp',
   'field.hpp',
   'record-batch.hpp',
diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h
index 97cf1efa92ff7..9551d52e0fd55 100644
--- a/c_glib/arrow-glib/orc-file-reader.h
+++ b/c_glib/arrow-glib/orc-file-reader.h
@@ -45,6 +45,7 @@ garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
                                          const gint *field_indexes,
                                          guint n_field_indexes);
 #endif
+GARROW_AVAILABLE_IN_0_12
 void
 garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader,
                                          const gint *field_indices,
@@ -55,6 +56,7 @@ const gint *
 garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
                                          guint *n_field_indexes);
 #endif
+GARROW_AVAILABLE_IN_0_12
 const gint *
garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, guint *n_field_indices); diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml index 17b75005ff97a..f9f01fe23e4de 100644 --- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml +++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml @@ -53,7 +53,7 @@ Decimal - + Tensor diff --git a/c_glib/test/test-decimal-array.rb b/c_glib/test/test-decimal128-array.rb similarity index 89% rename from c_glib/test/test-decimal-array.rb rename to c_glib/test/test-decimal128-array.rb index a5eb28253d95f..132ceb7788585 100644 --- a/c_glib/test/test-decimal-array.rb +++ b/c_glib/test/test-decimal128-array.rb @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. -class TestDecimalArray < Test::Unit::TestCase +class TestDecimal128Array < Test::Unit::TestCase def test_format_value - data_type = Arrow::DecimalDataType.new(8,2) + data_type = Arrow::Decimal128DataType.new(8, 2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") builder.append_value(decimal) @@ -26,7 +26,7 @@ def test_format_value end def test_value - data_type = Arrow::DecimalDataType.new(8,2) + data_type = Arrow::Decimal128DataType.new(8, 2) builder = Arrow::Decimal128ArrayBuilder.new(data_type) decimal = Arrow::Decimal128.new("23423445") builder.append_value(decimal) diff --git a/c_glib/test/test-decimal-data-type.rb b/c_glib/test/test-decimal128-data-type.rb similarity index 80% rename from c_glib/test/test-decimal-data-type.rb rename to c_glib/test/test-decimal128-data-type.rb index 04bfe78f925c0..27a31e28309cd 100644 --- a/c_glib/test/test-decimal-data-type.rb +++ b/c_glib/test/test-decimal128-data-type.rb @@ -15,24 +15,24 @@ # specific language governing permissions and limitations # under the License. 
-class TestDecimalDataType < Test::Unit::TestCase
+class TestDecimal128DataType < Test::Unit::TestCase
   def test_type
-    data_type = Arrow::DecimalDataType.new(2, 0)
+    data_type = Arrow::Decimal128DataType.new(2, 0)
     assert_equal(Arrow::Type::DECIMAL, data_type.id)
   end
 
   def test_to_s
-    data_type = Arrow::DecimalDataType.new(2, 0)
+    data_type = Arrow::Decimal128DataType.new(2, 0)
     assert_equal("decimal(2, 0)", data_type.to_s)
   end
 
   def test_precision
-    data_type = Arrow::DecimalDataType.new(8, 2)
+    data_type = Arrow::Decimal128DataType.new(8, 2)
     assert_equal(8, data_type.precision)
   end
 
   def test_scale
-    data_type = Arrow::DecimalDataType.new(8, 2)
+    data_type = Arrow::Decimal128DataType.new(8, 2)
     assert_equal(2, data_type.scale)
   end
 end
diff --git a/c_glib/test/test-decimal.rb b/c_glib/test/test-decimal128.rb
similarity index 100%
rename from c_glib/test/test-decimal.rb
rename to c_glib/test/test-decimal128.rb

From 46b1bc764ade2ac776a94255e4ca0467f375ee4e Mon Sep 17 00:00:00 2001
From: Chao Sun
Date: Sat, 5 Jan 2019 12:16:04 -0600
Subject: [PATCH 023/203] ARROW-4160: [Rust] Add README and executable files to
 parquet

Author: Chao Sun

Closes #3314 from sunchao/ARROW-4160 and squashes the following commits:

9d215df22  ARROW-4160: Add README and executable files to parquet
---
 rust/parquet/Cargo.toml                |   4 +-
 rust/parquet/README.md                 | 111 +++++++++++++++++++++++++
 rust/parquet/src/bin/parquet-read.rs   |  87 +++++++++++++++++++
 rust/parquet/src/bin/parquet-schema.rs |  88 ++++++++++++++++++++
 4 files changed, 289 insertions(+), 1 deletion(-)
 create mode 100644 rust/parquet/README.md
 create mode 100644 rust/parquet/src/bin/parquet-read.rs
 create mode 100644 rust/parquet/src/bin/parquet-schema.rs

diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml
index aa7eac224c0cf..7478992327ddc 100644
--- a/rust/parquet/Cargo.toml
+++ b/rust/parquet/Cargo.toml
@@ -17,9 +17,11 @@
 [package]
 name = "parquet"
-version = "0.12.0-SNAPSHOT"
+version = "0.5.0-SNAPSHOT"
 license = "Apache-2.0"
 description = "Apache Parquet implementation in Rust"
+homepage = "https://github.com/apache/arrow"
+repository = "https://github.com/apache/arrow"
 authors = ["Apache Arrow <dev@arrow.apache.org>"]
 keywords = [ "arrow", "parquet", "hadoop" ]
 readme = "README.md"
diff --git a/rust/parquet/README.md b/rust/parquet/README.md
new file mode 100644
index 0000000000000..e9238ffba8a13
--- /dev/null
+++ b/rust/parquet/README.md
@@ -0,0 +1,111 @@
+
+
+# An Apache Parquet implementation in Rust
+
+## Usage
+Add this to your Cargo.toml:
+```toml
+[dependencies]
+parquet = "0.4"
+```
+
+and this to your crate root:
+```rust
+extern crate parquet;
+```
+
+Example usage of reading data:
+```rust
+use std::fs::File;
+use std::path::Path;
+use parquet::file::reader::{FileReader, SerializedFileReader};
+
+let file = File::open(&Path::new("/path/to/file")).unwrap();
+let reader = SerializedFileReader::new(file).unwrap();
+let mut iter = reader.get_row_iter(None).unwrap();
+while let Some(record) = iter.next() {
+    println!("{}", record);
+}
+```
+See the [crate documentation](https://docs.rs/crate/parquet/0.4.2) for the available API.
+
+## Supported Parquet Version
+- Parquet-format 2.4.0
+
+To update the Parquet format to a newer version, check whether a newer
+[parquet-format](https://github.com/sunchao/parquet-format-rs) release is available.
+Then simply update the version of the `parquet-format` crate in Cargo.toml.
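+
+For example, a hypothetical `Cargo.toml` entry after such an update might look
+like the following sketch (the exact version number is an assumption; use
+whatever `parquet-format` has actually published):
+```toml
+[dependencies]
+parquet-format = "2.4"
+```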
+
+## Features
+- [X] All encodings supported
+- [X] All compression codecs supported
+- [X] Read support
+  - [X] Primitive column value readers
+  - [X] Row record reader
+  - [ ] Arrow record reader
+- [X] Statistics support
+- [X] Write support
+  - [X] Primitive column value writers
+  - [ ] Row record writer
+  - [ ] Arrow record writer
+- [ ] Predicate pushdown
+- [ ] Parquet format 2.5 support
+- [ ] HDFS support
+
+## Requirements
+- Rust nightly
+
+See [Working with nightly Rust](https://github.com/rust-lang-nursery/rustup.rs/blob/master/README.md#working-with-nightly-rust)
+to install the nightly toolchain and set it as the default.
+
+## Build
+Run `cargo build`, or `cargo build --release` to build in release mode.
+Some features take advantage of SSE4.2 instructions, which can be
+enabled by adding `RUSTFLAGS="-C target-feature=+sse4.2"` before the
+`cargo build` command.
+
+## Test
+Run `cargo test` for unit tests.
+
+## Binaries
+The following binaries are provided (use `cargo install` to install them):
+- **parquet-schema** for printing Parquet file schema and metadata.
+`Usage: parquet-schema <file-path> [verbose]`, where `file-path` is the path to a Parquet file,
+and the optional `verbose` is a boolean flag that allows printing full metadata or schema only
+(when not specified, only the schema is printed).
+
+- **parquet-read** for reading records from a Parquet file.
+`Usage: parquet-read <file-path> [num-records]`, where `file-path` is the path to a Parquet file,
+and `num-records` is the number of records to read from a file (when not specified, all records will
+be printed).
+
+If you see a `Library not loaded` error, please make sure `LD_LIBRARY_PATH` is set properly:
+```
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(rustc --print sysroot)/lib
+```
+
+## Benchmarks
+Run `cargo bench` for benchmarks.
+
+## Docs
+To build the documentation, run `cargo doc --no-deps`.
+To compile and view it in the browser, run `cargo doc --no-deps --open`.
+
+## License
+Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0.
diff --git a/rust/parquet/src/bin/parquet-read.rs b/rust/parquet/src/bin/parquet-read.rs
new file mode 100644
index 0000000000000..c86b26e3e7a4d
--- /dev/null
+++ b/rust/parquet/src/bin/parquet-read.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary file to read data from a Parquet file.
+//!
+//! # Install
+//!
+//! `parquet-read` can be installed using `cargo`:
+//! ```
+//! cargo install parquet
+//! ```
+//! After this `parquet-read` should be globally available:
+//! ```
+//! parquet-read XYZ.parquet
+//! ```
+//!
+//! The binary can also be built from the source code and run as follows:
+//! ```
+//! cargo run --bin parquet-read XYZ.parquet
+//! ```
+//!
+//! # Usage
+//!
+//! ```
+//! parquet-read <file-path> [num-records]
+//! ```
+//! where `file-path` is the path to a Parquet file and `num-records` is an optional
+//! numeric argument that specifies the number of records to read from the file.
+//! When not provided, all records are read.
+//!
+//! Note that `parquet-read` reads the full file schema; no projection or filtering is
+//! applied.
+
+extern crate parquet;
+
+use std::{env, fs::File, path::Path, process};
+
+use parquet::file::reader::{FileReader, SerializedFileReader};
+
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    if args.len() != 2 && args.len() != 3 {
+        println!("Usage: parquet-read <file-path> [num-records]");
+        process::exit(1);
+    }
+
+    let mut num_records: Option<usize> = None;
+    if args.len() == 3 {
+        match args[2].parse() {
+            Ok(value) => num_records = Some(value),
+            Err(e) => panic!("Error when reading value for [num-records], {}", e),
+        }
+    }
+
+    let path = Path::new(&args[1]);
+    let file = File::open(&path).unwrap();
+    let parquet_reader = SerializedFileReader::new(file).unwrap();
+
+    // Use full schema as projected schema
+    let mut iter = parquet_reader.get_row_iter(None).unwrap();
+
+    let mut start = 0;
+    let end = num_records.unwrap_or(0);
+    let all_records = num_records.is_none();
+
+    while all_records || start < end {
+        match iter.next() {
+            Some(row) => println!("{}", row),
+            None => break,
+        }
+        start += 1;
+    }
+}
diff --git a/rust/parquet/src/bin/parquet-schema.rs b/rust/parquet/src/bin/parquet-schema.rs
new file mode 100644
index 0000000000000..2eaf7652ae9d6
--- /dev/null
+++ b/rust/parquet/src/bin/parquet-schema.rs
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Binary file to print the schema and metadata of a Parquet file.
+//!
+//! # Install
+//!
+//! `parquet-schema` can be installed using `cargo`:
+//! ```
+//! cargo install parquet
+//! ```
+//! After this `parquet-schema` should be globally available:
+//! ```
+//! parquet-schema XYZ.parquet
+//! ```
+//!
+//! The binary can also be built from the source code and run as follows:
+//! ```
+//! cargo run --bin parquet-schema XYZ.parquet
+//! ```
+//!
+//! # Usage
+//!
+//! ```
+//! parquet-schema <file-path> [verbose]
+//! ```
+//! where `file-path` is the path to a Parquet file and `verbose` is an optional boolean
+//! flag: when set to `false` (the default behaviour when not provided), only the schema
+//! is printed; when set to `true`, the full file metadata is printed.
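+//!
+//! For example, a hypothetical invocation that prints the full metadata of a
+//! file named `data.parquet` (the file name is only for illustration):
+//! ```
+//! parquet-schema data.parquet true
+//! ```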
+
+extern crate parquet;
+
+use std::{env, fs::File, path::Path, process};
+
+use parquet::{
+    file::reader::{FileReader, SerializedFileReader},
+    schema::printer::{print_file_metadata, print_parquet_metadata},
+};
+
+fn main() {
+    let args: Vec<String> = env::args().collect();
+    if args.len() != 2 && args.len() != 3 {
+        println!("Usage: parquet-schema <file-path> [verbose]");
+        process::exit(1);
+    }
+    let path = Path::new(&args[1]);
+    let mut verbose = false;
+    if args.len() == 3 {
+        match args[2].parse() {
+            Ok(b) => verbose = b,
+            Err(e) => panic!(
+                "Error when reading value for [verbose] (expected either 'true' or 'false'): {}",
+                e
+            ),
+        }
+    }
+    let file = match File::open(&path) {
+        Err(e) => panic!("Error when opening file {}: {}", path.display(), e),
+        Ok(f) => f,
+    };
+    match SerializedFileReader::new(file) {
+        Err(e) => panic!("Error when parsing Parquet file: {}", e),
+        Ok(parquet_reader) => {
+            let metadata = parquet_reader.metadata();
+            println!("Metadata for file: {}", &args[1]);
+            println!("");
+            if verbose {
+                print_parquet_metadata(&mut std::io::stdout(), &metadata);
+            } else {
+                print_file_metadata(&mut std::io::stdout(), &metadata.file_metadata());
+            }
+        }
+    }
+}

From 66f0d39a1c9ddd5e9de85ff7bfc8c13601372050 Mon Sep 17 00:00:00 2001
From: Binyang2014
Date: Sun, 6 Jan 2019 02:31:10 +0800
Subject: [PATCH 024/203] [Documentation][C++] Change build conda create command
 for Windows developer (#3316)

[Documentation][C++] Change build conda create command for Windows developer
---
 cpp/apidoc/Windows.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpp/apidoc/Windows.md b/cpp/apidoc/Windows.md
index 5199c2fdbfa59..8a724d0342be7 100644
--- a/cpp/apidoc/Windows.md
+++ b/cpp/apidoc/Windows.md
@@ -38,10 +38,11 @@ Launch cmd.exe and run following commands:
 conda config --add channels conda-forge
 ```
 
-Now, you can bootstrap a build environment
+Now, you can bootstrap a build environment (call from the root directory of the
+Arrow codebase):
 
 ```shell
-conda create -n arrow-dev cmake git boost-cpp flatbuffers rapidjson cmake thrift-cpp snappy zlib brotli gflags lz4-c zstd -c conda-forge
+conda create -n arrow-dev --file=ci\conda_env_cpp.yml
 ```
 
 > **Note:** Make sure to get the `conda-forge` build of `gflags` as the

From 5723adad7ad80c95ba8fcb55d40186d6a29edb74 Mon Sep 17 00:00:00 2001
From: Jeffrey Wong
Date: Sat, 5 Jan 2019 12:33:09 -0600
Subject: [PATCH 025/203] ARROW-3731: MVP to read parquet in R library

I am contributing to [Arrow 3731](https://issues.apache.org/jira/browse/ARROW-3731). This PR has the minimum functionality to read parquet files into an arrow::Table, which can then be converted to a tibble. Multiple parquet files can be read inside `lapply`, and then concatenated at the end.

Steps to compile:

1) Build the arrow and parquet C++ projects
2) In R, run `devtools::load_all()`

What I could use help with: the biggest challenge for me is my lack of experience with pkg-config. The R library has a `configure` file which uses pkg-config to figure out what C++ libraries to link to. Currently, `configure` looks up the Arrow project and links to -larrow only. We need it to also link to -lparquet.
I do not know how to modify pkg-config's metadata to let it know to link to both -larrow and -lparquet Author: Jeffrey Wong Author: Romain Francois Author: jeffwong-nflx Closes #3230 from jeffwong-nflx/master and squashes the following commits: c67fa3d36 Merge pull request #3 from jeffwong-nflx/cleanup 1df3026cb don't hard code -larrow and -lparquet 8ccaa5172 cleanup 75ba5c9ae add contributor 56adad2ae Merge pull request #2 from romainfrancois/3731/parquet-2 7d6e64df2 read_parquet() only reading one parquet file, and gains a `as_tibble` argument e936b4400 need parquet on travis too ff260c587 header was too commented, renamed to parquet.cpp 9e1897f80 styling etc ... 456c5d260 read parquet files 22d89dd23 hardcode -larrow and -lparquet --- .travis.yml | 2 ++ r/DESCRIPTION | 2 ++ r/NAMESPACE | 1 + r/R/RcppExports.R | 4 +++ r/R/parquet.R | 33 +++++++++++++++++++++++ r/README.Rmd | 2 +- r/README.md | 61 ++++++++++++------------------------------- r/configure | 4 +-- r/man/read_parquet.Rd | 21 +++++++++++++++ r/src/RcppExports.cpp | 12 +++++++++ r/src/parquet.cpp | 37 ++++++++++++++++++++++++++ 11 files changed, 131 insertions(+), 48 deletions(-) create mode 100644 r/R/parquet.R create mode 100644 r/man/read_parquet.Rd create mode 100644 r/src/parquet.cpp diff --git a/.travis.yml b/.travis.yml index f14f7e4785948..916ccf460ecf8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -326,6 +326,8 @@ matrix: language: r cache: packages latex: false + env: + - ARROW_TRAVIS_PARQUET=1 before_install: # Have to copy-paste this here because of how R's build steps work - eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py` diff --git a/r/DESCRIPTION b/r/DESCRIPTION index a2632973134b9..5303a877f9e26 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -4,6 +4,7 @@ Version: 0.11.0.9000 Authors@R: c( person("Romain", "François", email = "romain@rstudio.com", role = c("aut", "cre")), person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("ctb")), + person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", role = c("ctb")), person("Apache Arrow", email = "dev@arrow.apache.org", role = c("aut", "cph")) ) Description: R Integration to 'Apache' 'Arrow'. @@ -62,6 +63,7 @@ Collate: 'memory_pool.R' 'message.R' 'on_exit.R' + 'parquet.R' 'read_record_batch.R' 'read_table.R' 'reexports-bit64.R' diff --git a/r/NAMESPACE b/r/NAMESPACE index 8846defbd8e65..f8f6384dce1f8 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -123,6 +123,7 @@ export(read_arrow) export(read_csv_arrow) export(read_feather) export(read_message) +export(read_parquet) export(read_record_batch) export(read_schema) export(read_table) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 55b9ab33ebf98..c6fe8719f4e89 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -637,6 +637,10 @@ ipc___ReadMessage <- function(stream) { .Call(`_arrow_ipc___ReadMessage`, stream) } +read_parquet_file <- function(filename) { + .Call(`_arrow_read_parquet_file`, filename) +} + RecordBatch__num_columns <- function(x) { .Call(`_arrow_RecordBatch__num_columns`, x) } diff --git a/r/R/parquet.R b/r/R/parquet.R new file mode 100644 index 0000000000000..141da7bd04b2c --- /dev/null +++ b/r/R/parquet.R @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read parquet file from disk
+#'
+#' @param file a file path
+#' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
+#' @param ... currently ignored
+#'
+#' @return an [arrow::Table][arrow__Table], or a data frame if `as_tibble` is `TRUE`.
+#'
+#' @export
+read_parquet <- function(file, as_tibble = TRUE, ...) {
+  tab <- shared_ptr(`arrow::Table`, read_parquet_file(file))
+  if (isTRUE(as_tibble)) {
+    tab <- as_tibble(tab)
+  }
+  tab
+}
diff --git a/r/README.Rmd b/r/README.Rmd
index 2c51d01c0f00f..9f0f39fef5352 100644
--- a/r/README.Rmd
+++ b/r/README.Rmd
@@ -25,7 +25,7 @@ git clone https://github.com/apache/arrow.git
 cd arrow/cpp && mkdir release && cd release
 
 # It is important to statically link to boost libraries
-cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
+cmake .. -DARROW_PARQUET=ON -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
 make install
 ```
diff --git a/r/README.md b/r/README.md
index 868fdff0a06e0..987d0c24a185b 100644
--- a/r/README.md
+++ b/r/README.md
@@ -14,7 +14,7 @@ git clone https://github.com/apache/arrow.git
 cd arrow/cpp && mkdir release && cd release
 
 # It is important to statically link to boost libraries
-cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
+cmake .. -DARROW_PARQUET=ON -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
 make install
 ```
 
@@ -38,48 +38,19 @@ tf <- tempfile()
 #> # A tibble: 10 x 2
 #>        x       y
 #>    <int>   <dbl>
-#>  1     1 -0.255 
-#>  2     2 -0.162 
-#>  3     3 -0.614 
-#>  4     4 -0.322 
-#>  5     5  0.0693
-#>  6     6 -0.920 
-#>  7     7 -1.08  
-#>  8     8  0.658 
-#>  9     9  0.821 
-#> 10    10  0.539 
-arrow::write_arrow(tib, tf)
-
-# read it back with pyarrow
-pa <- import("pyarrow")
-as_tibble(pa$open_file(tf)$read_pandas())
-#> # A tibble: 10 x 2
-#>        x       y
-#>    <int>   <dbl>
-#>  1     1 -0.255 
-#>  2     2 -0.162 
-#>  3     3 -0.614 
-#>  4     4 -0.322 
-#>  5     5  0.0693
-#>  6     6 -0.920 
-#>  7     7 -1.08  
-#>  8     8  0.658 
-#>  9     9  0.821 
-#> 10    10  0.539 
-```
-
-## Development
-
-### Code style
-
-We use Google C++ style in our C++ code. Check for style errors with
-
-```
-./lint.sh
-```
-
-You can fix the style issues with
-
+#>  1     1  0.0855
+#>  2     2 -1.68  
+#>  3     3 -0.0294
+#>  4     4 -0.124 
+#>  5     5  0.0675
+#>  6     6  1.64  
+#>  7     7  1.54  
+#>  8     8 -0.0209
+#>  9     9 -0.982 
+#> 10    10  0.349 
+# arrow::write_arrow(tib, tf)
+
+# # read it back with pyarrow
+# pa <- import("pyarrow")
+# as_tibble(pa$open_file(tf)$read_pandas())
 ```
-./lint.sh
-```
\ No newline at end of file
diff --git a/r/configure b/r/configure
index 28f6a73ac7ef5..c17fd4c2ef624 100755
--- a/r/configure
+++ b/r/configure
@@ -26,13 +26,13 @@
 # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib'
 
 # Library settings
-PKG_CONFIG_NAME="arrow"
+PKG_CONFIG_NAME="arrow parquet"
 PKG_DEB_NAME="arrow"
 PKG_RPM_NAME="arrow"
 PKG_CSW_NAME="arrow"
 PKG_BREW_NAME="apache-arrow"
 PKG_TEST_HEADER=""
-PKG_LIBS="-larrow"
+PKG_LIBS="-larrow -lparquet"
 
 # Use pkg-config if available
 pkg-config --version >/dev/null 2>&1
diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd
new file mode 100644
index 0000000000000..c29e18bca5baf
--- /dev/null
+++ b/r/man/read_parquet.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\name{read_parquet}
+\alias{read_parquet}
+\title{Read parquet file from disk}
+\usage{
+read_parquet(file, as_tibble = TRUE, ...)
+}
+\arguments{
+\item{file}{a file path}
+
+\item{as_tibble}{should the \link[=arrow__Table]{arrow::Table} be converted to a tibble.}
+
+\item{...}{currently ignored}
+}
+\value{
+an \link[=arrow__Table]{arrow::Table}, or a data frame if \code{as_tibble} is \code{TRUE}.
+}
+\description{
+Read parquet file from disk
+}
diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp
index c752afba1c258..1e8fed1867655 100644
--- a/r/src/RcppExports.cpp
+++ b/r/src/RcppExports.cpp
@@ -1779,6 +1779,17 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// read_parquet_file
+std::shared_ptr<arrow::Table> read_parquet_file(std::string filename);
+RcppExport SEXP _arrow_read_parquet_file(SEXP filenameSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
+    rcpp_result_gen = Rcpp::wrap(read_parquet_file(filename));
+    return rcpp_result_gen;
+END_RCPP
+}
 // RecordBatch__num_columns
 int RecordBatch__num_columns(const std::shared_ptr<arrow::RecordBatch>& x);
 RcppExport SEXP _arrow_RecordBatch__num_columns(SEXP xSEXP) {
@@ -2369,6 +2380,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1},
     {"_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1},
    {"_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1},
+    {"_arrow_read_parquet_file", (DL_FUNC) &_arrow_read_parquet_file, 1},
     {"_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1},
     {"_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1},
     {"_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1},
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
new file mode 100644
index 0000000000000..859bd4826e7c2
--- /dev/null
+++ b/r/src/parquet.cpp
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <parquet/arrow/reader.h>
+#include <parquet/exception.h>
+#include <Rcpp.h>
+
+// [[Rcpp::export]]
+std::shared_ptr<arrow::Table> read_parquet_file(std::string filename) {
+  std::shared_ptr<arrow::io::ReadableFile> infile;
+  PARQUET_THROW_NOT_OK(
+      arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool(), &infile));
+
+  std::unique_ptr<parquet::arrow::FileReader> reader;
+  PARQUET_THROW_NOT_OK(
+      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
+  std::shared_ptr<arrow::Table> table;
+  PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
+
+  return table;
+}
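
The commit message above mentions reading multiple Parquet files and concatenating the results. As a reference point, here is a minimal C++ sketch of that pattern using the same APIs as `r/src/parquet.cpp`; the helper name and file list are illustrative, and all inputs are assumed to share a single schema (a requirement of `arrow::ConcatenateTables`):

```cpp
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#include <parquet/exception.h>

#include <memory>
#include <string>
#include <vector>

// Read every file into an arrow::Table, then concatenate the tables.
std::shared_ptr<arrow::Table> read_parquet_files(
    const std::vector<std::string>& filenames) {
  std::vector<std::shared_ptr<arrow::Table>> tables;
  for (const auto& filename : filenames) {
    std::shared_ptr<arrow::io::ReadableFile> infile;
    PARQUET_THROW_NOT_OK(
        arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool(), &infile));
    std::unique_ptr<parquet::arrow::FileReader> reader;
    PARQUET_THROW_NOT_OK(
        parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
    std::shared_ptr<arrow::Table> table;
    PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
    tables.push_back(table);
  }
  // ConcatenateTables fails if the tables' schemas differ.
  std::shared_ptr<arrow::Table> result;
  PARQUET_THROW_NOT_OK(arrow::ConcatenateTables(tables, &result));
  return result;
}
```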

From 91a72ac7e1361b78167a703ecd5dddb85b621159 Mon Sep 17 00:00:00 2001
From: jlapacik
Date: Sat, 5 Jan 2019 14:19:52 -0600
Subject: [PATCH 026/203] ARROW-4130: [Go] offset not used when accessing
 binary array

Closes https://github.com/apache/arrow/issues/3270.

Author: jlapacik

Closes #3283 from jlapacik/fix/go-binary-slice and squashes the following commits:

5cf6a4f03  assign slice value in out of bounds tests
0666c0ed4  allocate new slice for each test case
9b5a00057  remove single letter variable
b46f8412d  ARROW-4130:  offset not used when accessing binary array
---
 go/arrow/array/binary.go      |  38 +++-
 go/arrow/array/binary_test.go | 343 ++++++++++++++++++++++++++++++++++
 2 files changed, 376 insertions(+), 5 deletions(-)

diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go
index 0b89b7e5817cc..850fb09b4a81a 100644
--- a/go/arrow/array/binary.go
+++ b/go/arrow/array/binary.go
@@ -38,7 +38,13 @@ func NewBinaryData(data *Data) *Binary {
 }
 
 // Value returns the slice at index i. This value should not be mutated.
-func (a *Binary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[i]:a.valueOffsets[i+1]] }
+func (a *Binary) Value(i int) []byte {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	idx := a.array.data.offset + i
+	return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]]
+}
 
 // ValueString returns the string at index i without performing additional allocations.
 // The string is only valid for the lifetime of the Binary array.
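
// An illustrative note on the fix above (not part of the patch): element
// indices are now shifted by the array's data offset before the value-offsets
// buffer is consulted. Assuming a parent array built from
// {"a", "bc", "def", "g", "hijk", ...}, a slice created with
// NewSlice(arr, 3, 8) carries data offset 3, so:
//
//	slice.Value(0) // reads valueOffsets[3]:valueOffsets[4] -> "g"
//	slice.Value(1) // reads valueOffsets[4]:valueOffsets[5] -> "hijk"
//
// and any index outside [0, slice.Len()) panics instead of silently reading
// the parent's bytes.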
@@ -47,10 +53,32 @@ func (a *Binary) ValueString(i int) string { return *(*string)(unsafe.Pointer(&b)) } -func (a *Binary) ValueOffset(i int) int { return int(a.valueOffsets[i]) } -func (a *Binary) ValueLen(i int) int { return int(a.valueOffsets[i+1] - a.valueOffsets[i]) } -func (a *Binary) ValueOffsets() []int32 { return a.valueOffsets } -func (a *Binary) ValueBytes() []byte { return a.valueBytes } +func (a *Binary) ValueOffset(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + return int(a.valueOffsets[a.array.data.offset+i]) +} + +func (a *Binary) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) +} + +func (a *Binary) ValueOffsets() []int32 { + beg := a.array.data.offset + end := beg + a.array.data.length + 1 + return a.valueOffsets[beg:end] +} + +func (a *Binary) ValueBytes() []byte { + beg := a.array.data.offset + end := beg + a.array.data.length + return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] +} func (a *Binary) setData(data *Data) { if len(data.buffers) != 3 { diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go index 87d1b58c47d14..2af45dee60f76 100644 --- a/go/arrow/array/binary_test.go +++ b/go/arrow/array/binary_test.go @@ -17,6 +17,7 @@ package array import ( + "reflect" "testing" "github.com/stretchr/testify/assert" @@ -62,3 +63,345 @@ func TestBinary(t *testing.T) { b.Release() } + +func TestBinarySliceData(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + for _, v := range values { + b.AppendString(v) + } + + arr := b.NewArray().(*Binary) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, arr.Len()) + + for i := range vs { + vs[i] = arr.ValueString(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + want []string + }{ + { + interval: [2]int64{0, 0}, + want: []string{}, + }, + { + interval: [2]int64{0, 5}, + want: []string{"a", "bc", "def", "g", "hijk"}, + }, + { + interval: [2]int64{0, 10}, + want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}, + }, + { + interval: [2]int64{5, 10}, + want: []string{"lm", "n", "opq", "rs", "tu"}, + }, + { + interval: [2]int64{10, 10}, + want: []string{}, + }, + { + interval: [2]int64{2, 7}, + want: []string{"def", "g", "hijk", "lm", "n"}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, slice.Len()) + + for i := range vs { + vs[i] = slice.ValueString(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBinarySliceDataWithNull(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, 
true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 3; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]string, arr.Len()) + + for i := range vs { + vs[i] = arr.ValueString(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + nulls int + want []string + }{ + { + interval: [2]int64{0, 2}, + nulls: 0, + want: []string{"a", "bc"}, + }, + { + interval: [2]int64{0, 3}, + nulls: 1, + want: []string{"a", "bc", ""}, + }, + { + interval: [2]int64{0, 4}, + nulls: 2, + want: []string{"a", "bc", "", ""}, + }, + { + interval: [2]int64{4, 8}, + nulls: 0, + want: []string{"hijk", "lm", "", "opq"}, + }, + { + interval: [2]int64{2, 9}, + nulls: 3, + want: []string{"", "", "hijk", "lm", "", "opq", ""}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := slice.NullN(), tc.nulls; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + vs := make([]string, slice.Len()) + + for i := range vs { + vs[i] = slice.ValueString(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBinarySliceOutOfBounds(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + for _, v := range values { + b.AppendString(v) + } + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 3, 8).(*Binary) + defer slice.Release() + + tests := []struct { + index int + panic bool + }{ + { + index: -1, + panic: true, + }, + { + index: 5, + panic: true, + }, + { + index: 0, + panic: false, + }, + { + index: 4, + panic: false, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + var val string + + if tc.panic { + defer func() { + e := recover() + if e == nil { + t.Fatalf("this should have panicked, but did not; slice value %q", val) + } + if got, want := e.(string), "arrow/array: index out of range"; got != want { + t.Fatalf("invalid error. 
got=%q, want=%q", got, want) + } + }() + } else { + defer func() { + if e := recover(); e != nil { + t.Fatalf("unexpected panic: %v", e) + } + }() + } + + val = slice.ValueString(tc.index) + }) + } +} + +func TestBinaryValueOffset(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + offset := 3 + vs := values[2:9] + + for i, v := range vs { + assert.Equal(t, offset, slice.ValueOffset(i)) + offset += len(v) + } +} + +func TestBinaryValueLen(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + vs := values[2:9] + + for i, v := range vs { + assert.Equal(t, len(v), slice.ValueLen(i)) + } +} + +func TestBinaryValueOffsets(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) +} + +func TestBinaryValueBytes(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer mem.AssertSize(t, 0) + + values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} + valids := []bool{true, true, false, false, true, true, true, true, false, true} + + b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) + defer b.Release() + + b.AppendStringValues(values, valids) + + arr := b.NewArray().(*Binary) + defer arr.Release() + + assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) + + slice := NewSlice(arr, 2, 9).(*Binary) + defer slice.Release() + + assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) +} From 489534046290db2f607c59bf57f32d888e8109ed Mon Sep 17 00:00:00 2001 From: Neville Dipale Date: Sat, 5 Jan 2019 15:48:20 -0700 Subject: [PATCH 027/203] ARROW-4155: [Rust] Implement array_ops::sum() for PrimitiveArray Adds the ability to return the sum of column Author: Neville Dipale Closes #3317 from nevi-me/rust/array-ops and squashes the following commits: bba0788 fix if-else branch 62a91f3 add condition to check null values on sum 9ca0034 cargo fmt a23d240 : Implement array_ops::sum() for PrimitiveArray --- rust/src/array_ops.rs | 53 +++++++++++++++++++++++++++++++++++++++++++ 
 rust/src/datatypes.rs | 33 +++++++++++++++++----------
 2 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/rust/src/array_ops.rs b/rust/src/array_ops.rs
index 59145754f0248..517111ba76a45 100644
--- a/rust/src/array_ops.rs
+++ b/rust/src/array_ops.rs
@@ -155,6 +155,35 @@ where
     n
 }
 
+/// Returns the sum of values in the array.
+///
+/// Returns `None` if the array is empty or only contains null values.
+pub fn sum<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
+where
+    T: ArrowNumericType,
+    T::Native: Add<Output = T::Native>,
+{
+    let mut n: T::Native = T::default_value();
+    // iteratively track whether all values are null (or array is empty)
+    let mut all_nulls = true;
+    let data = array.data();
+    for i in 0..data.len() {
+        if data.is_null(i) {
+            continue;
+        }
+        if all_nulls {
+            all_nulls = false;
+        }
+        let m = array.value(i);
+        n = n + m;
+    }
+    if all_nulls {
+        None
+    } else {
+        Some(n)
+    }
+}
+
 /// Perform `left == right` operation on two arrays.
 pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
 where
@@ -399,6 +428,30 @@ mod tests {
         assert_eq!(13, c.value(2));
     }
 
+    #[test]
+    fn test_primitive_array_sum() {
+        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
+        assert_eq!(15, sum(&a).unwrap());
+    }
+
+    #[test]
+    fn test_primitive_array_float_sum() {
+        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
+        assert_eq!(16.5, sum(&a).unwrap());
+    }
+
+    #[test]
+    fn test_primitive_array_sum_with_nulls() {
+        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
+        assert_eq!(10, sum(&a).unwrap());
+    }
+
+    #[test]
+    fn test_primitive_array_sum_all_nulls() {
+        let a = Int32Array::from(vec![None, None, None]);
+        assert_eq!(None, sum(&a));
+    }
+
     #[test]
     fn test_primitive_array_eq() {
         let a = Int32Array::from(vec![8, 8, 8, 8, 8]);
diff --git a/rust/src/datatypes.rs b/rust/src/datatypes.rs
index 36cb818cdfc7a..49e06eb0969b2 100644
--- a/rust/src/datatypes.rs
+++ b/rust/src/datatypes.rs
@@ -83,10 +83,15 @@ pub trait ArrowPrimitiveType: 'static {
 
     /// Returns the bit width of this primitive type.
     fn get_bit_width() -> usize;
+
+    /// Returns a default value of this primitive type.
+    ///
+    /// This is useful for aggregate array ops like `sum()`, `mean()`.
+    fn default_value() -> Self::Native;
 }
 
 macro_rules! make_type {
-    ($name:ident, $native_ty:ty, $data_ty:path, $bit_width:expr) => {
+    ($name:ident, $native_ty:ty, $data_ty:path, $bit_width:expr, $default_val:expr) => {
         impl ArrowNativeType for $native_ty {}
 
         pub struct $name {}
@@ -101,21 +106,25 @@ macro_rules! make_type {
         fn get_bit_width() -> usize {
             $bit_width
         }
+
+        fn default_value() -> Self::Native {
+            $default_val
+        }
     }
     };
 }
 
-make_type!(BooleanType, bool, DataType::Boolean, 1);
-make_type!(Int8Type, i8, DataType::Int8, 8);
-make_type!(Int16Type, i16, DataType::Int16, 16);
-make_type!(Int32Type, i32, DataType::Int32, 32);
-make_type!(Int64Type, i64, DataType::Int64, 64);
-make_type!(UInt8Type, u8, DataType::UInt8, 8);
-make_type!(UInt16Type, u16, DataType::UInt16, 16);
-make_type!(UInt32Type, u32, DataType::UInt32, 32);
-make_type!(UInt64Type, u64, DataType::UInt64, 64);
-make_type!(Float32Type, f32, DataType::Float32, 32);
-make_type!(Float64Type, f64, DataType::Float64, 64);
+make_type!(BooleanType, bool, DataType::Boolean, 1, false);
+make_type!(Int8Type, i8, DataType::Int8, 8, 0i8);
+make_type!(Int16Type, i16, DataType::Int16, 16, 0i16);
+make_type!(Int32Type, i32, DataType::Int32, 32, 0i32);
+make_type!(Int64Type, i64, DataType::Int64, 64, 0i64);
+make_type!(UInt8Type, u8, DataType::UInt8, 8, 0u8);
+make_type!(UInt16Type, u16, DataType::UInt16, 16, 0u16);
+make_type!(UInt32Type, u32, DataType::UInt32, 32, 0u32);
+make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64);
+make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32);
+make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
 
 /// A subtype of primitive type that represents numeric values.
 pub trait ArrowNumericType: ArrowPrimitiveType {}

From 601498f7169f2340b393bccba1d0a0e0b65d1562 Mon Sep 17 00:00:00 2001
From: Kouhei Sutou
Date: Sun, 6 Jan 2019 20:53:13 +0900
Subject: [PATCH 028/203] ARROW-4161: [GLib] Add PlasmaClientOptions

Author: Kouhei Sutou

Closes #3315 from kou/glib-plasma-client-new-options and squashes the following commits:

73eff12a  Add support for Plasma::ClientOptions
ed52a8ab  Add PlasmaClientOptions
---
 c_glib/plasma-glib/client.cpp                 | 137 +++++++++++++++++-
 c_glib/plasma-glib/client.h                   |  21 +++
 .../test/plasma/test-plasma-client-options.rb |  31 ++++
 c_glib/test/plasma/test-plasma-client.rb      |   3 +-
 .../test/plasma/test-plasma-created-object.rb |   2 +-
 .../plasma/test-plasma-referred-object.rb     |   2 +-
 ruby/red-plasma/lib/plasma/client.rb          |  13 +-
 ruby/red-plasma/test/test-plasma-client.rb    |  24 ++-
 8 files changed, 223 insertions(+), 10 deletions(-)
 create mode 100644 c_glib/test/plasma/test-plasma-client-options.rb

diff --git a/c_glib/plasma-glib/client.cpp b/c_glib/plasma-glib/client.cpp
index 9591a0a714f27..2038ea61f042a 100644
--- a/c_glib/plasma-glib/client.cpp
+++ b/c_glib/plasma-glib/client.cpp
@@ -39,6 +39,9 @@ G_BEGIN_DECLS
  * @title: Client related classes
  * @include: plasma-glib/plasma-glib.h
  *
+ * #GPlasmaClientOptions is a class for customizing plasma store
+ * connection.
+ *
 * #GPlasmaClientCreateOptions is a class for customizing object creation.
  *
  * #GPlasmaClient is a class for an interface with a plasma store.
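+ *
+ * A minimal usage sketch of the options API added in this patch (the
+ * socket path is illustrative):
+ *
+ * |[<!-- language="C" -->
+ * GError *error = NULL;
+ * GPlasmaClientOptions *options = gplasma_client_options_new();
+ * gplasma_client_options_set_n_retries(options, 10);
+ * GPlasmaClient *client =
+ *   gplasma_client_new("/tmp/plasma.sock", options, &error);
+ * ]|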
@@ -46,6 +49,131 @@ G_BEGIN_DECLS * Since: 0.12.0 */ +typedef struct GPlasmaClientCreatePrivate_ { + gint n_retries; +} GPlasmaClientOptionsPrivate; + +enum { + PROP_N_RETRIES = 1 +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GPlasmaClientOptions, + gplasma_client_options, + G_TYPE_OBJECT) + +#define GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + gplasma_client_options_get_instance_private( \ + GPLASMA_CLIENT_OPTIONS(object))) + +static void +gplasma_client_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_N_RETRIES: + priv->n_retries = g_value_get_int(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_N_RETRIES: + g_value_set_int(value, priv->n_retries); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gplasma_client_options_init(GPlasmaClientOptions *object) +{ +} + +static void +gplasma_client_options_class_init(GPlasmaClientOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = gplasma_client_options_set_property; + gobject_class->get_property = gplasma_client_options_get_property; + + GParamSpec *spec; + spec = g_param_spec_int("n-retries", + "N retries", + "The number of retries to connect plasma store. " + "-1 means that the system default value is used.", + -1, + G_MAXINT, + -1, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT)); + g_object_class_install_property(gobject_class, PROP_N_RETRIES, spec); +} + +/** + * gplasma_client_options_new: + * + * Returns: A newly created #GPlasmaClientOptions. + * + * Since: 0.12.0 + */ +GPlasmaClientOptions * +gplasma_client_options_new(void) +{ + auto options = g_object_new(GPLASMA_TYPE_CLIENT_OPTIONS, + NULL); + return GPLASMA_CLIENT_OPTIONS(options); +} + +/** + * gplasma_client_options_set_n_retries: + * @options: A #GPlasmaClientOptions. + * @n_retries: The number of retires on connect. + * + * Since: 0.12.0 + */ +void +gplasma_client_options_set_n_retries(GPlasmaClientOptions *options, + gint n_retries) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options); + priv->n_retries = n_retries; +} + +/** + * gplasma_client_options_get_n_retries: + * @options: A #GPlasmaClientOptions. + * + * Returns: The number of retries on connect. + * + * Since: 0.12.0 + */ +gint +gplasma_client_options_get_n_retries(GPlasmaClientOptions *options) +{ + auto priv = GPLASMA_CLIENT_OPTIONS_GET_PRIVATE(options); + return priv->n_retries; +} + + typedef struct GPlasmaClientCreateOptionsPrivate_ { guint8 *metadata; gsize metadata_size; @@ -182,6 +310,7 @@ gplasma_client_create_options_get_metadata(GPlasmaClientCreateOptions *options, return priv->metadata; } + typedef struct GPlasmaClientPrivate_ { plasma::PlasmaClient *client; bool disconnected; @@ -262,6 +391,7 @@ gplasma_client_class_init(GPlasmaClientClass *klass) /** * gplasma_client_new: * @store_socket_name: The name of the UNIX domain socket. + * @options: (nullable): The options to custom how to connect to plasma store. * @error: (nullable): Return location for a #GError or %NULL. 
* * Returns: (nullable): A newly created #GPlasmaClient on success, @@ -271,10 +401,15 @@ gplasma_client_class_init(GPlasmaClientClass *klass) */ GPlasmaClient * gplasma_client_new(const gchar *store_socket_name, + GPlasmaClientOptions *options, GError **error) { auto plasma_client = new plasma::PlasmaClient(); - auto status = plasma_client->Connect(store_socket_name, ""); + int n_retries = -1; + if (options) { + n_retries = gplasma_client_options_get_n_retries(options); + } + auto status = plasma_client->Connect(store_socket_name, "", 0, n_retries); if (garrow_error_check(error, status, "[plasma][client][new]")) { return gplasma_client_new_raw(plasma_client); } else { diff --git a/c_glib/plasma-glib/client.h b/c_glib/plasma-glib/client.h index 34b0ba22e3188..2cb983e14e970 100644 --- a/c_glib/plasma-glib/client.h +++ b/c_glib/plasma-glib/client.h @@ -23,6 +23,26 @@ G_BEGIN_DECLS +#define GPLASMA_TYPE_CLIENT_OPTIONS (gplasma_client_options_get_type()) +G_DECLARE_DERIVABLE_TYPE(GPlasmaClientOptions, + gplasma_client_options, + GPLASMA, + CLIENT_OPTIONS, + GObject) + +struct _GPlasmaClientOptionsClass +{ + GObjectClass parent_class; +}; + +GPlasmaClientOptions *gplasma_client_options_new(void); +void +gplasma_client_options_set_n_retries(GPlasmaClientOptions *options, + gint n_retries); +gint +gplasma_client_options_get_n_retries(GPlasmaClientOptions *options); + + #define GPLASMA_TYPE_CLIENT_CREATE_OPTIONS \ (gplasma_client_create_options_get_type()) G_DECLARE_DERIVABLE_TYPE(GPlasmaClientCreateOptions, @@ -59,6 +79,7 @@ struct _GPlasmaClientClass }; GPlasmaClient *gplasma_client_new(const gchar *store_socket_name, + GPlasmaClientOptions *options, GError **error); GPlasmaCreatedObject * gplasma_client_create(GPlasmaClient *client, diff --git a/c_glib/test/plasma/test-plasma-client-options.rb b/c_glib/test/plasma/test-plasma-client-options.rb new file mode 100644 index 0000000000000..abe6fd3ce46ff --- /dev/null +++ b/c_glib/test/plasma/test-plasma-client-options.rb @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
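+# A usage sketch of the API exercised below (the retry count is illustrative;
+# a real socket path would come from a running plasma store):
+#
+#   options = Plasma::ClientOptions.new
+#   options.n_retries = 10
+#   client = Plasma::Client.new(store_socket_path, options)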
+ +class TestPlasmaClientOptions < Test::Unit::TestCase + include Helper::Omittable + + def setup + omit("Plasma is required") unless defined?(::Plasma) + @options = Plasma::ClientOptions.new + end + + test("n_retries") do + assert_equal(-1, @options.n_retries) + @options.n_retries = 10 + assert_equal(10, @options.n_retries) + end +end diff --git a/c_glib/test/plasma/test-plasma-client.rb b/c_glib/test/plasma/test-plasma-client.rb index 6caf09f02570c..a57d1fc5944e9 100644 --- a/c_glib/test/plasma/test-plasma-client.rb +++ b/c_glib/test/plasma/test-plasma-client.rb @@ -23,7 +23,8 @@ def setup omit("Plasma is required") unless defined?(::Plasma) @store = Helper::PlasmaStore.new @store.start - @client = Plasma::Client.new(@store.socket_path) + @options = Plasma::ClientOptions.new + @client = Plasma::Client.new(@store.socket_path, @options) @id = Plasma::ObjectID.new("Hello") @data = "World" @options = Plasma::ClientCreateOptions.new diff --git a/c_glib/test/plasma/test-plasma-created-object.rb b/c_glib/test/plasma/test-plasma-created-object.rb index 54d6774790abe..9025ff4ac22d9 100644 --- a/c_glib/test/plasma/test-plasma-created-object.rb +++ b/c_glib/test/plasma/test-plasma-created-object.rb @@ -21,7 +21,7 @@ def setup omit("Plasma is required") unless defined?(::Plasma) @store = Helper::PlasmaStore.new @store.start - @client = Plasma::Client.new(@store.socket_path) + @client = Plasma::Client.new(@store.socket_path, nil) @id = Plasma::ObjectID.new("Hello") @data = "World" diff --git a/c_glib/test/plasma/test-plasma-referred-object.rb b/c_glib/test/plasma/test-plasma-referred-object.rb index f55c0b13c5603..a74641ed5dcd3 100644 --- a/c_glib/test/plasma/test-plasma-referred-object.rb +++ b/c_glib/test/plasma/test-plasma-referred-object.rb @@ -21,7 +21,7 @@ def setup omit("Plasma is required") unless defined?(::Plasma) @store = Helper::PlasmaStore.new @store.start - @client = Plasma::Client.new(@store.socket_path) + @client = Plasma::Client.new(@store.socket_path, nil) @id = Plasma::ObjectID.new("Hello") @data = "World" diff --git a/ruby/red-plasma/lib/plasma/client.rb b/ruby/red-plasma/lib/plasma/client.rb index 464ef8c336fd9..d32ded6ff60b4 100644 --- a/ruby/red-plasma/lib/plasma/client.rb +++ b/ruby/red-plasma/lib/plasma/client.rb @@ -18,9 +18,18 @@ module Plasma class Client alias_method :initialize_raw, :initialize - def initialize(socket_path) + private :initialize_raw + def initialize(socket_path, options=nil) socket_path = socket_path.to_path if socket_path.respond_to?(:to_path) - initialize_raw(socket_path) + if options + options_raw = options + options = ClientOptions.new + options_raw.each do |key, value| + setter = "#{key}=" + options.__send__(setter, value) if options.respond_to?(setter) + end + end + initialize_raw(socket_path, options) end end end diff --git a/ruby/red-plasma/test/test-plasma-client.rb b/ruby/red-plasma/test/test-plasma-client.rb index e7f8dbdba42e0..de76fb9d36e8b 100644 --- a/ruby/red-plasma/test/test-plasma-client.rb +++ b/ruby/red-plasma/test/test-plasma-client.rb @@ -20,15 +20,31 @@ def setup @store = nil @store = Helper::PlasmaStore.new @store.start + @id = Plasma::ObjectID.new("Hello") + @data = "World" end def teardown @store.stop if @store end - def test_new - assert_nothing_raised do - Plasma::Client.new(Pathname(@store.socket_path)) - end + def test_new_pathname + client = Plasma::Client.new(Pathname(@store.socket_path)) + object = client.create(@id, @data.bytesize, nil) + object.data.set_data(0, @data) + object.seal + + object = client.refer_object(@id, 
-1)
+    assert_equal(@data, object.data.data.to_s)
+  end
+
+  def test_new_options
+    client = Plasma::Client.new(@store.socket_path, n_retries: 1)
+    object = client.create(@id, @data.bytesize, nil)
+    object.data.set_data(0, @data)
+    object.seal
+
+    object = client.refer_object(@id, -1)
+    assert_equal(@data, object.data.data.to_s)
   end
 end

From fa6e4238fdce81a17c1957ffbc8cd7defdbc3831 Mon Sep 17 00:00:00 2001
From: Kouhei Sutou
Date: Sun, 6 Jan 2019 21:21:15 +0900
Subject: [PATCH 029/203] ARROW-4162: [Ruby] Add support for creating data
 types from description

Author: Kouhei Sutou

Closes #3318 from kou/ruby-data-type-new-by-description and squashes the following commits:

e7231e69  Add support for creating data types from description
---
 .../lib/arrow/decimal128-data-type.rb         |  69 ++++++++++
 .../lib/arrow/dense-union-data-type.rb        |  90 +++++++++++
 .../lib/arrow/dictionary-data-type.rb         | 106 ++++++++++++++
 ruby/red-arrow/lib/arrow/loader.rb            |   7 ++
 .../lib/arrow/sparse-union-data-type.rb       |  90 +++++++++++
 ruby/red-arrow/lib/arrow/time32-data-type.rb  |  61 ++++++++++
 ruby/red-arrow/lib/arrow/time64-data-type.rb  |  61 ++++++++++
 .../lib/arrow/timestamp-data-type.rb          |  57 ++++++++++
 .../test/test-decimal128-data-type.rb         |  31 +++++
 .../test/test-dense-union-data-type.rb        |  41 +++++++
 .../test/test-dictionary-data-type.rb         |  40 +++++++
 .../test/test-sparse-union-data-type.rb       |  41 +++++++
 ruby/red-arrow/test/test-time32-data-type.rb  |  42 +++++++
 ruby/red-arrow/test/test-time64-data-type.rb  |  42 +++++++
 .../test/test-timestamp-data-type.rb          |  42 +++++++
 15 files changed, 820 insertions(+)
 create mode 100644 ruby/red-arrow/lib/arrow/decimal128-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/dense-union-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/dictionary-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/sparse-union-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/time32-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/time64-data-type.rb
 create mode 100644 ruby/red-arrow/lib/arrow/timestamp-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-decimal128-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-dense-union-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-dictionary-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-sparse-union-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-time32-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-time64-data-type.rb
 create mode 100644 ruby/red-arrow/test/test-timestamp-data-type.rb
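
The constructors added in this patch accept either positional arguments or a single description `Hash`. A few illustrative calls, mirroring the documentation in the diffs below (all values are examples):

```ruby
Arrow::Decimal128DataType.new(precision: 8, scale: 2)
Arrow::Time32DataType.new(unit: :milli)
Arrow::TimestampDataType.new(unit: :milli)
dictionary = Arrow::StringArray.new(["Hello", "World"])
Arrow::DictionaryDataType.new(index_data_type: :int8,
                              dictionary: dictionary,
                              ordered: true)
```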

diff --git a/ruby/red-arrow/lib/arrow/decimal128-data-type.rb b/ruby/red-arrow/lib/arrow/decimal128-data-type.rb
new file mode 100644
index 0000000000000..c97944bf8db76
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/decimal128-data-type.rb
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class Decimal128DataType
+    alias_method :initialize_raw, :initialize
+    private :initialize_raw
+
+    # Creates a new {Arrow::Decimal128DataType}.
+    #
+    # @overload initialize(precision, scale)
+    #
+    #   @param precision [Integer] The precision of the decimal data
+    #     type. It's the number of digits including the number of
+    #     digits after the decimal point.
+    #
+    #   @param scale [Integer] The scale of the decimal data
+    #     type. It's the number of digits after the decimal point.
+    #
+    #   @example Create a decimal data type for "XXXXXX.YY" decimal
+    #     Arrow::Decimal128DataType.new(8, 2)
+    #
+    # @overload initialize(description)
+    #
+    #   @param description [Hash] The description of the decimal data
+    #     type. It must have `:precision` and `:scale` values.
+    #
+    #   @option description [Integer] :precision The precision of the
+    #     decimal data type. It's the number of digits including the
+    #     number of digits after the decimal point.
+    #
+    #   @option description [Integer] :scale The scale of the decimal
+    #     data type. It's the number of digits after the decimal
+    #     point.
+    #
+    #   @example Create a decimal data type for "XXXXXX.YY" decimal
+    #     Arrow::Decimal128DataType.new(precision: 8,
+    #                                   scale: 2)
+    def initialize(*args)
+      n_args = args.size
+      case n_args
+      when 1
+        description = args[0]
+        precision = description[:precision]
+        scale = description[:scale]
+      when 2
+        precision, scale = args
+      else
+        message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+        raise ArgumentError, message
+      end
+      initialize_raw(precision, scale)
+    end
+  end
+end
diff --git a/ruby/red-arrow/lib/arrow/dense-union-data-type.rb b/ruby/red-arrow/lib/arrow/dense-union-data-type.rb
new file mode 100644
index 0000000000000..740b31331c964
--- /dev/null
+++ b/ruby/red-arrow/lib/arrow/dense-union-data-type.rb
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+module Arrow
+  class DenseUnionDataType
+    alias_method :initialize_raw, :initialize
+    private :initialize_raw
+
+    # Creates a new {Arrow::DenseUnionDataType}.
+    #
+    # @overload initialize(fields, type_codes)
+    #
+    #   @param fields [::Array<Arrow::Field, Hash>] The fields of the
+    #     dense union data type. You can mix {Arrow::Field} and field
+    #     description in the fields.
+    #
+    #     See {Arrow::Field.new} how to specify field description.
+    #
+    #   @param type_codes [::Array<Integer>] The IDs that indicates
+    #     corresponding fields.
+ # + # @example Create a dense union data type for {2: visible, 9: count} + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::DenseUnionDataType.new(fields, [2, 9]) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the dense union + # data type. It must have `:fields` and `:type_codes` values. + # + # @option description [::Array] :fields The + # fields of the dense union data type. You can mix + # {Arrow::Field} and field description in the fields. + # + # See {Arrow::Field.new} how to specify field description. + # + # @option description [::Array] :type_codes The IDs + # that indicates corresponding fields. + # + # @example Create a dense union data type for {2: visible, 9: count} + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::DenseUnionDataType.new(fields: fields, + # type_codes: [2, 9]) + def initialize(*args) + n_args = args.size + case n_args + when 1 + description = args[0] + fields = description[:fields] + type_codes = description[:type_codes] + when 2 + fields, type_codes = args + else + message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + raise ArgumentError, message + end + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + initialize_raw(fields, type_codes) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/dictionary-data-type.rb b/ruby/red-arrow/lib/arrow/dictionary-data-type.rb new file mode 100644 index 0000000000000..e799fdfac799e --- /dev/null +++ b/ruby/red-arrow/lib/arrow/dictionary-data-type.rb @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class DictionaryDataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::DictionaryDataType}. + # + # @overload initialize(index_data_type, dictionary, ordered) + # + # @param index_data_type [Arrow::DataType, Hash, String, Symbol] + # The index data type of the dictionary data type. It must be + # signed integer data types. Here are available signed integer + # data types: + # + # * Arrow::Int8DataType + # * Arrow::Int16DataType + # * Arrow::Int32DataType + # * Arrow::Int64DataType + # + # You can specify data type as a description by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # @param dictionary [Arrow::Array] The real values of the + # dictionary data type. + # + # @param ordered [Boolean] Whether dictionary contents are + # ordered or not. 
+ # + # @example Create a dictionary data type for {0: "Hello", 1: "World"} + # index_data_type = :int8 + # dictionary = Arrow::StringArray.new(["Hello", "World"]) + # ordered = true + # Arrow::DictionaryDataType.new(index_data_type, + # dictionary, + # ordered) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the dictionary + # data type. It must have `:index_data_type`, `:dictionary` + # and `:ordered` values. + # + # @option description [Arrow::DataType, Hash, String, Symbol] + # :index_data_type The index data type of the dictionary data + # type. It must be signed integer data types. Here are + # available signed integer data types: + # + # * Arrow::Int8DataType + # * Arrow::Int16DataType + # * Arrow::Int32DataType + # * Arrow::Int64DataType + # + # You can specify data type as a description by `Hash`. + # + # See {Arrow::DataType.resolve} how to specify data type + # description. + # + # @option description [Arrow::Array] :dictionary The real values + # of the dictionary data type. + # + # @option description [Boolean] :ordered Whether dictionary + # contents are ordered or not. + # + # @example Create a dictionary data type for {0: "Hello", 1: "World"} + # dictionary = Arrow::StringArray.new(["Hello", "World"]) + # Arrow::DictionaryDataType.new(index_data_type: :int8, + # dictionary: dictionary, + # ordered: true) + def initialize(*args) + n_args = args.size + case n_args + when 1 + description = args[0] + index_data_type = description[:index_data_type] + dictionary = description[:dictionary] + ordered = description[:ordered] + when 3 + index_data_type, dictionary, ordered = args + else + message = "wrong number of arguments (given, #{n_args}, expected 1 or 3)" + raise ArgumentError, message + end + index_data_type = DataType.resolve(index_data_type) + initialize_raw(index_data_type, dictionary, ordered) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index cea98e9a8578e..8747476222955 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -43,6 +43,9 @@ def require_libraries require "arrow/date32-array-builder" require "arrow/date64-array" require "arrow/date64-array-builder" + require "arrow/decimal128-data-type" + require "arrow/dense-union-data-type" + require "arrow/dictionary-data-type" require "arrow/field" require "arrow/file-output-stream" require "arrow/list-data-type" @@ -54,6 +57,7 @@ def require_libraries require "arrow/rolling-window" require "arrow/schema" require "arrow/slicer" + require "arrow/sparse-union-data-type" require "arrow/struct-array" require "arrow/struct-data-type" require "arrow/table" @@ -63,8 +67,11 @@ def require_libraries require "arrow/table-loader" require "arrow/table-saver" require "arrow/tensor" + require "arrow/time32-data-type" + require "arrow/time64-data-type" require "arrow/timestamp-array" require "arrow/timestamp-array-builder" + require "arrow/timestamp-data-type" require "arrow/writable" end diff --git a/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb b/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb new file mode 100644 index 0000000000000..fb0ddf0909165 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/sparse-union-data-type.rb @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class SparseUnionDataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::SparseUnionDataType}. + # + # @overload initialize(fields, type_codes) + # + # @param fields [::Array] The fields of the + # sparse union data type. You can mix {Arrow::Field} and field + # description in the fields. + # + # See {Arrow::Field.new} how to specify field description. + # + # @param type_codes [::Array] The IDs that indicates + # corresponding fields. + # + # @example Create a sparse union data type for {2: visible, 9: count} + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::SparseUnionDataType.new(fields, [2, 9]) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the sparse union + # data type. It must have `:fields` and `:type_codes` values. + # + # @option description [::Array] :fields The + # fields of the sparse union data type. You can mix + # {Arrow::Field} and field description in the fields. + # + # See {Arrow::Field.new} how to specify field description. + # + # @option description [::Array] :type_codes The IDs + # that indicates corresponding fields. + # + # @example Create a sparse union data type for {2: visible, 9: count} + # fields = [ + # Arrow::Field.new("visible", :boolean), + # { + # name: "count", + # type: :int32, + # }, + # ] + # Arrow::SparseUnionDataType.new(fields: fields, + # type_codes: [2, 9]) + def initialize(*args) + n_args = args.size + case n_args + when 1 + description = args[0] + fields = description[:fields] + type_codes = description[:type_codes] + when 2 + fields, type_codes = args + else + message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + raise ArgumentError, message + end + fields = fields.collect do |field| + field = Field.new(field) unless field.is_a?(Field) + field + end + initialize_raw(fields, type_codes) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/time32-data-type.rb b/ruby/red-arrow/lib/arrow/time32-data-type.rb new file mode 100644 index 0000000000000..9e8d955494338 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/time32-data-type.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class Time32DataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::Time32DataType}. + # + # @overload initialize(unit) + # + # @param unit [Arrow::TimeUnit, Symbol] The unit of the + # time32 data type. + # + # The unit must be second or millisecond. + # + # @example Create a time32 data type with {Arrow::TimeUnit} + # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI) + # + # @example Create a time32 data type with Symbol + # Arrow::Time32DataType.new(:milli) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the time32 data + # type. It must have `:unit` value. + # + # @option description [Arrow::TimeUnit, Symbol] :unit The unit of + # the time32 data type. + # + # The unit must be second or millisecond. + # + # @example Create a time32 data type with {Arrow::TimeUnit} + # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI) + # + # @example Create a time32 data type with Symbol + # Arrow::Time32DataType.new(unit: :milli) + def initialize(unit) + if unit.is_a?(Hash) + description = unit + unit = description[:unit] + end + initialize_raw(unit) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/time64-data-type.rb b/ruby/red-arrow/lib/arrow/time64-data-type.rb new file mode 100644 index 0000000000000..ca31a561b43c4 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/time64-data-type.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class Time64DataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::Time64DataType}. + # + # @overload initialize(unit) + # + # @param unit [Arrow::TimeUnit, Symbol] The unit of the + # time64 data type. + # + # The unit must be microsecond or nanosecond. + # + # @example Create a time64 data type with {Arrow::TimeUnit} + # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO) + # + # @example Create a time64 data type with Symbol + # Arrow::Time64DataType.new(:nano) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the time64 data + # type. It must have `:unit` value. + # + # @option description [Arrow::TimeUnit, Symbol] :unit The unit of + # the time64 data type. + # + # The unit must be microsecond or nanosecond. 
+ # + # @example Create a time64 data type with {Arrow::TimeUnit} + # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO) + # + # @example Create a time64 data type with Symbol + # Arrow::Time64DataType.new(unit: :nano) + def initialize(unit) + if unit.is_a?(Hash) + description = unit + unit = description[:unit] + end + initialize_raw(unit) + end + end +end diff --git a/ruby/red-arrow/lib/arrow/timestamp-data-type.rb b/ruby/red-arrow/lib/arrow/timestamp-data-type.rb new file mode 100644 index 0000000000000..86ed3e00eadd1 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/timestamp-data-type.rb @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class TimestampDataType + alias_method :initialize_raw, :initialize + private :initialize_raw + + # Creates a new {Arrow::TimestampDataType}. + # + # @overload initialize(unit) + # + # @param unit [Arrow::TimeUnit, Symbol] The unit of the + # timestamp data type. + # + # @example Create a timestamp data type with {Arrow::TimeUnit} + # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI) + # + # @example Create a timestamp data type with Symbol + # Arrow::TimestampDataType.new(:milli) + # + # @overload initialize(description) + # + # @param description [Hash] The description of the timestamp data + # type. It must have `:unit` value. + # + # @option description [Arrow::TimeUnit, Symbol] :unit The unit of + # the timestamp data type. + # + # @example Create a timestamp data type with {Arrow::TimeUnit} + # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI) + # + # @example Create a timestamp data type with Symbol + # Arrow::TimestampDataType.new(unit: :milli) + def initialize(unit) + if unit.is_a?(Hash) + description = unit + unit = description[:unit] + end + initialize_raw(unit) + end + end +end diff --git a/ruby/red-arrow/test/test-decimal128-data-type.rb b/ruby/red-arrow/test/test-decimal128-data-type.rb new file mode 100644 index 0000000000000..6cdd22fff8ea8 --- /dev/null +++ b/ruby/red-arrow/test/test-decimal128-data-type.rb @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class Decimal128DataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("ordered arguments") do + assert_equal("decimal(8, 2)", + Arrow::Decimal128DataType.new(8, 2).to_s) + end + + test("description") do + assert_equal("decimal(8, 2)", + Arrow::Decimal128DataType.new(precision: 8, + scale: 2).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-dense-union-data-type.rb b/ruby/red-arrow/test/test-dense-union-data-type.rb new file mode 100644 index 0000000000000..96699e52e45d9 --- /dev/null +++ b/ruby/red-arrow/test/test-dense-union-data-type.rb @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class DenseUnionDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + def setup + @fields = [ + Arrow::Field.new("visible", :boolean), + { + name: "count", + type: :int32, + }, + ] + end + + test("ordered arguments") do + assert_equal("union[dense]", + Arrow::DenseUnionDataType.new(@fields, [2, 9]).to_s) + end + + test("description") do + assert_equal("union[dense]", + Arrow::DenseUnionDataType.new(fields: @fields, + type_codes: [2, 9]).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-dictionary-data-type.rb b/ruby/red-arrow/test/test-dictionary-data-type.rb new file mode 100644 index 0000000000000..be9cd6f301035 --- /dev/null +++ b/ruby/red-arrow/test/test-dictionary-data-type.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
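+
+# Illustrative sketch (editor addition, not part of the original patch):
+# per the documentation in dictionary-data-type.rb above, the positional
+# form and the description Hash form should build equivalent data types.
+#
+#   dictionary = Arrow::StringArray.new(["Hello", "World"])
+#   Arrow::DictionaryDataType.new(:int8, dictionary, true)
+#   Arrow::DictionaryDataType.new(index_data_type: :int8,
+#                                 dictionary: dictionary,
+#                                 ordered: true)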
+ +class DictionaryDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + def setup + @index_data_type = :int8 + @dictionary = Arrow::StringArray.new(["Hello", "World"]) + @ordered = true + end + + test("ordered arguments") do + assert_equal("dictionary", + Arrow::DictionaryDataType.new(@index_data_type, + @dictionary, + @ordered).to_s) + end + + test("description") do + assert_equal("dictionary", + Arrow::DictionaryDataType.new(index_data_type: @index_data_type, + dictionary: @dictionary, + ordered: @ordered).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-sparse-union-data-type.rb b/ruby/red-arrow/test/test-sparse-union-data-type.rb new file mode 100644 index 0000000000000..4159b42268da9 --- /dev/null +++ b/ruby/red-arrow/test/test-sparse-union-data-type.rb @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class SparseUnionDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + def setup + @fields = [ + Arrow::Field.new("visible", :boolean), + { + name: "count", + type: :int32, + }, + ] + end + + test("ordered arguments") do + assert_equal("union[sparse]", + Arrow::SparseUnionDataType.new(@fields, [2, 9]).to_s) + end + + test("description") do + assert_equal("union[sparse]", + Arrow::SparseUnionDataType.new(fields: @fields, + type_codes: [2, 9]).to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-time32-data-type.rb b/ruby/red-arrow/test/test-time32-data-type.rb new file mode 100644 index 0000000000000..26f17359a1223 --- /dev/null +++ b/ruby/red-arrow/test/test-time32-data-type.rb @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
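+
+# Illustrative sketch (editor addition, not part of the original patch):
+# the unit can be given as an Arrow::TimeUnit or as a Symbol; time32
+# only accepts second or millisecond precision.
+#
+#   Arrow::Time32DataType.new(:second)                       # time32[s]
+#   Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)  # time32[ms]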
+ +class Time32DataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Arrow::TimeUnit") do + assert_equal("time32[ms]", + Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI).to_s) + end + + test("Symbol") do + assert_equal("time32[ms]", + Arrow::Time32DataType.new(:milli).to_s) + end + + test("unit: Arrow::TimeUnit") do + data_type = Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI) + assert_equal("time32[ms]", + data_type.to_s) + end + + test("unit: Symbol") do + data_type = Arrow::Time32DataType.new(unit: :milli) + assert_equal("time32[ms]", + data_type.to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-time64-data-type.rb b/ruby/red-arrow/test/test-time64-data-type.rb new file mode 100644 index 0000000000000..a5f34175398ca --- /dev/null +++ b/ruby/red-arrow/test/test-time64-data-type.rb @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Time64DataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Arrow::TimeUnit") do + assert_equal("time64[ns]", + Arrow::Time64DataType.new(Arrow::TimeUnit::NANO).to_s) + end + + test("Symbol") do + assert_equal("time64[ns]", + Arrow::Time64DataType.new(:nano).to_s) + end + + test("unit: Arrow::TimeUnit") do + data_type = Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO) + assert_equal("time64[ns]", + data_type.to_s) + end + + test("unit: Symbol") do + data_type = Arrow::Time64DataType.new(unit: :nano) + assert_equal("time64[ns]", + data_type.to_s) + end + end +end diff --git a/ruby/red-arrow/test/test-timestamp-data-type.rb b/ruby/red-arrow/test/test-timestamp-data-type.rb new file mode 100644 index 0000000000000..f8ccd3d8bb8b4 --- /dev/null +++ b/ruby/red-arrow/test/test-timestamp-data-type.rb @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
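+
+# Illustrative sketch (editor addition, not part of the original patch):
+# the new data type classes plug into existing APIs such as
+# Arrow::Field.new; "recorded_at" is just an example field name.
+#
+#   data_type = Arrow::TimestampDataType.new(:milli)
+#   Arrow::Field.new("recorded_at", data_type)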
+ +class TimestampDataTypeTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Arrow::TimeUnit") do + assert_equal("timestamp[ms]", + Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI).to_s) + end + + test("Symbol") do + assert_equal("timestamp[ms]", + Arrow::TimestampDataType.new(:milli).to_s) + end + + test("unit: Arrow::TimeUnit") do + data_type = Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI) + assert_equal("timestamp[ms]", + data_type.to_s) + end + + test("unit: Symbol") do + data_type = Arrow::TimestampDataType.new(unit: :milli) + assert_equal("timestamp[ms]", + data_type.to_s) + end + end +end From b95628f2980fd800efe73ab0e4778dd209f7596c Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Mon, 7 Jan 2019 08:54:59 +0900 Subject: [PATCH 030/203] ARROW-4166: [Ruby] Add support for saving to and loading from buffer Author: Kouhei Sutou Closes #3320 from kou/ruby-table-io-buffer and squashes the following commits: 7025e765 Add support for saving to and loading from buffer --- ruby/red-arrow/lib/arrow/table-loader.rb | 46 ++++-- ruby/red-arrow/lib/arrow/table-saver.rb | 66 +++++---- ruby/red-arrow/test/test-table.rb | 139 ++++++++++++------ .../lib/parquet/arrow-table-loadable.rb | 7 +- .../lib/parquet/arrow-table-savable.rb | 6 +- ruby/red-parquet/test/test-arrow-table.rb | 8 +- 6 files changed, 177 insertions(+), 95 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/table-loader.rb b/ruby/red-arrow/lib/arrow/table-loader.rb index a6ce9a1029bb3..9bfd41042768a 100644 --- a/ruby/red-arrow/lib/arrow/table-loader.rb +++ b/ruby/red-arrow/lib/arrow/table-loader.rb @@ -18,14 +18,14 @@ module Arrow class TableLoader class << self - def load(path, options={}) - new(path, options).load + def load(output, options={}) + new(output, options).load end end - def initialize(path, options={}) - path = path.to_path if path.respond_to?(:to_path) - @path = path + def initialize(output, options={}) + output = output.to_path if output.respond_to?(:to_path) + @output = output @options = options fill_options end @@ -50,7 +50,7 @@ def load __send__(custom_load_method) else # For backward compatibility. 
- __send__(custom_load_method, @path) + __send__(custom_load_method, @output) end end @@ -60,11 +60,15 @@ def fill_options return end - extension = PathExtension.new(@path) - info = extension.extract + if @output.is_a?(Buffer) + info = {} + else + extension = PathExtension.new(@output) + info = extension.extract + end format = info[:format] @options = @options.dup - if respond_to?("load_as_#{format}", true) + if format and respond_to?("load_as_#{format}", true) @options[:format] ||= format.to_sym else @options[:format] ||= :arrow @@ -74,6 +78,14 @@ def fill_options end end + def open_input_stream + if @output.is_a?(Buffer) + BufferInputStream.new(@output) + else + MemoryMappedInputStream.new(@output) + end + end + def load_raw(input, reader) schema = reader.schema chunked_arrays = [] @@ -100,7 +112,7 @@ def load_as_arrow RecordBatchStreamReader, ] reader_class_candidates.each do |reader_class_candidate| - input = MemoryMappedInputStream.new(@path) + input = open_input_stream begin reader = reader_class_candidate.new(input) rescue Arrow::Error @@ -114,20 +126,20 @@ def load_as_arrow end def load_as_batch - input = MemoryMappedInputStream.new(@path) + input = open_input_stream reader = RecordBatchFileReader.new(input) load_raw(input, reader) end def load_as_stream - input = MemoryMappedInputStream.new(@path) + input = open_input_stream reader = RecordBatchStreamReader.new(input) load_raw(input, reader) end if Arrow.const_defined?(:ORCFileReader) def load_as_orc - input = MemoryMappedInputStream.new(@path) + input = open_input_stream reader = ORCFileReader.new(input) field_indexes = @options[:field_indexes] reader.set_field_indexes(field_indexes) if field_indexes @@ -140,11 +152,15 @@ def load_as_orc def load_as_csv options = @options.dup options.delete(:format) - CSVLoader.load(Pathname.new(@path), options) + if @output.is_a?(Buffer) + CSVLoader.load(@output.data.to_s, options) + else + CSVLoader.load(Pathname.new(@output), options) + end end def load_as_feather - input = MemoryMappedInputStream.new(@path) + input = open_input_stream reader = FeatherFileReader.new(input) table = reader.read table.instance_variable_set(:@input, input) diff --git a/ruby/red-arrow/lib/arrow/table-saver.rb b/ruby/red-arrow/lib/arrow/table-saver.rb index 99e6e490532c1..817cc548717d8 100644 --- a/ruby/red-arrow/lib/arrow/table-saver.rb +++ b/ruby/red-arrow/lib/arrow/table-saver.rb @@ -18,15 +18,15 @@ module Arrow class TableSaver class << self - def save(table, path, options={}) - new(table, path, options).save + def save(table, output, options={}) + new(table, output, options).save end end - def initialize(table, path, options={}) + def initialize(table, output, options={}) @table = table - path = path.to_path if path.respond_to?(:to_path) - @path = path + output = output.to_path if output.respond_to?(:to_path) + @output = output @options = options fill_options end @@ -51,7 +51,7 @@ def save __send__(custom_save_method) else # For backward compatibility. 
- __send__(custom_save_method, @path) + __send__(custom_save_method, @output) end end @@ -61,11 +61,15 @@ def fill_options return end - extension = PathExtension.new(@path) - info = extension.extract + if @output.is_a?(Buffer) + info = {} + else + extension = PathExtension.new(@output) + info = extension.extract + end format = info[:format] @options = @options.dup - if respond_to?("save_as_#{format}", true) + if format and respond_to?("save_as_#{format}", true) @options[:format] ||= format.to_sym else @options[:format] ||= :arrow @@ -75,8 +79,30 @@ def fill_options end end + def open_raw_output_stream(&block) + if @output.is_a?(Buffer) + BufferOutputStream.open(@output, &block) + else + FileOutputStream.open(@output, false, &block) + end + end + + def open_output_stream(&block) + compression = @options[:compression] + if compression + codec = Codec.new(compression) + open_raw_output_stream do |raw_output| + CompressedOutputStream.open(codec, raw_output) do |output| + yield(output) + end + end + else + open_raw_output_stream(&block) + end + end + def save_raw(writer_class) - FileOutputStream.open(@path, false) do |output| + open_output_stream do |output| writer_class.open(output, @table.schema) do |writer| writer.write_table(@table) end @@ -95,24 +121,8 @@ def save_as_stream save_raw(RecordBatchStreamWriter) end - def open_output - compression = @options[:compression] - if compression - codec = Codec.new(compression) - FileOutputStream.open(@path, false) do |raw_output| - CompressedOutputStream.open(codec, raw_output) do |output| - yield(output) - end - end - else - ::File.open(@path, "w") do |output| - yield(output) - end - end - end - def save_as_csv - open_output do |output| + open_output_stream do |output| csv = CSV.new(output) names = @table.schema.fields.collect(&:name) csv << names @@ -125,7 +135,7 @@ def save_as_csv end def save_as_feather - FileOutputStream.open(@path, false) do |output| + open_output_stream do |output| FeatherFileWriter.open(output) do |writer| writer.write(@table) end diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 1576f779ce3b6..2876f762f00bd 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -395,83 +395,128 @@ def setup end sub_test_case("#save and .load") do - sub_test_case(":format") do - test("default") do - file = Tempfile.new(["red-arrow", ".arrow"]) - @table.save(file.path) - assert_equal(@table, Arrow::Table.load(file.path)) + module SaveLoadFormatTests + def test_default + output = create_output(".arrow") + @table.save(output) + assert_equal(@table, Arrow::Table.load(output)) end - test(":batch") do - file = Tempfile.new(["red-arrow", ".arrow"]) - @table.save(file.path, :format => :batch) - assert_equal(@table, Arrow::Table.load(file.path, :format => :batch)) + def test_batch + output = create_output(".arrow") + @table.save(output, format: :batch) + assert_equal(@table, Arrow::Table.load(output, format: :batch)) end - test(":stream") do - file = Tempfile.new(["red-arrow", ".arrow"]) - @table.save(file.path, :format => :stream) - assert_equal(@table, Arrow::Table.load(file.path, :format => :stream)) + def test_stream + output = create_output(".arrow") + @table.save(output, format: :stream) + assert_equal(@table, Arrow::Table.load(output, format: :stream)) end - test(":csv") do - file = Tempfile.new(["red-arrow", ".csv"]) - @table.save(file.path, :format => :csv) + def test_csv + output = create_output(".csv") + @table.save(output, format: :csv) assert_equal(@table, - 
Arrow::Table.load(file.path, - :format => :csv, - :schema => @table.schema)) + Arrow::Table.load(output, + format: :csv, + schema: @table.schema)) end - test("csv.gz") do - file = Tempfile.new(["red-arrow", ".csv.gz"]) - @table.save(file.path) + def test_csv_gz + output = create_output(".csv.gz") + @table.save(output, + format: :csv, + compression: :gzip) assert_equal(@table, - Arrow::Table.load(file.path, - :format => :csv, - :compression => :gzip, - :schema => @table.schema)) + Arrow::Table.load(output, + format: :csv, + compression: :gzip, + schema: @table.schema)) end + end + + sub_test_case("path") do + sub_test_case(":format") do + include SaveLoadFormatTests - sub_test_case("load: auto detect") do - test("batch") do - file = Tempfile.new(["red-arrow", ".arrow"]) - @table.save(file.path, :format => :batch) - assert_equal(@table, Arrow::Table.load(file.path)) + def create_output(extension) + @file = Tempfile.new(["red-arrow", extension]) + @file.path end - test("stream") do - file = Tempfile.new(["red-arrow", ".arrow"]) - @table.save(file.path, :format => :stream) - assert_equal(@table, Arrow::Table.load(file.path)) + sub_test_case("save: auto detect") do + test("csv") do + output = create_output(".csv") + @table.save(output) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + schema: @table.schema)) + end + + test("csv.gz") do + output = create_output(".csv.gz") + @table.save(output) + assert_equal(@table, + Arrow::Table.load(output, + format: :csv, + compression: :gzip, + schema: @table.schema)) + end end - test("csv") do - path = fixture_path("with-header.csv") - assert_equal(<<-TABLE, Arrow::Table.load(path, skip_lines: /^#/).to_s) + sub_test_case("load: auto detect") do + test("batch") do + output = create_output(".arrow") + @table.save(output, format: :batch) + assert_equal(@table, Arrow::Table.load(output)) + end + + test("stream") do + output = create_output(".arrow") + @table.save(output, format: :stream) + assert_equal(@table, Arrow::Table.load(output)) + end + + test("csv") do + path = fixture_path("with-header.csv") + table = Arrow::Table.load(path, skip_lines: /^\#/) + assert_equal(<<-TABLE, table.to_s) name score 0 alice 10 1 bob 29 2 chris -1 - TABLE - end + TABLE + end - test("csv.gz") do - file = Tempfile.new(["red-arrow", ".csv.gz"]) - Zlib::GzipWriter.wrap(file) do |gz| - gz.write(<<-CSV) + test("csv.gz") do + file = Tempfile.new(["red-arrow", ".csv.gz"]) + Zlib::GzipWriter.wrap(file) do |gz| + gz.write(<<-CSV) name,score alice,10 bob,29 chris,-1 - CSV - end - assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s) + CSV + end + assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s) name score 0 alice 10 1 bob 29 2 chris -1 TABLE + end + end + end + end + + sub_test_case("Buffer") do + sub_test_case(":format") do + include SaveLoadFormatTests + + def create_output(extension) + Arrow::ResizableBuffer.new(1024) end end end diff --git a/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb b/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb index 4df527bb8da3b..e3aa1ce0a67bf 100644 --- a/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb +++ b/ruby/red-parquet/lib/parquet/arrow-table-loadable.rb @@ -19,9 +19,12 @@ module Parquet module ArrowTableLoadable private def load_as_parquet - reader = Parquet::ArrowFileReader.new(@path) + input = open_input_stream + reader = Parquet::ArrowFileReader.new(input) reader.use_threads = (@options[:use_threads] != false) - reader.read_table + table = reader.read_table + table.instance_variable_set(:@input, 
input) + table end end end diff --git a/ruby/red-parquet/lib/parquet/arrow-table-savable.rb b/ruby/red-parquet/lib/parquet/arrow-table-savable.rb index 5d96d5f58ec00..7667381867d9a 100644 --- a/ruby/red-parquet/lib/parquet/arrow-table-savable.rb +++ b/ruby/red-parquet/lib/parquet/arrow-table-savable.rb @@ -20,8 +20,10 @@ module ArrowTableSavable private def save_as_parquet chunk_size = @options[:chunk_size] || 1024 # TODO - Parquet::ArrowFileWriter.open(@table.schema, @path) do |writer| - writer.write_table(@table, chunk_size) + open_output_stream do |output| + Parquet::ArrowFileWriter.open(@table.schema, output) do |writer| + writer.write_table(@table, chunk_size) + end end end end diff --git a/ruby/red-parquet/test/test-arrow-table.rb b/ruby/red-parquet/test/test-arrow-table.rb index 258b4173948c3..1a565b64451a8 100644 --- a/ruby/red-parquet/test/test-arrow-table.rb +++ b/ruby/red-parquet/test/test-arrow-table.rb @@ -40,9 +40,15 @@ def setup @table = Arrow::Table.new(schema, [@count_column, @visible_column]) end - def test_save_load + def test_save_load_path tempfile = Tempfile.open(["red-parquet", ".parquet"]) @table.save(tempfile.path) assert_equal(@table, Arrow::Table.load(tempfile.path)) end + + def test_save_load_buffer + buffer = Arrow::ResizableBuffer.new(1024) + @table.save(buffer, format: :parquet) + assert_equal(@table, Arrow::Table.load(buffer, format: :parquet)) + end end From 5fad19185fd224e464c21b00d0cb6fdd04d65b0a Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 7 Jan 2019 09:07:52 +0900 Subject: [PATCH 031/203] ARROW-4171: [Rust] fix parquet crate release version Author: Chao Sun Closes #3324 from sunchao/ARROW-4171 and squashes the following commits: 9a9fc00e ARROW-4171: fix parquet crate release version --- rust/parquet/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml index 7478992327ddc..e0272ab4f09e1 100644 --- a/rust/parquet/Cargo.toml +++ b/rust/parquet/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet" -version = "0.5.0-SNAPSHOT" +version = "0.12.0-SNAPSHOT" license = "Apache-2.0" description = "Apache Parquet implementation in Rust" homepage = "https://github.com/apache/arrow" From 00026303d4419a457ab3e01126b05b5aacefee8a Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sun, 6 Jan 2019 21:20:25 +1300 Subject: [PATCH 032/203] Fix link to Intel SIMD docs --- docs/source/format/Layout.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/format/Layout.rst b/docs/source/format/Layout.rst index 868a99b34f8d0..69cbf0654900a 100644 --- a/docs/source/format/Layout.rst +++ b/docs/source/format/Layout.rst @@ -659,6 +659,6 @@ Apache Drill Documentation - `Value Vectors`_ .. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering .. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors .. _Endianness: https://en.wikipedia.org/wiki/Endianness -.. _SIMD: https://software.intel.com/en-us/node/600110 +.. _SIMD: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-introduction-to-the-simd-data-layout-templates .. _Parquet: https://parquet.apache.org/documentation/latest/ .. 
_Value Vectors: https://drill.apache.org/docs/value-vectors/

From 1eec9e8195716573b04bbe9416d0be2ed3430261 Mon Sep 17 00:00:00 2001
From: Yosuke Shiro
Date: Mon, 7 Jan 2019 11:32:35 +0900
Subject: [PATCH 033/203] ARROW-4168: [GLib] Use property to keep
 GArrowDataType passed in garrow_field_new()

This is a follow-up of https://github.com/apache/arrow/pull/3197#pullrequestreview-186349753

Author: Yosuke Shiro
Author: Kouhei Sutou

Closes #3322 from shiro615/glib-use-property-to-keep-data-type and squashes the following commits:

2135b583  Remove needless new lines
cc85b1ef  Fix indent
46844bc1  Use {class_name}_data_type to unify argument names
a6af562a  Reuse GARROW_DATA_TYPE(list_data_type)
77117f08  Call g_object_unref() for GArrowDataType
cae21658  Use property to keep GArrowDataType in garrow_field_new()
---
 c_glib/arrow-glib/column.cpp              |  5 +-
 c_glib/arrow-glib/composite-data-type.cpp | 99 ++++++++++++-----------
 c_glib/arrow-glib/composite-data-type.h   | 24 +++---
 c_glib/arrow-glib/field.cpp               | 67 ++++++++------
 c_glib/arrow-glib/field.hpp               |  3 +-
 c_glib/arrow-glib/schema.cpp              | 18 ++++-
 c_glib/gandiva-glib/node.cpp              |  1 -
 7 files changed, 123 insertions(+), 94 deletions(-)

diff --git a/c_glib/arrow-glib/column.cpp b/c_glib/arrow-glib/column.cpp
index e3e964f557659..68694b3d67903 100644
--- a/c_glib/arrow-glib/column.cpp
+++ b/c_glib/arrow-glib/column.cpp
@@ -322,7 +322,10 @@ garrow_column_get_field(GArrowColumn *column)
   } else {
     const auto arrow_column = garrow_column_get_raw(column);
     auto arrow_field = arrow_column->field();
-    return garrow_field_new_raw(&arrow_field);
+    auto data_type = garrow_column_get_data_type(column);
+    auto field = garrow_field_new_raw(&arrow_field, data_type);
+    g_object_unref(data_type);
+    return field;
   }
 }
 
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 599506f269c8c..8046d2e23a31a 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -92,15 +92,13 @@ garrow_list_data_type_new(GArrowField *field)
 GArrowField *
 garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type)
 {
-  auto arrow_data_type =
-    garrow_data_type_get_raw(GARROW_DATA_TYPE(list_data_type));
+  auto data_type = GARROW_DATA_TYPE(list_data_type);
+  auto arrow_data_type = garrow_data_type_get_raw(data_type);
   auto arrow_list_data_type =
     static_cast<arrow::ListType *>(arrow_data_type.get());
   auto arrow_field = arrow_list_data_type->value_field();
-  auto field = garrow_field_new_raw(&arrow_field);
-
-  return field;
+  return garrow_field_new_raw(&arrow_field, data_type);
 }
 
 
@@ -143,22 +141,22 @@ garrow_struct_data_type_new(GList *fields)
 
 /**
  * garrow_struct_data_type_get_n_fields:
- * @data_type: A #GArrowStructDataType.
+ * @struct_data_type: A #GArrowStructDataType.
  *
  * Returns: The number of fields of the struct data type.
  *
  * Since: 0.12.0
  */
 gint
-garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type)
+garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type)
 {
-  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
+  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type));
   return arrow_data_type->num_children();
 }
 
 /**
  * garrow_struct_data_type_get_fields:
- * @data_type: A #GArrowStructDataType.
+ * @struct_data_type: A #GArrowStructDataType.
  *
  * Returns: (transfer full) (element-type GArrowField):
  *   The fields of the struct data type.
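(Editor's illustrative sketch, not part of the patch: after this change, a
field obtained from a composite data type keeps a reference to the data
type that owns it, so callers only manage the field's own reference;
`struct_data_type` below is assumed to be an existing GArrowStructDataType.)

    /* The field is (transfer full); its data type is now (transfer none). */
    GArrowField *field = garrow_struct_data_type_get_field(struct_data_type, 0);
    GArrowDataType *field_type = garrow_field_get_data_type(field);
    g_object_unref(field);  /* also drops the field's reference to the type */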
@@ -166,21 +164,23 @@ garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type) * Since: 0.12.0 */ GList * -garrow_struct_data_type_get_fields(GArrowStructDataType *data_type) +garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_fields = arrow_data_type->children(); GList *fields = NULL; for (auto arrow_field : arrow_fields) { - fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field)); + fields = g_list_prepend(fields, + garrow_field_new_raw(&arrow_field, data_type)); } return g_list_reverse(fields); } /** * garrow_struct_data_type_get_field: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @i: The index of the target field. * * Returns: (transfer full) (nullable): @@ -189,10 +189,11 @@ garrow_struct_data_type_get_fields(GArrowStructDataType *data_type) * Since: 0.12.0 */ GArrowField * -garrow_struct_data_type_get_field(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); if (i < 0) { i += arrow_data_type->num_children(); @@ -206,7 +207,7 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type, auto arrow_field = arrow_data_type->child(i); if (arrow_field) { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } else { return NULL; } @@ -214,7 +215,7 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type, /** * garrow_struct_data_type_get_field_by_name: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @name: The name of the target field. * * Returns: (transfer full) (nullable): @@ -223,16 +224,17 @@ garrow_struct_data_type_get_field(GArrowStructDataType *data_type, * Since: 0.12.0 */ GArrowField * -garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type, const gchar *name) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(struct_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_struct_data_type = std::static_pointer_cast(arrow_data_type); auto arrow_field = arrow_struct_data_type->GetFieldByName(name); if (arrow_field) { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } else { return NULL; } @@ -240,7 +242,7 @@ garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, /** * garrow_struct_data_type_get_field_index: - * @data_type: A #GArrowStructDataType. + * @struct_data_type: A #GArrowStructDataType. * @name: The name of the target field. 
* * Returns: The index of the target index in the struct data type @@ -249,10 +251,10 @@ garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, * Since: 0.12.0 */ gint -garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type, const gchar *name) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(struct_data_type)); auto arrow_struct_data_type = std::static_pointer_cast(arrow_data_type); @@ -276,22 +278,22 @@ garrow_union_data_type_class_init(GArrowUnionDataTypeClass *klass) /** * garrow_union_data_type_get_n_fields: - * @data_type: A #GArrowUnionDataType. + * @union_data_type: A #GArrowUnionDataType. * * Returns: The number of fields of the union data type. * * Since: 0.12.0 */ gint -garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type) +garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type)); return arrow_data_type->num_children(); } /** * garrow_union_data_type_get_fields: - * @data_type: A #GArrowUnionDataType. + * @union_data_type: A #GArrowUnionDataType. * * Returns: (transfer full) (element-type GArrowField): * The fields of the union data type. @@ -299,21 +301,23 @@ garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type) * Since: 0.12.0 */ GList * -garrow_union_data_type_get_fields(GArrowUnionDataType *data_type) +garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(union_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_fields = arrow_data_type->children(); GList *fields = NULL; for (auto arrow_field : arrow_fields) { - fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field)); + fields = g_list_prepend(fields, + garrow_field_new_raw(&arrow_field, data_type)); } return g_list_reverse(fields); } /** * garrow_union_data_type_get_field: - * @data_type: A #GArrowUnionDataType. + * @union_data_type: A #GArrowUnionDataType. * @i: The index of the target field. * * Returns: (transfer full) (nullable): @@ -322,10 +326,11 @@ garrow_union_data_type_get_fields(GArrowUnionDataType *data_type) * Since: 0.12.0 */ GArrowField * -garrow_union_data_type_get_field(GArrowUnionDataType *data_type, - gint i) +garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, + gint i) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto data_type = GARROW_DATA_TYPE(union_data_type); + auto arrow_data_type = garrow_data_type_get_raw(data_type); if (i < 0) { i += arrow_data_type->num_children(); @@ -339,7 +344,7 @@ garrow_union_data_type_get_field(GArrowUnionDataType *data_type, auto arrow_field = arrow_data_type->child(i); if (arrow_field) { - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } else { return NULL; } @@ -347,7 +352,7 @@ garrow_union_data_type_get_field(GArrowUnionDataType *data_type, /** * garrow_union_data_type_get_type_codes: - * @data_type: A #GArrowUnionDataType. + * @union_data_type: A #GArrowUnionDataType. * @n_type_codes: (out): The number of type codes. 
* * Returns: (transfer full) (array length=n_type_codes): @@ -358,10 +363,10 @@ garrow_union_data_type_get_field(GArrowUnionDataType *data_type, * Since: 0.12.0 */ guint8 * -garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type, +garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type, gsize *n_type_codes) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(union_data_type)); auto arrow_union_data_type = std::static_pointer_cast(arrow_data_type); @@ -515,16 +520,16 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type, /** * garrow_dictionary_data_type_get_index_data_type: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: (transfer full): The #GArrowDataType of index. * * Since: 0.8.0 */ GArrowDataType * -garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast(arrow_data_type); auto arrow_index_data_type = arrow_dictionary_data_type->index_type(); @@ -533,16 +538,16 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t /** * garrow_dictionary_data_type_get_dictionary: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: (transfer full): The dictionary as #GArrowArray. * * Since: 0.8.0 */ GArrowArray * -garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast(arrow_data_type); auto arrow_dictionary = arrow_dictionary_data_type->dictionary(); @@ -551,16 +556,16 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type) /** * garrow_dictionary_data_type_is_ordered: - * @data_type: The #GArrowDictionaryDataType. + * @dictionary_data_type: The #GArrowDictionaryDataType. * * Returns: Whether dictionary contents are ordered or not. 
* * Since: 0.8.0 */ gboolean -garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type) +garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type) { - auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(dictionary_data_type)); auto arrow_dictionary_data_type = std::static_pointer_cast(arrow_data_type); return arrow_dictionary_data_type->ordered(); diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h index 25e1ac3d94929..f60a9cdeb6911 100644 --- a/c_glib/arrow-glib/composite-data-type.h +++ b/c_glib/arrow-glib/composite-data-type.h @@ -83,17 +83,17 @@ struct _GArrowStructDataTypeClass GArrowStructDataType *garrow_struct_data_type_new (GList *fields); gint -garrow_struct_data_type_get_n_fields(GArrowStructDataType *data_type); +garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type); GList * -garrow_struct_data_type_get_fields(GArrowStructDataType *data_type); +garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type); GArrowField * -garrow_struct_data_type_get_field(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i); GArrowField * -garrow_struct_data_type_get_field_by_name(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type, const gchar *name); gint -garrow_struct_data_type_get_field_index(GArrowStructDataType *data_type, +garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type, const gchar *name); @@ -109,14 +109,14 @@ struct _GArrowUnionDataTypeClass }; gint -garrow_union_data_type_get_n_fields(GArrowUnionDataType *data_type); +garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type); GList * -garrow_union_data_type_get_fields(GArrowUnionDataType *data_type); +garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type); GArrowField * -garrow_union_data_type_get_field(GArrowUnionDataType *data_type, +garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, gint i); guint8 * -garrow_union_data_type_get_type_codes(GArrowUnionDataType *data_type, +garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type, gsize *n_type_codes); @@ -172,11 +172,11 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type, GArrowArray *dictionary, gboolean ordered); GArrowDataType * -garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *dictionary_data_type); GArrowArray * -garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *dictionary_data_type); gboolean -garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type); +garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type); G_END_DECLS diff --git a/c_glib/arrow-glib/field.cpp b/c_glib/arrow-glib/field.cpp index b989d288ec30f..d74053af48f05 100644 --- a/c_glib/arrow-glib/field.cpp +++ b/c_glib/arrow-glib/field.cpp @@ -37,11 +37,12 @@ G_BEGIN_DECLS typedef struct GArrowFieldPrivate_ { std::shared_ptr field; + GArrowDataType *data_type; } GArrowFieldPrivate; enum { - PROP_0, - PROP_FIELD + PROP_FIELD = 1, + PROP_DATA_TYPE }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowField, @@ -54,11 +55,22 @@ 
G_DEFINE_TYPE_WITH_PRIVATE(GArrowField, GARROW_FIELD(obj))) static void -garrow_field_finalize(GObject *object) +garrow_field_dispose(GObject *object) { - GArrowFieldPrivate *priv; + auto priv = GARROW_FIELD_GET_PRIVATE(object); - priv = GARROW_FIELD_GET_PRIVATE(object); + if (priv->data_type) { + g_object_unref(priv->data_type); + priv->data_type = nullptr; + } + + G_OBJECT_CLASS(garrow_field_parent_class)->dispose(object); +} + +static void +garrow_field_finalize(GObject *object) +{ + auto priv = GARROW_FIELD_GET_PRIVATE(object); priv->field = nullptr; @@ -80,19 +92,9 @@ garrow_field_set_property(GObject *object, priv->field = *static_cast *>(g_value_get_pointer(value)); break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + case PROP_DATA_TYPE: + priv->data_type = GARROW_DATA_TYPE(g_value_dup_object(value)); break; - } -} - -static void -garrow_field_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); break; @@ -107,21 +109,27 @@ garrow_field_init(GArrowField *object) static void garrow_field_class_init(GArrowFieldClass *klass) { - GObjectClass *gobject_class; - GParamSpec *spec; - - gobject_class = G_OBJECT_CLASS(klass); + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->dispose = garrow_field_dispose; gobject_class->finalize = garrow_field_finalize; gobject_class->set_property = garrow_field_set_property; - gobject_class->get_property = garrow_field_get_property; + GParamSpec *spec; spec = g_param_spec_pointer("field", "Field", "The raw std::shared *", static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property(gobject_class, PROP_FIELD, spec); + + spec = g_param_spec_object("data-type", + "Data type", + "The data type", + GARROW_TYPE_DATA_TYPE, + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATA_TYPE, spec); } /** @@ -137,7 +145,7 @@ garrow_field_new(const gchar *name, { auto arrow_data_type = garrow_data_type_get_raw(data_type); auto arrow_field = std::make_shared(name, arrow_data_type); - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } /** @@ -157,7 +165,7 @@ garrow_field_new_full(const gchar *name, std::make_shared(name, garrow_data_type_get_raw(data_type), nullable); - return garrow_field_new_raw(&arrow_field); + return garrow_field_new_raw(&arrow_field, data_type); } /** @@ -177,14 +185,13 @@ garrow_field_get_name(GArrowField *field) * garrow_field_get_data_type: * @field: A #GArrowField. * - * Returns: (transfer full): The data type of the field. + * Returns: (transfer none): The data type of the field. 
*/ GArrowDataType * garrow_field_get_data_type(GArrowField *field) { - const auto arrow_field = garrow_field_get_raw(field); - auto type = arrow_field->type(); - return garrow_data_type_new_raw(&type); + auto priv = GARROW_FIELD_GET_PRIVATE(field); + return priv->data_type; } /** @@ -233,10 +240,12 @@ garrow_field_to_string(GArrowField *field) G_END_DECLS GArrowField * -garrow_field_new_raw(std::shared_ptr *arrow_field) +garrow_field_new_raw(std::shared_ptr *arrow_field, + GArrowDataType *data_type) { auto field = GARROW_FIELD(g_object_new(GARROW_TYPE_FIELD, "field", arrow_field, + "data-type", data_type, NULL)); return field; } diff --git a/c_glib/arrow-glib/field.hpp b/c_glib/arrow-glib/field.hpp index e130ad5992409..f8d0d46c97ab4 100644 --- a/c_glib/arrow-glib/field.hpp +++ b/c_glib/arrow-glib/field.hpp @@ -23,5 +23,6 @@ #include -GArrowField *garrow_field_new_raw(std::shared_ptr *arrow_field); +GArrowField *garrow_field_new_raw(std::shared_ptr *arrow_field, + GArrowDataType *data_type); std::shared_ptr garrow_field_get_raw(GArrowField *field); diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp index 1affaaede766b..64332419e0972 100644 --- a/c_glib/arrow-glib/schema.cpp +++ b/c_glib/arrow-glib/schema.cpp @@ -21,6 +21,7 @@ # include #endif +#include #include #include #include @@ -173,7 +174,11 @@ garrow_schema_get_field(GArrowSchema *schema, guint i) { const auto arrow_schema = garrow_schema_get_raw(schema); auto arrow_field = arrow_schema->field(i); - return garrow_field_new_raw(&arrow_field); + auto arrow_data_type = arrow_field->type(); + auto data_type = garrow_data_type_new_raw(&arrow_data_type); + auto field = garrow_field_new_raw(&arrow_field, data_type); + g_object_unref(data_type); + return field; } /** @@ -192,7 +197,11 @@ garrow_schema_get_field_by_name(GArrowSchema *schema, if (arrow_field == nullptr) { return NULL; } else { - return garrow_field_new_raw(&arrow_field); + auto arrow_data_type = arrow_field->type(); + auto data_type = garrow_data_type_new_raw(&arrow_data_type); + auto field = garrow_field_new_raw(&arrow_field, data_type); + g_object_unref(data_type); + return field; } } @@ -223,7 +232,10 @@ garrow_schema_get_fields(GArrowSchema *schema) GList *fields = NULL; for (auto arrow_field : arrow_schema->fields()) { - GArrowField *field = garrow_field_new_raw(&arrow_field); + auto arrow_data_type = arrow_field->type(); + auto data_type = garrow_data_type_new_raw(&arrow_data_type); + auto field = garrow_field_new_raw(&arrow_field, data_type); + g_object_unref(data_type); fields = g_list_prepend(fields, field); } diff --git a/c_glib/gandiva-glib/node.cpp b/c_glib/gandiva-glib/node.cpp index 709836524d848..2c68cbeabe330 100644 --- a/c_glib/gandiva-glib/node.cpp +++ b/c_glib/gandiva-glib/node.cpp @@ -1200,7 +1200,6 @@ ggandiva_field_node_new_raw(std::shared_ptr *gandiva_node, "field", field, "return-type", return_type, NULL); - g_object_unref(return_type); return GGANDIVA_FIELD_NODE(field_node); } From 16460d3b90f194c1212ec0b709b2a8171360ef54 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Mon, 7 Jan 2019 10:32:00 +0100 Subject: [PATCH 034/203] ARROW-4173: Fix JIRA library name in error message Author: Kouhei Sutou Closes #3326 from kou/dev-fix-jira-library-name and squashes the following commits: a16654dc Fix JIRA library name in error message --- dev/merge_arrow_pr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 3d6ca31476ee3..5a926f5f6d17a 100755 --- 
a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -47,8 +47,8 @@ try: import jira.client except ImportError: - print("Could not find jira-python library. " - "Run 'sudo pip install jira-python' to install.") + print("Could not find jira library. " + "Run 'sudo pip install jira' to install.") print("Exiting without trying to close the associated JIRA.") sys.exit(1) From 84e10b69a8043f507eabc7b3f224a265baa33a1a Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Mon, 7 Jan 2019 19:58:39 +0900 Subject: [PATCH 035/203] ARROW-4174: [Ruby] Add support for building composite array from raw Ruby objects Author: Kouhei Sutou Closes #3327 from kou/ruby-array-builder and squashes the following commits: 20e5874c Add support for old GObject Introspection 36b993ba Add support for building composite array from raw Ruby objects --- c_glib/arrow-glib/array-builder.cpp | 21 +++ c_glib/arrow-glib/array-builder.h | 3 + c_glib/arrow-glib/decimal128.cpp | 18 +++ c_glib/arrow-glib/decimal128.h | 4 + ruby/red-arrow/lib/arrow/array.rb | 8 +- .../lib/arrow/decimal128-array-builder.rb | 64 ++++++++ ruby/red-arrow/lib/arrow/field.rb | 2 +- .../red-arrow/lib/arrow/list-array-builder.rb | 86 +++++++++++ ruby/red-arrow/lib/arrow/loader.rb | 5 +- .../lib/arrow/struct-array-builder.rb | 129 ++++++++++++++++ .../test/test-decimal128-array-builder.rb | 95 ++++++++++++ ruby/red-arrow/test/test-decimal128-array.rb | 38 +++++ .../red-arrow/test/test-list-array-builder.rb | 62 ++++++++ ruby/red-arrow/test/test-list-array.rb | 32 ++++ .../test/test-struct-array-builder.rb | 145 ++++++++++++++++++ ruby/red-arrow/test/test-struct-array.rb | 21 +++ 16 files changed, 728 insertions(+), 5 deletions(-) create mode 100644 ruby/red-arrow/lib/arrow/decimal128-array-builder.rb create mode 100644 ruby/red-arrow/lib/arrow/list-array-builder.rb create mode 100644 ruby/red-arrow/lib/arrow/struct-array-builder.rb create mode 100644 ruby/red-arrow/test/test-decimal128-array-builder.rb create mode 100644 ruby/red-arrow/test/test-decimal128-array.rb create mode 100644 ruby/red-arrow/test/test-list-array-builder.rb create mode 100644 ruby/red-arrow/test/test-list-array.rb create mode 100644 ruby/red-arrow/test/test-struct-array-builder.rb diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 5f2d4119ce6a2..095c68d87689d 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -3863,6 +3863,27 @@ garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *build "[decimal128-array-builder][append-value]"); } +/** + * garrow_decimal128_array_builder_append_null: + * @builder: A #GArrowDecimal128ArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new NULL element. 
+ * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error) +{ + return garrow_array_builder_append_null + (GARROW_ARRAY_BUILDER(builder), + error, + "[decimal128-array-builder][append-null]"); +} + G_END_DECLS GArrowArrayBuilder * diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index b2ad6f4bfd3fd..bc0a99429b8f1 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -1486,5 +1486,8 @@ GARROW_AVAILABLE_IN_0_12 gboolean garrow_decimal128_array_builder_append_value(GArrowDecimal128ArrayBuilder *builder, GArrowDecimal128 *value, GError **error); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builder, + GError **error); G_END_DECLS diff --git a/c_glib/arrow-glib/decimal128.cpp b/c_glib/arrow-glib/decimal128.cpp index e30eb7ee58638..d87a5019c1203 100644 --- a/c_glib/arrow-glib/decimal128.cpp +++ b/c_glib/arrow-glib/decimal128.cpp @@ -136,6 +136,24 @@ garrow_decimal128_new_integer(const gint64 data) return garrow_decimal128_new_raw(&arrow_decimal); } +/** + * garrow_decimal128_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if both of them is the same value, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal == *arrow_other_decimal; +} + /** * garrow_decimal128_to_string_scale: * @decimal: A #GArrowDecimal128. diff --git a/c_glib/arrow-glib/decimal128.h b/c_glib/arrow-glib/decimal128.h index 918cf3d49b4d2..e8fa59980cd94 100644 --- a/c_glib/arrow-glib/decimal128.h +++ b/c_glib/arrow-glib/decimal128.h @@ -20,6 +20,7 @@ #pragma once #include +#include G_BEGIN_DECLS @@ -37,6 +38,9 @@ struct _GArrowDecimal128Class GArrowDecimal128 *garrow_decimal128_new_string(const gchar *data); GArrowDecimal128 *garrow_decimal128_new_integer(const gint64 data); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); gchar *garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale); gchar *garrow_decimal128_to_string(GArrowDecimal128 *decimal); diff --git a/ruby/red-arrow/lib/arrow/array.rb b/ruby/red-arrow/lib/arrow/array.rb index 049224154dca3..359e70e007bdd 100644 --- a/ruby/red-arrow/lib/arrow/array.rb +++ b/ruby/red-arrow/lib/arrow/array.rb @@ -21,12 +21,14 @@ class Array class << self def new(*args) - return super if args.size != 1 - builder_class_name = "#{name}Builder" if const_defined?(builder_class_name) builder_class = const_get(builder_class_name) - builder_class.build(*args) + if args.size == builder_class.method(:build).arity + builder_class.build(*args) + else + super + end else super end diff --git a/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb new file mode 100644 index 0000000000000..9a849d487571e --- /dev/null +++ b/ruby/red-arrow/lib/arrow/decimal128-array-builder.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require "bigdecimal" + +module Arrow + class Decimal128ArrayBuilder + class << self + def build(data_type, values) + builder = new(data_type) + builder.build(values) + end + end + + alias_method :append_value_raw, :append_value + def append_value(value) + case value + when nil + return append_null + when String + value = Decimal128.new(value) + when Float + value = Decimal128.new(value.to_s) + when BigDecimal + value = Decimal128.new(value.to_s) + end + append_value_raw(value) + end + + def append_values(values, is_valids=nil) + if is_valids + is_valids.each_with_index do |is_valid, i| + if is_valid + append_value(values[i]) + else + append_null + end + end + else + values.each do |value| + if value.nil? + append_null + else + append_value(value) + end + end + end + end + end +end diff --git a/ruby/red-arrow/lib/arrow/field.rb b/ruby/red-arrow/lib/arrow/field.rb index 8c7c8eaa005cb..599ff30975985 100644 --- a/ruby/red-arrow/lib/arrow/field.rb +++ b/ruby/red-arrow/lib/arrow/field.rb @@ -108,7 +108,7 @@ def initialize(*args) name = args[0] data_type = DataType.resolve(args[1]) else - message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + message = "wrong number of arguments (given #{n_args}, expected 1..2)" raise ArgumentError, message end diff --git a/ruby/red-arrow/lib/arrow/list-array-builder.rb b/ruby/red-arrow/lib/arrow/list-array-builder.rb new file mode 100644 index 0000000000000..aa093c2de9b5c --- /dev/null +++ b/ruby/red-arrow/lib/arrow/list-array-builder.rb @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class ListArrayBuilder + class << self + def build(data_type, values) + builder = new(data_type) + builder.build(values) + end + end + + alias_method :append_value_raw, :append_value + + # @overload append_value + # + # Starts appending a list record. You also need to append list + # value by {#value_builder}. + # + # @overload append_value(list) + # + # Appends a list record including list value. + # + # @param value [nil, ::Array] The list value of the record. + # + # If this is `nil`, the list record is null. + # + # If this is `Array`, it's the list value of the record. 
+ # + # @since 0.12.0 + def append_value(*args) + n_args = args.size + + case n_args + when 0 + append_value_raw + when 1 + value = args[0] + case value + when nil + append_null + when ::Array + append_value_raw + @value_builder ||= value_builder + @value_builder.append_values(value, nil) + else + message = "list value must be nil or Array: #{value.inspect}" + raise ArgumentError, message + end + else + message = "wrong number of arguments (given #{n_args}, expected 0..1)" + raise ArgumentError, message + end + end + + def append_values(lists, is_valids=nil) + if is_valids + is_valids.each_with_index do |is_valid, i| + if is_valid + append_value(lists[i]) + else + append_null + end + end + else + lists.each do |list| + append_value(list) + end + end + end + end +end diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index 8747476222955..acd2573e3218f 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -43,11 +43,13 @@ def require_libraries require "arrow/date32-array-builder" require "arrow/date64-array" require "arrow/date64-array-builder" + require "arrow/decimal128-array-builder" require "arrow/decimal128-data-type" require "arrow/dense-union-data-type" require "arrow/dictionary-data-type" require "arrow/field" require "arrow/file-output-stream" + require "arrow/list-array-builder" require "arrow/list-data-type" require "arrow/path-extension" require "arrow/record" @@ -59,6 +61,7 @@ def require_libraries require "arrow/slicer" require "arrow/sparse-union-data-type" require "arrow/struct-array" + require "arrow/struct-array-builder" require "arrow/struct-data-type" require "arrow/table" require "arrow/table-formatter" @@ -101,7 +104,7 @@ def load_method_info(info, klass, method_name) end super(info, klass, method_name) else - super + super end end end diff --git a/ruby/red-arrow/lib/arrow/struct-array-builder.rb b/ruby/red-arrow/lib/arrow/struct-array-builder.rb new file mode 100644 index 0000000000000..883ce84da7de7 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/struct-array-builder.rb @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class StructArrayBuilder + class << self + def build(data_type, values) + builder = new(data_type) + builder.build(values) + end + end + + def [](index_or_name) + find_field_builder(index_or_name) + end + + def find_field_builder(index_or_name) + case index_or_name + when String, Symbol + name = index_or_name + (@name_to_builder ||= build_name_to_builder)[name.to_s] + else + index = index_or_name + cached_field_builders[index] + end + end + + alias_method :append_value_raw, :append_value + + # @overload append_value + # + # Starts appending a struct record. You need to append values of + # fields. 
+ # + # @overload append_value(value) + # + # Appends a struct record including values of fields. + # + # @param value [nil, ::Array, Hash] The struct record value. + # + # If this is `nil`, the struct record is null. + # + # If this is `Array` or `Hash`, they are values of fields. + # + # @since 0.12.0 + def append_value(*args) + n_args = args.size + + case n_args + when 0 + append_value_raw + when 1 + value = args[0] + case value + when nil + append_null + when ::Array + append_value_raw + value.each_with_index do |sub_value, i| + self[i].append_value(sub_value) + end + when Hash + append_value_raw + value.each do |name, sub_value| + self[name].append_value(sub_value) + end + else + message = "struct value must be nil, Array or Hash: #{value.inspect}" + raise ArgumentError, message + end + else + message = "wrong number of arguments (given #{n_args}, expected 0..1)" + raise ArgumentError, message + end + end + + def append_values(values, is_valids=nil) + if is_valids + is_valids.each_with_index do |is_valid, i| + if is_valid + append_value(values[i]) + else + append_null + end + end + else + values.each do |value| + append_value(value) + end + end + end + + alias_method :append_null_raw, :append_null + def append_null + append_null_raw + cached_field_builders.each do |builder| + builder.append_null + end + end + + private + def cached_field_builders + @field_builders ||= field_builders + end + + def build_name_to_builder + name_to_builder = {} + builders = cached_field_builders + value_data_type.fields.each_with_index do |field, i| + name_to_builder[field.name] = builders[i] + end + name_to_builder + end + end +end diff --git a/ruby/red-arrow/test/test-decimal128-array-builder.rb b/ruby/red-arrow/test/test-decimal128-array-builder.rb new file mode 100644 index 0000000000000..841846490b792 --- /dev/null +++ b/ruby/red-arrow/test/test-decimal128-array-builder.rb @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class Decimal128ArrayBuilderTest < Test::Unit::TestCase + def setup + @data_type = Arrow::Decimal128DataType.new(8, 2) + @builder = Arrow::Decimal128ArrayBuilder.new(@data_type) + end + + sub_test_case("#append_value") do + test("nil") do + @builder.append_value(nil) + array = @builder.finish + assert_equal(nil, array[0]) + end + + test("Arrow::Decimal128") do + @builder.append_value(Arrow::Decimal128.new("10.1")) + array = @builder.finish + assert_equal(Arrow::Decimal128.new("10.1"), + array[0]) + end + + test("String") do + @builder.append_value("10.1") + array = @builder.finish + assert_equal(Arrow::Decimal128.new("10.1"), + array[0]) + end + + test("Float") do + @builder.append_value(10.1) + array = @builder.finish + assert_equal(Arrow::Decimal128.new("10.1"), + array[0]) + end + + test("BigDecimal") do + @builder.append_value(BigDecimal("10.1")) + array = @builder.finish + assert_equal(Arrow::Decimal128.new("10.1"), + array[0]) + end + end + + sub_test_case("#append_values") do + test("mixed") do + @builder.append_values([ + Arrow::Decimal128.new("10.1"), + nil, + "10.1", + 10.1, + BigDecimal("10.1"), + ]) + array = @builder.finish + assert_equal([ + Arrow::Decimal128.new("10.1"), + nil, + Arrow::Decimal128.new("10.1"), + Arrow::Decimal128.new("10.1"), + Arrow::Decimal128.new("10.1"), + ], + array.to_a) + end + + test("is_valids") do + @builder.append_values([ + Arrow::Decimal128.new("10.1"), + nil, + Arrow::Decimal128.new("10.1"), + ]) + array = @builder.finish + assert_equal([ + Arrow::Decimal128.new("10.1"), + nil, + Arrow::Decimal128.new("10.1"), + ], + array.to_a) + end + end +end diff --git a/ruby/red-arrow/test/test-decimal128-array.rb b/ruby/red-arrow/test/test-decimal128-array.rb new file mode 100644 index 0000000000000..9162be8b4cf13 --- /dev/null +++ b/ruby/red-arrow/test/test-decimal128-array.rb @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Decimal128ArrayTest < Test::Unit::TestCase + sub_test_case(".new") do + test("build") do + data_type = Arrow::Decimal128DataType.new(8, 2) + values = [ + 10.1, + nil, + "10.1", + BigDecimal("10.1"), + ] + array = Arrow::Decimal128Array.new(data_type, values) + assert_equal([ + Arrow::Decimal128.new("10.1"), + nil, + Arrow::Decimal128.new("10.1"), + Arrow::Decimal128.new("10.1"), + ], + array.to_a) + end + end +end diff --git a/ruby/red-arrow/test/test-list-array-builder.rb b/ruby/red-arrow/test/test-list-array-builder.rb new file mode 100644 index 0000000000000..e36f2c8340be4 --- /dev/null +++ b/ruby/red-arrow/test/test-list-array-builder.rb @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class ListArrayBuilderTest < Test::Unit::TestCase + def setup + @data_type = Arrow::ListDataType.new(name: "visible", type: :boolean) + @builder = Arrow::ListArrayBuilder.new(@data_type) + end + + sub_test_case("#append_value") do + test("nil") do + @builder.append_value(nil) + array = @builder.finish + assert_equal(nil, array[0]) + end + + test("Array") do + @builder.append_value([true, false, true]) + array = @builder.finish + assert_equal([true, false, true], array[0].to_a) + end + end + + sub_test_case("#append_values") do + test("[nil, Array]") do + @builder.append_values([[false], nil, [true, false, true]]) + array = @builder.finish + assert_equal([ + [false], + nil, + [true, false, true], + ], + array.collect {|list| list ? list.to_a : nil}) + end + + test("is_valids") do + @builder.append_values([[false], [true, true], [true, false, true]], + [true, false, true]) + array = @builder.finish + assert_equal([ + [false], + nil, + [true, false, true], + ], + array.collect {|list| list ? list.to_a : nil}) + end + end +end diff --git a/ruby/red-arrow/test/test-list-array.rb b/ruby/red-arrow/test/test-list-array.rb new file mode 100644 index 0000000000000..c1f762492e4ef --- /dev/null +++ b/ruby/red-arrow/test/test-list-array.rb @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class ListArrayTest < Test::Unit::TestCase + sub_test_case(".new") do + test("build") do + data_type = Arrow::ListDataType.new(name: "visible", type: :boolean) + values = [ + [true, false], + nil, + [false, true, false], + ] + array = Arrow::ListArray.new(data_type, values) + assert_equal(values, + array.collect {|value| value ? value.to_a : nil}) + end + end +end diff --git a/ruby/red-arrow/test/test-struct-array-builder.rb b/ruby/red-arrow/test/test-struct-array-builder.rb new file mode 100644 index 0000000000000..205564c816c30 --- /dev/null +++ b/ruby/red-arrow/test/test-struct-array-builder.rb @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class StructArrayBuilderTest < Test::Unit::TestCase + def setup + @data_type = Arrow::StructDataType.new(visible: {type: :boolean}, + count: {type: :uint64}) + @builder = Arrow::StructArrayBuilder.new(@data_type) + end + + sub_test_case("#append_value") do + test("nil") do + @builder.append_value(nil) + array = @builder.finish + assert_equal([ + [nil], + [nil], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("Array") do + @builder.append_value([true, 1]) + array = @builder.finish + assert_equal([ + [true], + [1], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("Hash") do + @builder.append_value(count: 1, visible: true) + array = @builder.finish + assert_equal([ + [true], + [1], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + end + + sub_test_case("#append_values") do + test("[nil]") do + @builder.append_values([nil]) + array = @builder.finish + assert_equal([ + [nil], + [nil], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("[Array]") do + @builder.append_values([[true, 1]]) + array = @builder.finish + assert_equal([ + [true], + [1], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("[Hash]") do + @builder.append_values([{count: 1, visible: true}]) + array = @builder.finish + assert_equal([ + [true], + [1], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("[nil, Array, Hash]") do + @builder.append_values([ + nil, + [true, 1], + {count: 2, visible: false}, + ]) + array = @builder.finish + assert_equal([ + [nil, true, false], + [nil, 1, 2], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + + test("is_valids") do + @builder.append_values([ + [true, 1], + [false, 2], + [true, 3], + ], + [ + true, + false, + true, + ]) + array = @builder.finish + assert_equal([ + [true, nil, true], + [1, nil, 3], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + end +end diff --git a/ruby/red-arrow/test/test-struct-array.rb b/ruby/red-arrow/test/test-struct-array.rb index 1957db4d1fd5a..986b0a9db1696 100644 --- a/ruby/red-arrow/test/test-struct-array.rb +++ b/ruby/red-arrow/test/test-struct-array.rb @@ -16,6 +16,27 @@ # under the License. class StructArrayTest < Test::Unit::TestCase + sub_test_case(".new") do + test("build") do + data_type = Arrow::StructDataType.new(visible: :boolean, + count: :uint64) + values = [ + [true, 1], + nil, + [false, 2], + ] + array = Arrow::StructArray.new(data_type, values) + assert_equal([ + [true, nil, false], + [1, nil, 2], + ], + [ + array[0].to_a, + array[1].to_a, + ]) + end + end + test("#[]") do type = Arrow::StructDataType.new([ Arrow::Field.new("field1", :boolean), From ed1d60d0e459108b23ce4ff9bc9129a005058ece Mon Sep 17 00:00:00 2001 From: Siddharth Dave Date: Mon, 7 Jan 2019 10:42:50 -0600 Subject: [PATCH 036/203] ARROW-3544: [Gandiva] [C++] Create function registry in multiple compilation units to reduce build times Refactored function_registry into separate files. 
The function signatures are now split across 3 different files viz. function_registry.cc, function_registry_binaryfn.cc & function_registry_unaryfn.cc. This approach reduces the build times on my setup with little refactoring. I looked into TensorFlow as well, as Wes had suggested; however, TensorFlow heavily utilizes a template-based approach, which is quite different from this & may require quite a bit of refactoring in gandiva.

Author: Siddharth Dave

Closes #3051 from siddharthdave/master and squashes the following commits:

1d75ac7f6 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 removed individual classes for each registry. removed unused header files. cleaned up code.
ab93602f3 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 fixed lint errors with make format
8161eddb5 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 added a note in function_registry_common.h that it's for internal use.
60954a038 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 > Replaced STRINGIFY with ARROW_STRINGIFY
241b6340c ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 > Yet another refactor of macros > removed redundant comments
faeffeef4 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 1. incorporated review comments suggested by ravindra 2. refactored code a bit & removed unused includes etc.
508b7835e ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 Fixed make check-format errors.
b8176dd40 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 Incorporated review comments: 1. removed duplicate code/macros & moved it into a new header file function_registry_common.h 2. added separate classes for holding different function types 3. during initialization, the map is populated by individually populating from these classes into 1 common map.
1788fb32e ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 Fixing whitespace issue reported during 'make lint'
2cdb6df58 ARROW-3544: Extremely long compile time for function_registry.cc in release mode on clang 6 Refactored function_registry into separate files. The function signatures are now split across 3 different files viz. function_registry.cc, function_registry_binaryfn.cc & function_registry_unaryfn.cc. This approach reduces the build times on my setup with little refactoring.
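For illustration, here is a minimal compilable sketch of the pattern the patch applies, assuming a simplified Function struct as a stand-in for gandiva::NativeFunction (the real registry additionally builds a signature-keyed hash map, as the diff below shows). Each Get*FunctionRegistry() lives in its own .cc file:

#include <string>
#include <vector>

// Simplified stand-in for gandiva::NativeFunction (assumption for this sketch).
struct Function {
  std::string pc_name;  // name of the pre-compiled function, e.g. "add_int32_int32"
};

// Compiled in function_registry_arithmetic.cc in the actual patch.
std::vector<Function> GetArithmeticFunctionRegistry() {
  return {{"add_int32_int32"}, {"multiply_float64_float64"}};
}

// Compiled in function_registry_string.cc in the actual patch.
std::vector<Function> GetStringFunctionRegistry() {
  return {{"octet_length_utf8"}};
}

// The central registry merely concatenates the per-unit vectors once,
// mirroring FunctionRegistry::InitPCMap() in the diff below.
std::vector<Function> InitRegistry() {
  std::vector<Function> all;
  for (auto* get : {GetArithmeticFunctionRegistry, GetStringFunctionRegistry}) {
    auto part = get();
    all.insert(all.end(), part.begin(), part.end());
  }
  return all;
}

The build-time improvement comes from splitting the macro expansions: each translation unit expands only its own slice of the registration macros, instead of one enormous array initializer inside a single function_registry.cc.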
--- cpp/src/arrow/test-util.h | 48 +- cpp/src/arrow/util/macros.h | 2 + cpp/src/gandiva/CMakeLists.txt | 6 + cpp/src/gandiva/function_registry.cc | 440 ++---------------- cpp/src/gandiva/function_registry.h | 25 +- .../gandiva/function_registry_arithmetic.cc | 78 ++++ .../gandiva/function_registry_arithmetic.h | 30 ++ cpp/src/gandiva/function_registry_common.h | 218 +++++++++ cpp/src/gandiva/function_registry_datetime.cc | 65 +++ cpp/src/gandiva/function_registry_datetime.h | 30 ++ cpp/src/gandiva/function_registry_hash.cc | 53 +++ cpp/src/gandiva/function_registry_hash.h | 30 ++ cpp/src/gandiva/function_registry_math_ops.cc | 67 +++ cpp/src/gandiva/function_registry_math_ops.h | 30 ++ cpp/src/gandiva/function_registry_string.cc | 50 ++ cpp/src/gandiva/function_registry_string.h | 30 ++ .../function_registry_timestamp_arithmetic.cc | 81 ++++ .../function_registry_timestamp_arithmetic.h | 30 ++ cpp/src/gandiva/native_function.h | 4 +- 19 files changed, 862 insertions(+), 455 deletions(-) create mode 100644 cpp/src/gandiva/function_registry_arithmetic.cc create mode 100644 cpp/src/gandiva/function_registry_arithmetic.h create mode 100644 cpp/src/gandiva/function_registry_common.h create mode 100644 cpp/src/gandiva/function_registry_datetime.cc create mode 100644 cpp/src/gandiva/function_registry_datetime.h create mode 100644 cpp/src/gandiva/function_registry_hash.cc create mode 100644 cpp/src/gandiva/function_registry_hash.h create mode 100644 cpp/src/gandiva/function_registry_math_ops.cc create mode 100644 cpp/src/gandiva/function_registry_math_ops.h create mode 100644 cpp/src/gandiva/function_registry_string.cc create mode 100644 cpp/src/gandiva/function_registry_string.h create mode 100644 cpp/src/gandiva/function_registry_timestamp_arithmetic.cc create mode 100644 cpp/src/gandiva/function_registry_timestamp_arithmetic.h diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index 33321633090af..aa7c73e59ac54 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -46,35 +46,33 @@ #include "arrow/util/macros.h" #include "arrow/util/visibility.h" -#define STRINGIFY(x) #x - -#define ASSERT_RAISES(ENUM, expr) \ - do { \ - ::arrow::Status s = (expr); \ - if (!s.Is##ENUM()) { \ - FAIL() << "Expected '" STRINGIFY(expr) "' to fail with " STRINGIFY( \ - ENUM) ", but got " \ - << s.ToString(); \ - } \ +#define ASSERT_RAISES(ENUM, expr) \ + do { \ + ::arrow::Status s = (expr); \ + if (!s.Is##ENUM()) { \ + FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \ + ENUM) ", but got " \ + << s.ToString(); \ + } \ } while (false) -#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr) \ - do { \ - ::arrow::Status s = (expr); \ - if (!s.Is##ENUM()) { \ - FAIL() << "Expected '" STRINGIFY(expr) "' to fail with " STRINGIFY( \ - ENUM) ", but got " \ - << s.ToString(); \ - } \ - ASSERT_EQ((message), s.ToString()); \ +#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr) \ + do { \ + ::arrow::Status s = (expr); \ + if (!s.Is##ENUM()) { \ + FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \ + ENUM) ", but got " \ + << s.ToString(); \ + } \ + ASSERT_EQ((message), s.ToString()); \ } while (false) -#define ASSERT_OK(expr) \ - do { \ - ::arrow::Status _s = (expr); \ - if (!_s.ok()) { \ - FAIL() << "'" STRINGIFY(expr) "' failed with " << _s.ToString(); \ - } \ +#define ASSERT_OK(expr) \ + do { \ + ::arrow::Status _s = (expr); \ + if (!_s.ok()) { \ + FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _s.ToString(); \ + } \ 
} while (false) #define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr)) diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index 1d188820837fc..f4c58f4030afd 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -18,6 +18,8 @@ #ifndef ARROW_UTIL_MACROS_H #define ARROW_UTIL_MACROS_H +#define ARROW_STRINGIFY(x) #x + // From Google gutil #ifndef ARROW_DISALLOW_COPY_AND_ASSIGN #define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \ diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 6b67c8699c511..90fe7cf8c9c57 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -55,6 +55,12 @@ set(SRC_FILES annotator.cc exported_funcs_registry.cc filter.cc function_registry.cc + function_registry_arithmetic.cc + function_registry_datetime.cc + function_registry_hash.cc + function_registry_math_ops.cc + function_registry_string.cc + function_registry_timestamp_arithmetic.cc function_signature.cc gdv_function_stubs.cc llvm_generator.cc diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc index 3928fbeb0edb3..83d80b4988690 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -16,7 +16,15 @@ // under the License. #include "gandiva/function_registry.h" - +#include "gandiva/function_registry_arithmetic.h" +#include "gandiva/function_registry_datetime.h" +#include "gandiva/function_registry_hash.h" +#include "gandiva/function_registry_math_ops.h" +#include "gandiva/function_registry_string.h" +#include "gandiva/function_registry_timestamp_arithmetic.h" + +#include +#include #include namespace gandiva { @@ -35,424 +43,46 @@ using arrow::uint32; using arrow::uint64; using arrow::uint8; using arrow::utf8; +using std::iterator; using std::vector; -#define STRINGIFY(a) #a - -// Binary functions that : -// - have the same input type for both params -// - output type is same as the input type -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32 -#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Binary functions that : -// - have the same input type for both params -// - NULL handling is of type NULL_IINTERNAL -// - can return error. -// -// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32 -#define BINARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(), \ - kResultNullIfNull, STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE), \ - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) - -#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, TYPE) \ - BINARY_UNSAFE_NULL_IF_NULL(NAME, TYPE, TYPE) - -// Binary functions that : -// - have different input types, or output type -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. eg. 
mod_int64_int32 -#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(), \ - kResultNullIfNull, STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2)) - -// Binary functions that : -// - have the same input type -// - output type is boolean -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type names. -// eg. equal_int32_int32 -#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32 -#define UNARY_SAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##IN_TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. eg. isnull_int32 -#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, boolean(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Unary functions that : -// - NULL handling is of type NULL_INTERNAL -// -// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32 -#define UNARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ - STRINGIFY(NAME##_##IN_TYPE), \ - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) - -// Binary functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type names, -// eg. is_distinct_from_int32_int32 -#define BINARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE##_##TYPE)) - -// Extract functions (used with data/time types) that : -// - NULL handling is of type NULL_IF_NULL -// -// The pre-compiled fn name includes the base name & input type name. eg. extractYear_date -#define EXTRACT_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullIfNull, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH32_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int32(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH64_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullNever, \ - STRINGIFY(NAME##_##TYPE)) - -// Hash32 functions with seed that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. 
hash32WithSeed_int8 -#define HASH32_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever, \ - STRINGIFY(NAME##WithSeed_##TYPE)) - -// Hash64 functions with seed that : -// - NULL handling is of type NULL_NEVER -// -// The pre-compiled fn name includes the base name & input type name. hash32WithSeed_int8 -#define HASH64_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever, \ - STRINGIFY(NAME##WithSeed_##TYPE)) - -// Iterate the inner macro over all numeric types -#define NUMERIC_TYPES(INNER, NAME) \ - INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64), \ - INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \ - INNER(NAME, float32), INNER(NAME, float64) - -// Iterate the inner macro over numeric and date/time types -#define NUMERIC_DATE_TYPES(INNER, NAME) \ - NUMERIC_TYPES(INNER, NAME), DATE_TYPES(INNER, NAME), TIME_TYPES(INNER, NAME) - -// Iterate the inner macro over all date types -#define DATE_TYPES(INNER, NAME) INNER(NAME, date64), INNER(NAME, timestamp) - -// Iterate the inner macro over all time types -#define TIME_TYPES(INNER, NAME) INNER(NAME, time32) - -// Iterate the inner macro over all data types -#define VAR_LEN_TYPES(INNER, NAME) INNER(NAME, utf8), INNER(NAME, binary) - -// Iterate the inner macro over all numeric types, date types and bool type -#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \ - NUMERIC_DATE_TYPES(INNER, NAME), INNER(NAME, boolean) - -// Iterate the inner macro over all numeric types, date types, bool and varlen types -#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME) \ - NUMERIC_BOOL_DATE_TYPES(INNER, NAME), VAR_LEN_TYPES(INNER, NAME) - -// list of registered native functions. 
-NativeFunction FunctionRegistry::pc_registry_[] = { - // Arithmetic operations - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, add), - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, subtract), - NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, multiply), - NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide), - BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64), - NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, equal), - NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, not_equal), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than_or_equal_to), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than), - NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than_or_equal_to), - UNARY_SAFE_NULL_IF_NULL(not, boolean, boolean), - - // cast operations - UNARY_SAFE_NULL_IF_NULL(castBIGINT, int32, int64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT4, int32, float32), - UNARY_SAFE_NULL_IF_NULL(castFLOAT4, int64, float32), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, int32, float64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, int64, float64), - UNARY_SAFE_NULL_IF_NULL(castFLOAT8, float32, float64), - UNARY_SAFE_NULL_IF_NULL(castDATE, int64, date64), - - // extended math ops - UNARY_SAFE_NULL_IF_NULL(cbrt, int32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, int64, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, float32, float64), - UNARY_SAFE_NULL_IF_NULL(cbrt, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(exp, int32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, int64, float64), - UNARY_SAFE_NULL_IF_NULL(exp, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(exp, float32, float64), - UNARY_SAFE_NULL_IF_NULL(exp, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(log, int32, float64), - UNARY_SAFE_NULL_IF_NULL(log, int64, float64), - UNARY_SAFE_NULL_IF_NULL(log, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(log, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(log, float32, float64), - UNARY_SAFE_NULL_IF_NULL(log, float64, float64), - - UNARY_SAFE_NULL_IF_NULL(log10, int32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, int64, float64), - UNARY_SAFE_NULL_IF_NULL(log10, uint32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, uint64, float64), - UNARY_SAFE_NULL_IF_NULL(log10, float32, float64), - UNARY_SAFE_NULL_IF_NULL(log10, float64, float64), - - BINARY_UNSAFE_NULL_IF_NULL(log, int32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, int64, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, uint32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, uint64, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, float32, float64), - BINARY_UNSAFE_NULL_IF_NULL(log, float64, float64), - - BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, float64), - - // nullable never operations - NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnull), - NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnotnull), - NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric), - - // nullable never binary operations - NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, is_distinct_from), - NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, is_not_distinct_from), - - // date/timestamp operations - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMillennium), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractCentury), - 
DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDecade), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractYear), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractQuarter), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMonth), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractWeek), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDay), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractHour), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMinute), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch), - - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64), - - // date_trunc operations on date/timestamp - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Millennium), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Century), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Decade), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Year), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Quarter), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Month), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Week), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Day), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Hour), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Minute), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Second), - - // time operations - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractHour), - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractMinute), - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond), - - // timestamp diff operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffSecond, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffMinute, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffHour, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffDay, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffWeek, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffMonth, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffQuarter, timestamp, timestamp, int32), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampdiffYear, timestamp, timestamp, int32), - - // timestamp add int32 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, timestamp, int32, timestamp), - // date add int32 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, date64, int32, date64), - 
BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, date64, int32, date64), - - // timestamp add int64 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, timestamp, int64, timestamp), - // date add int64 operations - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddSecond, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMinute, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddHour, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddDay, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddWeek, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddMonth, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddQuarter, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(timestampaddYear, date64, int64, date64), - - // date_add(date64, int32), date_add(timestamp, int32) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, timestamp, int32, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, timestamp, int32, timestamp), - - // date_add(date64, int64), date_add(timestamp, int64) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, timestamp, int64, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, timestamp, int64, timestamp), - - // date_add(int32, date64), date_add(int32, timestamp) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int32, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int32, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int32, timestamp, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int32, timestamp, timestamp), - - // date_add(int64, date64), date_add(int64, timestamp) - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int64, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int64, date64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_add, int64, timestamp, timestamp), - BINARY_GENERIC_SAFE_NULL_IF_NULL(add, int64, timestamp, timestamp), - - // date_sub(date64, int32), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, date64, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, date64, int32, date64), - // date_sub(timestamp, int32), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, timestamp, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, timestamp, int32, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, timestamp, int32, date64), - - // date_sub(date64, int64), subtract and date_diff - 
BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, date64, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, date64, int64, date64), - // date_sub(timestamp, int64), subtract and date_diff - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_sub, timestamp, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(subtract, timestamp, int64, date64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(date_diff, timestamp, int64, date64), - - // hash functions - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash32), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, hash32AsDouble), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, hash32), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, hash32AsDouble), - - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, hash64), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, hash64AsDouble), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, hash64), - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, hash64AsDouble), - - // utf8/binary operations - UNARY_SAFE_NULL_IF_NULL(octet_length, utf8, int32), - UNARY_SAFE_NULL_IF_NULL(octet_length, binary, int32), - UNARY_SAFE_NULL_IF_NULL(bit_length, utf8, int32), - UNARY_SAFE_NULL_IF_NULL(bit_length, binary, int32), - UNARY_UNSAFE_NULL_IF_NULL(char_length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, binary, int32), - - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, equal), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, not_equal), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, less_than_or_equal_to), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than), - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, greater_than_or_equal_to), +FunctionRegistry::iterator FunctionRegistry::begin() const { + return &(*pc_registry_.begin()); +} - BINARY_RELATIONAL_SAFE_NULL_IF_NULL(starts_with, utf8), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL(ends_with, utf8), +FunctionRegistry::iterator FunctionRegistry::end() const { + return &(*pc_registry_.end()); +} - NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull, - "upper_utf8", NativeFunction::kNeedsContext), +std::vector FunctionRegistry::pc_registry_; - NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, - "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder), +SignatureMap FunctionRegistry::pc_registry_map_ = InitPCMap(); - NativeFunction("castDATE", DataTypeVector{utf8()}, date64(), kResultNullIfNull, - "castDATE_utf8", - NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), +SignatureMap FunctionRegistry::InitPCMap() { + SignatureMap map; - NativeFunction("to_date", DataTypeVector{utf8(), utf8(), int32()}, date64(), - kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32", - NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder | - NativeFunction::kCanReturnErrors), -}; // namespace gandiva + auto v1 = GetArithmeticFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v1.begin(), v1.end()); -FunctionRegistry::iterator FunctionRegistry::begin() const { - return std::begin(pc_registry_); -} + auto v2 = GetDateTimeFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v2.begin(), v2.end()); -FunctionRegistry::iterator 
FunctionRegistry::end() const { - return std::end(pc_registry_); -} + auto v3 = GetHashFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v3.begin(), v3.end()); -FunctionRegistry::SignatureMap FunctionRegistry::pc_registry_map_ = InitPCMap(); + auto v4 = GetMathOpsFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v4.begin(), v4.end()); -FunctionRegistry::SignatureMap FunctionRegistry::InitPCMap() { - SignatureMap map; + auto v5 = GetStringFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v5.begin(), v5.end()); - int num_entries = static_cast(sizeof(pc_registry_) / sizeof(NativeFunction)); - for (int i = 0; i < num_entries; i++) { - const NativeFunction* entry = &pc_registry_[i]; + auto v6 = GetDateTimeArithmeticFunctionRegistry(); + pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end()); - DCHECK(map.find(&entry->signature()) == map.end()); - map[&entry->signature()] = entry; - // printf("%s -> %s\n", entry->signature().ToString().c_str(), - // entry->pc_name().c_str()); + for (auto& elem : pc_registry_) { + map.insert(std::make_pair(&(elem.signature()), &elem)); } + return map; } diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h index 0f74089fc6d8e..810bf2d3eb338 100644 --- a/cpp/src/gandiva/function_registry.h +++ b/cpp/src/gandiva/function_registry.h @@ -18,8 +18,8 @@ #ifndef GANDIVA_FUNCTION_REGISTRY_H #define GANDIVA_FUNCTION_REGISTRY_H -#include - +#include +#include "gandiva/function_registry_common.h" #include "gandiva/gandiva_aliases.h" #include "gandiva/native_function.h" @@ -37,28 +37,9 @@ class FunctionRegistry { iterator end() const; private: - struct KeyHash { - std::size_t operator()(const FunctionSignature* k) const { return k->Hash(); } - }; - - struct KeyEquals { - bool operator()(const FunctionSignature* s1, const FunctionSignature* s2) const { - return *s1 == *s2; - } - }; - - static DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); } - - static DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); } - - static DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); } - - typedef std::unordered_map - SignatureMap; static SignatureMap InitPCMap(); - static NativeFunction pc_registry_[]; + static std::vector pc_registry_; static SignatureMap pc_registry_map_; }; diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc new file mode 100644 index 0000000000000..800bc493f0019 --- /dev/null +++ b/cpp/src/gandiva/function_registry_arithmetic.cc @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "gandiva/function_registry_arithmetic.h" +#include "gandiva/function_registry_common.h" + +namespace gandiva { + +#define BINARY_SYMMETRIC_FN(name) NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, name) + +#define BINARY_RELATIONAL_BOOL_FN(name) \ + NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) + +#define BINARY_RELATIONAL_BOOL_DATE_FN(name) \ + NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) + +#define UNARY_OCTET_LEN_FN(name) \ + UNARY_SAFE_NULL_IF_NULL(name, utf8, int32), UNARY_SAFE_NULL_IF_NULL(name, binary, int32) + +#define UNARY_CAST_TO_FLOAT64(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, name, float64) + +#define UNARY_CAST_TO_FLOAT32(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, name, float32) + +std::vector GetArithmeticFunctionRegistry() { + static std::vector arithmetic_fn_registry_ = { + UNARY_SAFE_NULL_IF_NULL(not, boolean, boolean), + UNARY_SAFE_NULL_IF_NULL(castBIGINT, int32, int64), + + UNARY_CAST_TO_FLOAT32(int32), + UNARY_CAST_TO_FLOAT32(int64), + + UNARY_CAST_TO_FLOAT64(int32), + UNARY_CAST_TO_FLOAT64(int64), + UNARY_CAST_TO_FLOAT64(float32), + + UNARY_SAFE_NULL_IF_NULL(castDATE, int64, date64), + + BINARY_SYMMETRIC_FN(add), + BINARY_SYMMETRIC_FN(subtract), + BINARY_SYMMETRIC_FN(multiply), + + NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide), + BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32), + BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64), + + BINARY_RELATIONAL_BOOL_FN(equal), + BINARY_RELATIONAL_BOOL_FN(not_equal), + + BINARY_RELATIONAL_BOOL_DATE_FN(less_than), + BINARY_RELATIONAL_BOOL_DATE_FN(less_than_or_equal_to), + BINARY_RELATIONAL_BOOL_DATE_FN(greater_than), + BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to), + + UNARY_OCTET_LEN_FN(octet_length), + UNARY_OCTET_LEN_FN(bit_length), + + UNARY_UNSAFE_NULL_IF_NULL(char_length, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(length, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, binary, int32)}; + + return arithmetic_fn_registry_; +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_arithmetic.h b/cpp/src/gandiva/function_registry_arithmetic.h new file mode 100644 index 0000000000000..e98a4e7b5b1b4 --- /dev/null +++ b/cpp/src/gandiva/function_registry_arithmetic.h @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
diff --git a/cpp/src/gandiva/function_registry_arithmetic.h b/cpp/src/gandiva/function_registry_arithmetic.h
new file mode 100644
index 0000000000000..e98a4e7b5b1b4
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_arithmetic.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H
+#define GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetArithmeticFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_ARITHMETIC_H
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
new file mode 100644
index 0000000000000..78babce9a7dbf
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -0,0 +1,218 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_COMMON_H
+#define GANDIVA_FUNCTION_REGISTRY_COMMON_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "gandiva/arrow.h"
+#include "gandiva/function_signature.h"
+#include "gandiva/gandiva_aliases.h"
+#include "gandiva/native_function.h"
+
+/* This is a private file, intended for internal use by gandiva & must not be included
+ * directly.
+ */
+namespace gandiva {
+
+using arrow::binary;
+using arrow::boolean;
+using arrow::date64;
+using arrow::float32;
+using arrow::float64;
+using arrow::int16;
+using arrow::int32;
+using arrow::int64;
+using arrow::int8;
+using arrow::uint16;
+using arrow::uint32;
+using arrow::uint64;
+using arrow::uint8;
+using arrow::utf8;
+using std::vector;
+
+inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
+
+inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); }
+
+inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
+
+struct KeyHash {
+  std::size_t operator()(const FunctionSignature* k) const { return k->Hash(); }
+};
+
+struct KeyEquals {
+  bool operator()(const FunctionSignature* s1, const FunctionSignature* s2) const {
+    return *s1 == *s2;
+  }
+};
+
+typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyHash,
+                           KeyEquals>
+    SignatureMap;
+
+// Binary functions that :
+// - have the same input type for both params
+// - output type is same as the input type
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32
+#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, TYPE)                                \
+  NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull,    \
+                 ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE))
+
+// Binary functions that :
+// - have the same input type for both params
+// - NULL handling is of type NULL_INTERNAL
+// - can return error.
+//
+// The pre-compiled fn name includes the base name & input type names. eg. add_int32_int32
+#define BINARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE)                          \
+  NativeFunction(#NAME, DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(),            \
+                 kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE),   \
+                 NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
+
+#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, TYPE) \
+  BINARY_UNSAFE_NULL_IF_NULL(NAME, TYPE, TYPE)
+
+// Binary functions that :
+// - have different input types, or output type
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type names. eg. mod_int64_int32
+#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE)        \
+  NativeFunction(#NAME, DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(),         \
+                 kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2))
+
+// Binary functions that :
+// - have the same input type
+// - output type is boolean
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type names.
+// eg. equal_int32_int32
+#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, TYPE)                                \
+  NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \
+                 ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE))
+
+// Unary functions that :
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32
+#define UNARY_SAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE)                            \
+  NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull,   \
+                 ARROW_STRINGIFY(NAME##_##IN_TYPE))
+
+// Unary functions that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type name. eg. isnull_int32
+#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE)                                 \
+  NativeFunction(#NAME, DataTypeVector{TYPE()}, boolean(), kResultNullNever,   \
+                 ARROW_STRINGIFY(NAME##_##TYPE))
+
+// Unary functions that :
+// - NULL handling is of type NULL_INTERNAL
+//
+// The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32
+#define UNARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE)                          \
+  NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull,   \
+                 ARROW_STRINGIFY(NAME##_##IN_TYPE),                                 \
+                 NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
+
+// Binary functions that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type names,
+// eg. is_distinct_from_int32_int32
+#define BINARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE)                                       \
+  NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullNever,  \
+                 ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE))
+
+// Extract functions (used with date/time types) that :
+// - NULL handling is of type NULL_IF_NULL
+//
+// The pre-compiled fn name includes the base name & input type name. eg. extractYear_date
+#define EXTRACT_SAFE_NULL_IF_NULL(NAME, TYPE)                                 \
+  NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullIfNull,   \
+                 ARROW_STRINGIFY(NAME##_##TYPE))
+
+// Hash32 functions that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type name. eg. hash32_int8
+#define HASH32_SAFE_NULL_NEVER(NAME, TYPE)                                   \
+  NativeFunction(#NAME, DataTypeVector{TYPE()}, int32(), kResultNullNever,   \
+                 ARROW_STRINGIFY(NAME##_##TYPE))
+
+// Hash64 functions that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type name. eg. hash64_int8
+#define HASH64_SAFE_NULL_NEVER(NAME, TYPE)                                   \
+  NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullNever,   \
+                 ARROW_STRINGIFY(NAME##_##TYPE))
+
+// Hash32 functions with seed that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type name. eg. hash32WithSeed_int8
+#define HASH32_SEED_SAFE_NULL_NEVER(NAME, TYPE)                                      \
+  NativeFunction(#NAME, DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever,  \
+                 ARROW_STRINGIFY(NAME##WithSeed_##TYPE))
+
+// Hash64 functions with seed that :
+// - NULL handling is of type NULL_NEVER
+//
+// The pre-compiled fn name includes the base name & input type name. eg. hash64WithSeed_int8
+#define HASH64_SEED_SAFE_NULL_NEVER(NAME, TYPE)                                      \
+  NativeFunction(#NAME, DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever,  \
+                 ARROW_STRINGIFY(NAME##WithSeed_##TYPE))
+
+// Iterate the inner macro over all numeric types
+#define NUMERIC_TYPES(INNER, NAME)                                                   \
+  INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64),    \
+  INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \
+  INNER(NAME, float32), INNER(NAME, float64)
+
+// Iterate the inner macro over numeric and date/time types
+#define NUMERIC_DATE_TYPES(INNER, NAME) \
+  NUMERIC_TYPES(INNER, NAME), DATE_TYPES(INNER, NAME), TIME_TYPES(INNER, NAME)
+
+// Iterate the inner macro over all date types
+#define DATE_TYPES(INNER, NAME) INNER(NAME, date64), INNER(NAME, timestamp)
+
+// Iterate the inner macro over all time types
+#define TIME_TYPES(INNER, NAME) INNER(NAME, time32)
+
+// Iterate the inner macro over all variable-length types
+#define VAR_LEN_TYPES(INNER, NAME) INNER(NAME, utf8), INNER(NAME, binary)
+
+// Iterate the inner macro over all numeric types, date types and bool type
+#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \
+  NUMERIC_DATE_TYPES(INNER, NAME), INNER(NAME, boolean)
+
+// Iterate the inner macro over all numeric types, date types, bool and varlen types
+#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME) \
+  NUMERIC_BOOL_DATE_TYPES(INNER, NAME), VAR_LEN_TYPES(INNER, NAME)
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_COMMON_H
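The `*_TYPES` iteration macros that close out `function_registry_common.h` expand to comma-separated sequences, so a single line like `HASH32_SAFE_NULL_NEVER_FN(hash32)` can drop dozens of entries into a braced initializer list. A minimal, self-contained sketch of that expansion style, using reduced stand-in macros rather than the real ones:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Reduced stand-ins: MAKE_ENTRY plays the role of HASH32_SAFE_NULL_NEVER and
// SMALL_INT_TYPES the role of NUMERIC_TYPES (trimmed to two types).
#define MAKE_ENTRY(NAME, TYPE) std::string(#NAME "_" #TYPE)
#define SMALL_INT_TYPES(INNER, NAME) INNER(NAME, int8), INNER(NAME, int16)

int main() {
  // Expands to { std::string("hash32_int8"), std::string("hash32_int16") }.
  std::vector<std::string> entries = {SMALL_INT_TYPES(MAKE_ENTRY, hash32)};
  for (const auto& e : entries) {
    std::cout << e << "\n";
  }
  return 0;
}
```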
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
new file mode 100644
index 0000000000000..145b7d39395b4
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_registry_datetime.h"
+#include "gandiva/function_registry_common.h"
+
+namespace gandiva {
+
+#define DATE_EXTRACTION_FNS(name)                              \
+  DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Millennium),     \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Century),    \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Decade),     \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Year),       \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Quarter),    \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Month),      \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Week),       \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Day),        \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour),       \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute),     \
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second)
+
+#define TIME_EXTRACTION_FNS(name)                              \
+  TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour),           \
+      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute),     \
+      TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second)
+
+std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
+  static std::vector<NativeFunction> date_time_fn_registry_ = {
+      DATE_EXTRACTION_FNS(extract),
+      DATE_EXTRACTION_FNS(date_trunc_),
+
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy),
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow),
+      DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch),
+
+      TIME_EXTRACTION_FNS(extract),
+
+      NativeFunction("castDATE", DataTypeVector{utf8()}, date64(), kResultNullIfNull,
+                     "castDATE_utf8",
+                     NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
+
+      NativeFunction("to_date", DataTypeVector{utf8(), utf8(), int32()}, date64(),
+                     kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32",
+                     NativeFunction::kNeedsContext |
+                         NativeFunction::kNeedsFunctionHolder |
+                         NativeFunction::kCanReturnErrors)};
+
+  return date_time_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_datetime.h b/cpp/src/gandiva/function_registry_datetime.h
new file mode 100644
index 0000000000000..c9b88942215d8
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_datetime.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H
+#define GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetDateTimeFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_DATE_TIME_H
diff --git a/cpp/src/gandiva/function_registry_hash.cc b/cpp/src/gandiva/function_registry_hash.cc
new file mode 100644
index 0000000000000..a163a230eaca3
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_hash.cc
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_registry_hash.h"
+#include "gandiva/function_registry_common.h"
+
+namespace gandiva {
+
+#define HASH32_SAFE_NULL_NEVER_FN(name) \
+  NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, name)
+
+#define HASH32_SEED_SAFE_NULL_NEVER_FN(name) \
+  NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, name)
+
+#define HASH64_SAFE_NULL_NEVER_FN(name) \
+  NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, name)
+
+#define HASH64_SEED_SAFE_NULL_NEVER_FN(name) \
+  NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, name)
+
+std::vector<NativeFunction> GetHashFunctionRegistry() {
+  static std::vector<NativeFunction> hash_fn_registry_ = {
+      HASH32_SAFE_NULL_NEVER_FN(hash),
+      HASH32_SAFE_NULL_NEVER_FN(hash32),
+      HASH32_SAFE_NULL_NEVER_FN(hash32AsDouble),
+
+      HASH32_SEED_SAFE_NULL_NEVER_FN(hash32),
+      HASH32_SEED_SAFE_NULL_NEVER_FN(hash32AsDouble),
+
+      HASH64_SAFE_NULL_NEVER_FN(hash64),
+      HASH64_SAFE_NULL_NEVER_FN(hash64AsDouble),
+
+      HASH64_SEED_SAFE_NULL_NEVER_FN(hash64),
+      HASH64_SEED_SAFE_NULL_NEVER_FN(hash64AsDouble)};
+
+  return hash_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_hash.h b/cpp/src/gandiva/function_registry_hash.h
new file mode 100644
index 0000000000000..dc02cb21e37b5
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_hash.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_HASH_H
+#define GANDIVA_FUNCTION_REGISTRY_HASH_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetHashFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_HASH_H
diff --git a/cpp/src/gandiva/function_registry_math_ops.cc b/cpp/src/gandiva/function_registry_math_ops.cc
new file mode 100644
index 0000000000000..31b4b13119a86
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_math_ops.cc
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_registry_math_ops.h"
+#include "gandiva/function_registry_common.h"
+
+namespace gandiva {
+
+#define MATH_UNARY_OPS(name)                            \
+  UNARY_SAFE_NULL_IF_NULL(name, int32, float64),        \
+      UNARY_SAFE_NULL_IF_NULL(name, int64, float64),    \
+      UNARY_SAFE_NULL_IF_NULL(name, uint32, float64),   \
+      UNARY_SAFE_NULL_IF_NULL(name, uint64, float64),   \
+      UNARY_SAFE_NULL_IF_NULL(name, float32, float64),  \
+      UNARY_SAFE_NULL_IF_NULL(name, float64, float64)
+
+#define MATH_BINARY_UNSAFE(name)                          \
+  BINARY_UNSAFE_NULL_IF_NULL(name, int32, float64),       \
+      BINARY_UNSAFE_NULL_IF_NULL(name, int64, float64),   \
+      BINARY_UNSAFE_NULL_IF_NULL(name, uint32, float64),  \
+      BINARY_UNSAFE_NULL_IF_NULL(name, uint64, float64),  \
+      BINARY_UNSAFE_NULL_IF_NULL(name, float32, float64), \
+      BINARY_UNSAFE_NULL_IF_NULL(name, float64, float64)
+
+#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name) \
+  NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name)
+
+#define BINARY_SAFE_NULL_NEVER_BOOL_FN(name) \
+  NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, name)
+
+std::vector<NativeFunction> GetMathOpsFunctionRegistry() {
+  static std::vector<NativeFunction> math_fn_registry_ = {
+      MATH_UNARY_OPS(cbrt),
+      MATH_UNARY_OPS(exp),
+      MATH_UNARY_OPS(log),
+      MATH_UNARY_OPS(log10),
+
+      MATH_BINARY_UNSAFE(log),
+
+      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, float64),
+
+      UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull),
+      UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull),
+
+      NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric),
+
+      BINARY_SAFE_NULL_NEVER_BOOL_FN(is_distinct_from),
+      BINARY_SAFE_NULL_NEVER_BOOL_FN(is_not_distinct_from)};
+
+  return math_fn_registry_;
+}
+
+}  // namespace gandiva
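Note the convention encoded by `MATH_UNARY_OPS` above: each unary math function is registered once per numeric input type, always with a `float64` result, so a single double-precision kernel can back all of the integer variants. A minimal sketch of that idea, with `cbrt_int32`/`cbrt_float64` as hypothetical stand-ins for the pre-compiled kernels the entries point at:

```cpp
#include <cmath>
#include <cstdint>
#include <iostream>

// Hypothetical stand-ins for the pre-compiled kernels named by the registry:
// every input type is widened to double before the actual computation.
double cbrt_int32(std::int32_t in) { return std::cbrt(static_cast<double>(in)); }
double cbrt_float64(double in) { return std::cbrt(in); }

int main() {
  std::cout << cbrt_int32(27) << " " << cbrt_float64(27.0) << "\n";  // prints "3 3"
  return 0;
}
```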
diff --git a/cpp/src/gandiva/function_registry_math_ops.h b/cpp/src/gandiva/function_registry_math_ops.h
new file mode 100644
index 0000000000000..0204ffc8809ac
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_math_ops.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_MATHOPS_H
+#define GANDIVA_FUNCTION_REGISTRY_MATHOPS_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetMathOpsFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_MATHOPS_H
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
new file mode 100644
index 0000000000000..c97925af9cbb3
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_registry_string.h"
+#include "gandiva/function_registry_common.h"
+
+namespace gandiva {
+
+#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(name) \
+  VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name)
+
+#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(name) \
+  BINARY_RELATIONAL_SAFE_NULL_IF_NULL(name, utf8)
+
+std::vector<NativeFunction> GetStringFunctionRegistry() {
+  static std::vector<NativeFunction> string_fn_registry_ = {
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(equal),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(not_equal),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than_or_equal_to),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than_or_equal_to),
+
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(starts_with),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(ends_with),
+
+      NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
+                     "upper_utf8", NativeFunction::kNeedsContext),
+
+      NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(),
+                     kResultNullIfNull, "gdv_fn_like_utf8_utf8",
+                     NativeFunction::kNeedsFunctionHolder)};
+
+  return string_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_string.h b/cpp/src/gandiva/function_registry_string.h
new file mode 100644
index 0000000000000..c9217893e5c0b
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_string.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_STRING_H
+#define GANDIVA_FUNCTION_REGISTRY_STRING_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetStringFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_STRING_H
diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc
new file mode 100644
index 0000000000000..7af76909b7d8f
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_registry_timestamp_arithmetic.h"
+#include "gandiva/function_registry_common.h"
+
+namespace gandiva {
+
+#define TIMESTAMP_ADD_FNS(name)                                               \
+  BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp),        \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64),          \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp),    \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64)
+
+#define TIMESTAMP_DIFF_FN(name) \
+  BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, timestamp, int32)
+
+#define DATE_ADD_FNS(name)                                                    \
+  BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64),              \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp),    \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64),          \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp),    \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64),          \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, timestamp),    \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64),          \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp)
+
+#define DATE_DIFF_FNS(name)                                                   \
+  BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64),              \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, date64),       \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64),          \
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, date64)
+
+std::vector<NativeFunction> GetDateTimeArithmeticFunctionRegistry() {
+  static std::vector<NativeFunction> datetime_fn_registry_ = {
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64),
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64),
+
+      TIMESTAMP_DIFF_FN(timestampdiffSecond),
+      TIMESTAMP_DIFF_FN(timestampdiffMinute),
+      TIMESTAMP_DIFF_FN(timestampdiffHour),
+      TIMESTAMP_DIFF_FN(timestampdiffDay),
+      TIMESTAMP_DIFF_FN(timestampdiffWeek),
+      TIMESTAMP_DIFF_FN(timestampdiffMonth),
+      TIMESTAMP_DIFF_FN(timestampdiffQuarter),
+      TIMESTAMP_DIFF_FN(timestampdiffYear),
+
+      TIMESTAMP_ADD_FNS(timestampaddSecond),
+      TIMESTAMP_ADD_FNS(timestampaddMinute),
+      TIMESTAMP_ADD_FNS(timestampaddHour),
+      TIMESTAMP_ADD_FNS(timestampaddDay),
+      TIMESTAMP_ADD_FNS(timestampaddWeek),
+      TIMESTAMP_ADD_FNS(timestampaddMonth),
+      TIMESTAMP_ADD_FNS(timestampaddQuarter),
+      TIMESTAMP_ADD_FNS(timestampaddYear),
+
+      DATE_ADD_FNS(date_add),
+      DATE_ADD_FNS(add),
+
+      DATE_DIFF_FNS(date_sub),
+      DATE_DIFF_FNS(subtract),
+      DATE_DIFF_FNS(date_diff)};
+
+  return datetime_fn_registry_;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.h b/cpp/src/gandiva/function_registry_timestamp_arithmetic.h
new file mode 100644
index 0000000000000..f1b97093663ba
--- /dev/null
+++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H
+#define GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H
+
+#include <vector>
+#include "gandiva/native_function.h"
+
+namespace gandiva {
+
+std::vector<NativeFunction> GetDateTimeArithmeticFunctionRegistry();
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_REGISTRY_TIMESTAMP_ARITHMETIC_H
diff --git a/cpp/src/gandiva/native_function.h b/cpp/src/gandiva/native_function.h
index 7a250e01cb619..5b130a9313c5b 100644
--- a/cpp/src/gandiva/native_function.h
+++ b/cpp/src/gandiva/native_function.h
@@ -52,7 +52,6 @@ class NativeFunction {
   bool NeedsFunctionHolder() const { return (flags_ & kNeedsFunctionHolder) != 0; }
   bool CanReturnErrors() const { return (flags_ & kCanReturnErrors) != 0; }
 
- private:
   NativeFunction(const std::string& base_name, const DataTypeVector& param_types,
                  DataTypePtr ret_type, const ResultNullableType& result_nullable_type,
                  const std::string& pc_name, int32_t flags = 0)
@@ -61,6 +60,7 @@ class NativeFunction {
         result_nullable_type_(result_nullable_type),
         pc_name_(pc_name) {}
 
+ private:
   FunctionSignature signature_;
 
   /// attributes
@@ -69,8 +69,6 @@ class NativeFunction {
 
   /// pre-compiled function name.
   std::string pc_name_;
-
-  friend class FunctionRegistry;
 };
 
 }  // end namespace gandiva

From 1aecb987790bb78c084a2c8f4ce224acc2dfd13b Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Mon, 7 Jan 2019 20:14:39 +0100
Subject: [PATCH 037/203] ARROW-4179: [Python] Use more public API to
 determine whether a test has a pytest mark or not

There was an internal API change in pytest 4.1.0 that caused our pytest
configuration logic to fail when checking whether marks were set on unit tests.
I confirmed that the following fixes the problem both on pytest 4.0.x (where things still worked) and 4.1.0 (when things broke) Author: Wes McKinney Closes #3333 from wesm/ARROW-4179 and squashes the following commits: 646c1cb2 Use iter_markers to get a list of marks for each unit test since the behavior of item.obj changed --- python/pyarrow/tests/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 3c092cfb60247..daaba59d4d35e 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -146,6 +146,8 @@ def pytest_collection_modifyitems(config, items): def pytest_runtest_setup(item): only_set = False + item_marks = {mark.name: mark for mark in item.iter_markers()} + for group in groups: flag = '--{0}'.format(group) only_flag = '--only-{0}'.format(group) @@ -154,7 +156,7 @@ def pytest_runtest_setup(item): if item.config.getoption(only_flag): only_set = True - elif getattr(item.obj, group, None): + elif group in item_marks: is_enabled = (item.config.getoption(flag) or item.config.getoption(enable_flag)) is_disabled = item.config.getoption(disable_flag) @@ -165,8 +167,7 @@ def pytest_runtest_setup(item): skip_item = True for group in groups: only_flag = '--only-{0}'.format(group) - if (getattr(item.obj, group, False) and - item.config.getoption(only_flag)): + if group in item_marks and item.config.getoption(only_flag): skip_item = False if skip_item: From 72405a1fa3c8548b6d331f9049fed74aefd1e9ae Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 8 Jan 2019 05:25:47 +0900 Subject: [PATCH 038/203] ARROW-4151: [Rust] Restructure project directories Author: Chao Sun Closes #3325 from sunchao/ARROW-4151 and squashes the following commits: 869dc69f Fix cargo coverage c24223ec Fix CI scripts 0672fa18 Fix 00-prepare.sh 0dbbb08a ARROW-4151: Restructure project directories --- .travis.yml | 2 +- ci/travis_script_rust.sh | 8 +-- dev/release/00-prepare.sh | 6 +-- dev/release/rat_exclude_files.txt | 2 +- rust/Cargo.toml | 46 ++--------------- rust/arrow/Cargo.toml | 57 ++++++++++++++++++++++ rust/{ => arrow}/README.md | 0 rust/{ => arrow}/benches/array_from_vec.rs | 0 rust/{ => arrow}/benches/builder.rs | 0 rust/{ => arrow}/examples/builders.rs | 0 rust/{ => arrow}/examples/dynamic_types.rs | 0 rust/{ => arrow}/examples/read_csv.rs | 0 rust/{ => arrow}/src/array.rs | 0 rust/{ => arrow}/src/array_data.rs | 0 rust/{ => arrow}/src/array_ops.rs | 0 rust/{ => arrow}/src/bitmap.rs | 0 rust/{ => arrow}/src/buffer.rs | 0 rust/{ => arrow}/src/builder.rs | 0 rust/{ => arrow}/src/csv/mod.rs | 0 rust/{ => arrow}/src/csv/reader.rs | 0 rust/{ => arrow}/src/datatypes.rs | 0 rust/{ => arrow}/src/error.rs | 0 rust/{ => arrow}/src/lib.rs | 0 rust/{ => arrow}/src/memory.rs | 0 rust/{ => arrow}/src/mod.rs | 0 rust/{ => arrow}/src/record_batch.rs | 0 rust/{ => arrow}/src/tensor.rs | 0 rust/{ => arrow}/src/util/bit_util.rs | 0 rust/{ => arrow}/src/util/mod.rs | 0 rust/{ => arrow}/src/util/test_util.rs | 0 rust/{ => arrow}/test/data/null_test.csv | 0 rust/{ => arrow}/test/data/uk_cities.csv | 0 rust/parquet/Cargo.toml | 2 +- 33 files changed, 71 insertions(+), 52 deletions(-) create mode 100644 rust/arrow/Cargo.toml rename rust/{ => arrow}/README.md (100%) rename rust/{ => arrow}/benches/array_from_vec.rs (100%) rename rust/{ => arrow}/benches/builder.rs (100%) rename rust/{ => arrow}/examples/builders.rs (100%) rename rust/{ => arrow}/examples/dynamic_types.rs (100%) rename rust/{ => 
arrow}/examples/read_csv.rs (100%) rename rust/{ => arrow}/src/array.rs (100%) rename rust/{ => arrow}/src/array_data.rs (100%) rename rust/{ => arrow}/src/array_ops.rs (100%) rename rust/{ => arrow}/src/bitmap.rs (100%) rename rust/{ => arrow}/src/buffer.rs (100%) rename rust/{ => arrow}/src/builder.rs (100%) rename rust/{ => arrow}/src/csv/mod.rs (100%) rename rust/{ => arrow}/src/csv/reader.rs (100%) rename rust/{ => arrow}/src/datatypes.rs (100%) rename rust/{ => arrow}/src/error.rs (100%) rename rust/{ => arrow}/src/lib.rs (100%) rename rust/{ => arrow}/src/memory.rs (100%) rename rust/{ => arrow}/src/mod.rs (100%) rename rust/{ => arrow}/src/record_batch.rs (100%) rename rust/{ => arrow}/src/tensor.rs (100%) rename rust/{ => arrow}/src/util/bit_util.rs (100%) rename rust/{ => arrow}/src/util/mod.rs (100%) rename rust/{ => arrow}/src/util/test_util.rs (100%) rename rust/{ => arrow}/test/data/null_test.csv (100%) rename rust/{ => arrow}/test/data/uk_cities.csv (100%) diff --git a/.travis.yml b/.travis.yml index 916ccf460ecf8..ffbb691f652f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -307,7 +307,7 @@ matrix: - pushd ${TRAVIS_BUILD_DIR}/rust # Run coverage for codecov.io - mkdir -p target/kcov - - RUST_BACKTRACE=1 RUSTUP_TOOLCHAIN=stable cargo coverage --verbose + - RUST_BACKTRACE=1 RUSTUP_TOOLCHAIN=nightly cargo coverage --verbose - bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports" - name: Go language: go diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh index af61dd39446ff..8e3c8c3906b24 100755 --- a/ci/travis_script_rust.sh +++ b/ci/travis_script_rust.sh @@ -31,11 +31,11 @@ rustup show # raises on any formatting errors cargo +stable fmt --all -- --check -# raises on any warnings -cargo rustc -- -D warnings - -cargo build +RUSTFLAGS="-D warnings" cargo build cargo test + +# run examples +cd arrow cargo run --example builders cargo run --example dynamic_types cargo run --example read_csv diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 20d9ab8fce651..1c233a35c21ef 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -100,9 +100,9 @@ update_versions() { cd "${SOURCE_DIR}/../../rust" sed -i.bak -r -e \ "s/^version = \".+\"/version = \"${version}\"/g" \ - Cargo.toml parquet/Cargo.toml - rm -f Cargo.toml.bak parquet/Cargo.toml.bak - git add Cargo.toml parquet/Cargo.toml + arrow/Cargo.toml parquet/Cargo.toml + rm -f arrow/Cargo.toml.bak parquet/Cargo.toml.bak + git add arrow/Cargo.toml parquet/Cargo.toml cd - } diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 1086793630b7d..720b19d894ace 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -187,5 +187,5 @@ r/README.Rmd r/man/*.Rd .gitattributes ruby/red-arrow/.yardopts -rust/test/data/*.csv +rust/arrow/test/data/*.csv rust/rust-toolchain diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 1bf64d73ade5e..abfb71ada7951 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -15,46 +15,8 @@ # specific language governing permissions and limitations # under the License. 
-[package] -name = "arrow" -version = "0.12.0-SNAPSHOT" -description = "Rust implementation of Apache Arrow" -homepage = "https://github.com/apache/arrow" -repository = "https://github.com/apache/arrow" -authors = ["Apache Arrow "] -license = "Apache-2.0" -keywords = [ "arrow" ] -include = [ - "src/**/*.rs", - "Cargo.toml", -] -edition = "2018" - -[lib] -name = "arrow" -path = "src/lib.rs" - -[dependencies] -bytes = "0.4" -libc = "0.2" -serde = { version = "1.0.80", features = ["alloc", "rc"] } -serde_derive = "1.0.80" -serde_json = "1.0.13" -rand = "0.5" -csv = "1.0.0" -num = "0.2" - -[dev-dependencies] -criterion = "0.2" -lazy_static = "1" - -[[bench]] -name = "array_from_vec" -harness = false - -[[bench]] -name = "builder" -harness = false - [workspace] -members = ["parquet"] \ No newline at end of file +members = [ + "arrow", + "parquet", +] \ No newline at end of file diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml new file mode 100644 index 0000000000000..77e8d53fa55b5 --- /dev/null +++ b/rust/arrow/Cargo.toml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "arrow" +version = "0.12.0-SNAPSHOT" +description = "Rust implementation of Apache Arrow" +homepage = "https://github.com/apache/arrow" +repository = "https://github.com/apache/arrow" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = [ "arrow" ] +include = [ + "src/**/*.rs", + "Cargo.toml", +] +edition = "2018" + +[lib] +name = "arrow" +path = "src/lib.rs" + +[dependencies] +bytes = "0.4" +libc = "0.2" +serde = { version = "1.0.80", features = ["alloc", "rc"] } +serde_derive = "1.0.80" +serde_json = "1.0.13" +rand = "0.5" +csv = "1.0.0" +num = "0.2" + +[dev-dependencies] +criterion = "0.2" +lazy_static = "1" + +[[bench]] +name = "array_from_vec" +harness = false + +[[bench]] +name = "builder" +harness = false \ No newline at end of file diff --git a/rust/README.md b/rust/arrow/README.md similarity index 100% rename from rust/README.md rename to rust/arrow/README.md diff --git a/rust/benches/array_from_vec.rs b/rust/arrow/benches/array_from_vec.rs similarity index 100% rename from rust/benches/array_from_vec.rs rename to rust/arrow/benches/array_from_vec.rs diff --git a/rust/benches/builder.rs b/rust/arrow/benches/builder.rs similarity index 100% rename from rust/benches/builder.rs rename to rust/arrow/benches/builder.rs diff --git a/rust/examples/builders.rs b/rust/arrow/examples/builders.rs similarity index 100% rename from rust/examples/builders.rs rename to rust/arrow/examples/builders.rs diff --git a/rust/examples/dynamic_types.rs b/rust/arrow/examples/dynamic_types.rs similarity index 100% rename from rust/examples/dynamic_types.rs rename to rust/arrow/examples/dynamic_types.rs diff --git a/rust/examples/read_csv.rs b/rust/arrow/examples/read_csv.rs similarity index 100% rename from rust/examples/read_csv.rs rename to rust/arrow/examples/read_csv.rs diff --git a/rust/src/array.rs b/rust/arrow/src/array.rs similarity index 100% rename from rust/src/array.rs rename to rust/arrow/src/array.rs diff --git a/rust/src/array_data.rs b/rust/arrow/src/array_data.rs similarity index 100% rename from rust/src/array_data.rs rename to rust/arrow/src/array_data.rs diff --git a/rust/src/array_ops.rs b/rust/arrow/src/array_ops.rs similarity index 100% rename from rust/src/array_ops.rs rename to rust/arrow/src/array_ops.rs diff --git a/rust/src/bitmap.rs b/rust/arrow/src/bitmap.rs similarity index 100% rename from rust/src/bitmap.rs rename to rust/arrow/src/bitmap.rs diff --git a/rust/src/buffer.rs b/rust/arrow/src/buffer.rs similarity index 100% rename from rust/src/buffer.rs rename to rust/arrow/src/buffer.rs diff --git a/rust/src/builder.rs b/rust/arrow/src/builder.rs similarity index 100% rename from rust/src/builder.rs rename to rust/arrow/src/builder.rs diff --git a/rust/src/csv/mod.rs b/rust/arrow/src/csv/mod.rs similarity index 100% rename from rust/src/csv/mod.rs rename to rust/arrow/src/csv/mod.rs diff --git a/rust/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs similarity index 100% rename from rust/src/csv/reader.rs rename to rust/arrow/src/csv/reader.rs diff --git a/rust/src/datatypes.rs b/rust/arrow/src/datatypes.rs similarity index 100% rename from rust/src/datatypes.rs rename to rust/arrow/src/datatypes.rs diff --git a/rust/src/error.rs b/rust/arrow/src/error.rs similarity index 100% rename from rust/src/error.rs rename to rust/arrow/src/error.rs diff --git a/rust/src/lib.rs b/rust/arrow/src/lib.rs similarity index 100% rename from rust/src/lib.rs rename to rust/arrow/src/lib.rs diff --git a/rust/src/memory.rs b/rust/arrow/src/memory.rs similarity 
index 100% rename from rust/src/memory.rs rename to rust/arrow/src/memory.rs diff --git a/rust/src/mod.rs b/rust/arrow/src/mod.rs similarity index 100% rename from rust/src/mod.rs rename to rust/arrow/src/mod.rs diff --git a/rust/src/record_batch.rs b/rust/arrow/src/record_batch.rs similarity index 100% rename from rust/src/record_batch.rs rename to rust/arrow/src/record_batch.rs diff --git a/rust/src/tensor.rs b/rust/arrow/src/tensor.rs similarity index 100% rename from rust/src/tensor.rs rename to rust/arrow/src/tensor.rs diff --git a/rust/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs similarity index 100% rename from rust/src/util/bit_util.rs rename to rust/arrow/src/util/bit_util.rs diff --git a/rust/src/util/mod.rs b/rust/arrow/src/util/mod.rs similarity index 100% rename from rust/src/util/mod.rs rename to rust/arrow/src/util/mod.rs diff --git a/rust/src/util/test_util.rs b/rust/arrow/src/util/test_util.rs similarity index 100% rename from rust/src/util/test_util.rs rename to rust/arrow/src/util/test_util.rs diff --git a/rust/test/data/null_test.csv b/rust/arrow/test/data/null_test.csv similarity index 100% rename from rust/test/data/null_test.csv rename to rust/arrow/test/data/null_test.csv diff --git a/rust/test/data/uk_cities.csv b/rust/arrow/test/data/uk_cities.csv similarity index 100% rename from rust/test/data/uk_cities.csv rename to rust/arrow/test/data/uk_cities.csv diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml index e0272ab4f09e1..3cb4f05052315 100644 --- a/rust/parquet/Cargo.toml +++ b/rust/parquet/Cargo.toml @@ -40,7 +40,7 @@ lz4 = "1.23" zstd = "0.4" chrono = "0.4" num-bigint = "0.2" -arrow = { path = ".." } +arrow = { path = "../arrow" } [dev-dependencies] lazy_static = "1" From b92b1f5b08a64004c8b35db24a34ac71de7bd0e3 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 7 Jan 2019 14:33:51 -0600 Subject: [PATCH 039/203] ARROW-4125: [Python] Don't fail ASV if Plasma extension is not built (e.g. on Windows) I would guess I'm the first person to try to run the benchmark suite on Windows! Author: Wes McKinney Closes #3271 from wesm/benchmark-no-plasma and squashes the following commits: c99b76fae flake c7ede9fd4 Revert whitespace change 4938932d2 Check for ImportError in benchmarks/plasma.py 008ae7b98 Don't fail ASV if Plasma extension is not built (e.g. on Windows) --- python/benchmarks/plasma.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/benchmarks/plasma.py b/python/benchmarks/plasma.py index 398ec72561255..90a284515315c 100644 --- a/python/benchmarks/plasma.py +++ b/python/benchmarks/plasma.py @@ -18,7 +18,12 @@ import numpy as np import timeit -import pyarrow.plasma as plasma +try: + import pyarrow.plasma as plasma +except ImportError: + # TODO(wesm): These are not asv benchmarks, so we can just fail + # silently here + pass class SimplePlasmaThroughput(object): From 134081bea48d48307ed08b2e638fa40a3415ba77 Mon Sep 17 00:00:00 2001 From: jlapacik Date: Mon, 7 Jan 2019 15:47:33 -0600 Subject: [PATCH 040/203] ARROW-4126: [Go] offset not used when accessing boolean array Closes https://github.com/apache/arrow/issues/3273 . 
Author: jlapacik Closes #3275 from jlapacik/fix/go-boolean-slice and squashes the following commits: 67c5d739a assign slice value in out of bounds tests 9e3ac33dd allocate new slice for each test case 6901d09f1 ARROW-4126: offset not used when accessing boolean array --- go/arrow/array/boolean.go | 7 +- go/arrow/array/boolean_test.go | 260 +++++++++++++++++++++++++++++++++ 2 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 go/arrow/array/boolean_test.go diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go index 19a692345e357..68de951e0ce8c 100644 --- a/go/arrow/array/boolean.go +++ b/go/arrow/array/boolean.go @@ -45,7 +45,12 @@ func NewBooleanData(data *Data) *Boolean { return a } -func (a *Boolean) Value(i int) bool { return bitutil.BitIsSet(a.values, i) } +func (a *Boolean) Value(i int) bool { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + return bitutil.BitIsSet(a.values, a.array.data.offset+i) +} func (a *Boolean) String() string { o := new(strings.Builder) diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go new file mode 100644 index 0000000000000..e6f4b9bf2bc51 --- /dev/null +++ b/go/arrow/array/boolean_test.go @@ -0,0 +1,260 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package array_test + +import ( + "reflect" + "testing" + + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/memory" +) + +func TestBooleanSliceData(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + values := []bool{true, false, true, true, true, true, true, false, true, false} + + b := array.NewBooleanBuilder(pool) + defer b.Release() + + for _, v := range values { + b.Append(v) + } + + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + if got, want := arr.Len(), len(values); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, arr.Len()) + + for i := range vs { + vs[i] = arr.Value(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + want []bool + }{ + { + interval: [2]int64{0, 0}, + want: []bool{}, + }, + { + interval: [2]int64{10, 10}, + want: []bool{}, + }, + { + interval: [2]int64{0, 5}, + want: []bool{true, false, true, true, true}, + }, + { + interval: [2]int64{5, 10}, + want: []bool{true, true, false, true, false}, + }, + { + interval: [2]int64{2, 7}, + want: []bool{true, true, true, true, true}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) + defer slice.Release() + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, slice.Len()) + + for i := range vs { + vs[i] = slice.Value(i) + } + + if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestBooleanSliceDataWithNull(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + values := []bool{true, false, true, false, false, false, true, false, true, false} + valids := []bool{true, false, true, true, true, true, true, false, true, true} + + b := array.NewBooleanBuilder(pool) + defer b.Release() + + b.AppendValues(values, valids) + + arr := b.NewArray().(*array.Boolean) + defer arr.Release() + + if got, want := arr.Len(), len(valids); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + if got, want := arr.NullN(), 2; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, arr.Len()) + + for i := range vs { + vs[i] = arr.Value(i) + } + + if got, want := vs, values; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + tests := []struct { + interval [2]int64 + nulls int + want []bool + }{ + { + interval: [2]int64{2, 9}, + nulls: 1, + want: []bool{true, false, false, false, true, false, true}, + }, + { + interval: [2]int64{0, 7}, + nulls: 1, + want: []bool{true, false, true, false, false, false, true}, + }, + { + interval: [2]int64{1, 8}, + nulls: 2, + want: []bool{false, true, false, false, false, true, false}, + }, + { + interval: [2]int64{2, 7}, + nulls: 0, + want: []bool{true, false, false, false, true}, + }, + } + + for _, tc := range tests { + t.Run("", func(t *testing.T) { + + slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) + defer slice.Release() + + if got, want := slice.NullN(), tc.nulls; got != want { + t.Errorf("got=%d, want=%d", got, want) + } + + if got, want := slice.Len(), len(tc.want); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + vs := make([]bool, slice.Len()) + + for 
i := range vs {
+				vs[i] = slice.Value(i)
+			}
+
+			if got, want := vs, tc.want; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+		})
+	}
+}
+
+func TestBooleanSliceOutOfBounds(t *testing.T) {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(t, 0)
+
+	values := []bool{true, false, true, false, true, false, true, false, true, false}
+
+	b := array.NewBooleanBuilder(pool)
+	defer b.Release()
+
+	for _, v := range values {
+		b.Append(v)
+	}
+
+	arr := b.NewArray().(*array.Boolean)
+	defer arr.Release()
+
+	slice := array.NewSlice(arr, 3, 8).(*array.Boolean)
+	defer slice.Release()
+
+	tests := []struct {
+		index int
+		panic bool
+	}{
+		{
+			index: -1,
+			panic: true,
+		},
+		{
+			index: 5,
+			panic: true,
+		},
+		{
+			index: 0,
+			panic: false,
+		},
+		{
+			index: 4,
+			panic: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run("", func(t *testing.T) {
+
+			var val bool
+
+			if tc.panic {
+				defer func() {
+					e := recover()
+					if e == nil {
+						t.Fatalf("this should have panicked, but did not; slice value %v", val)
+					}
+					if got, want := e.(string), "arrow/array: index out of range"; got != want {
+						t.Fatalf("invalid error. got=%q, want=%q", got, want)
+					}
+				}()
+			} else {
+				defer func() {
+					if e := recover(); e != nil {
+						t.Fatalf("unexpected panic: %v", e)
+					}
+				}()
+			}
+
+			val = slice.Value(tc.index)
+		})
+	}
+}

From 0eadd412eb1bf10ebd7ec6babcd18a6852fb82a2 Mon Sep 17 00:00:00 2001
From: Chao Sun
Date: Mon, 7 Jan 2019 19:33:09 -0700
Subject: [PATCH 041/203] ARROW-3665: [Rust] Implement StructArrayBuilder

This implements `StructArrayBuilder`, which can be used to build struct arrays.

There is some trickiness in storing child builders of different types. A natural
approach is to box them into the `ArrayBuilder` trait and store them in a vector,
but this makes it impossible to cast them back to a specific type in
`field_builder()`.

To solve this, we maintain two references to each input builder instance: one as
`Box<Any>` and another as `Box<ArrayBuilder>`. The former is used for the
downcasting mentioned above, while the latter is used for calling general methods
on a builder, such as `len()` and `finish()`.

To enable this, this also changed `ArrayBuilder::finish` to return an `ArrayRef`
instead of a specific array type. The old `finish` method is implemented on each
specific array builder, so one can still obtain, say, an `Int32Array` from an
`Int32Builder`.

Author: Chao Sun

Closes #3276 from sunchao/ARROW-3665 and squashes the following commits:

8fa3b61 Rename to_any to into_box_any
8aac785 Add append
227f3b2 Add from_schema
0e8e669 Fix rustfmt issues
5f59518 ARROW-3665: Implement StructArrayBuilder
---
 rust/arrow/examples/builders.rs |   2 +-
 rust/arrow/src/array.rs         |  12 +-
 rust/arrow/src/array_ops.rs     |   2 +-
 rust/arrow/src/bitmap.rs        |   2 +-
 rust/arrow/src/builder.rs       | 465 ++++++++++++++++++++++++++++----
 rust/arrow/src/csv/reader.rs    |  18 +-
 rust/arrow/src/datatypes.rs     |   2 +-
 7 files changed, 439 insertions(+), 64 deletions(-)

diff --git a/rust/arrow/examples/builders.rs b/rust/arrow/examples/builders.rs
index 5273558d966e0..92f45ce67d981 100644
--- a/rust/arrow/examples/builders.rs
+++ b/rust/arrow/examples/builders.rs
@@ -18,7 +18,7 @@
 ///! Many builders are available to easily create different types of arrow arrays
 extern crate arrow;
 
-use arrow::builder::{ArrayBuilder, Int32Builder};
+use arrow::builder::Int32Builder;
 
 fn main() {
     // Primitive Arrays
diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs
index 251dd35eea150..5184b66426399 100644
--- a/rust/arrow/src/array.rs
+++ b/rust/arrow/src/array.rs
@@ -568,11 +568,17 @@ impl From<ListArray> for BinaryArray {
             "BinaryArray can only be created from List arrays, mismatched data types."
         );
 
-        let data = ArrayData::builder(DataType::Utf8)
+        let mut builder = ArrayData::builder(DataType::Utf8)
             .len(v.len())
             .add_buffer(v.data().buffers()[0].clone())
-            .add_buffer(v.data().child_data()[0].buffers()[0].clone())
-            .build();
+            .add_buffer(v.data().child_data()[0].buffers()[0].clone());
+        if let Some(bitmap) = v.data().null_bitmap() {
+            builder = builder
+                .null_count(v.data().null_count())
+                .null_bit_buffer(bitmap.bits.clone())
+        }
+
+        let data = builder.build();
         Self::from(data)
     }
 }
diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/array_ops.rs
index 517111ba76a45..69637094942cf 100644
--- a/rust/arrow/src/array_ops.rs
+++ b/rust/arrow/src/array_ops.rs
@@ -22,7 +22,7 @@ use std::ops::{Add, Div, Mul, Sub};
 use num::Zero;
 
 use crate::array::{Array, BooleanArray, PrimitiveArray};
-use crate::builder::{ArrayBuilder, PrimitiveArrayBuilder};
+use crate::builder::PrimitiveArrayBuilder;
 use crate::datatypes;
 use crate::datatypes::ArrowNumericType;
 use crate::error::{ArrowError, Result};
diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs
index 3d5a77f78a51e..b5771c2b171c8 100644
--- a/rust/arrow/src/bitmap.rs
+++ b/rust/arrow/src/bitmap.rs
@@ -20,7 +20,7 @@ use crate::util::bit_util;
 
 #[derive(PartialEq, Debug)]
 pub struct Bitmap {
-    bits: Buffer,
+    pub(crate) bits: Buffer,
 }
 
 impl Bitmap {
diff --git a/rust/arrow/src/builder.rs b/rust/arrow/src/builder.rs
index a4c8666233877..b762c516331eb 100644
--- a/rust/arrow/src/builder.rs
+++ b/rust/arrow/src/builder.rs
@@ -22,6 +22,7 @@ use std::any::Any;
 use std::io::Write;
 use std::marker::PhantomData;
 use std::mem;
+use std::sync::Arc;
 
 use crate::array::*;
 use crate::array_data::ArrayData;
@@ -211,15 +212,12 @@ impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> {
 }
 
 /// Trait for dealing with different array builders at runtime
-pub trait ArrayBuilder {
-    /// The type of array that this builder creates
-    type ArrayType: Array;
-
+pub trait ArrayBuilder: Any {
    /// Returns the number of array slots in the builder
    fn len(&self) -> usize;
 
    /// Builds the array
-    fn finish(&mut self) -> Self::ArrayType;
+    fn finish(&mut self) -> ArrayRef;
 
    /// Returns the builder as a non-mutable `Any` reference.
    ///
@@ -234,6 +232,9 @@ pub trait ArrayBuilder {
    /// type. In this case, one can first cast this into a `Any`, and then use
    /// `downcast_mut` to get a reference on the specific builder.
    fn as_any_mut(&mut self) -> &mut Any;
+
+    /// Returns the boxed builder as a box of `Any`.
+    fn into_box_any(self: Box<Self>) -> Box<Any>;
 }
 
 /// Array builder for fixed-width primitive types
@@ -255,8 +256,6 @@ pub type Float32Builder = PrimitiveArrayBuilder<Float32Type>;
 pub type Float64Builder = PrimitiveArrayBuilder<Float64Type>;
 
 impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveArrayBuilder<T> {
-    type ArrayType = PrimitiveArray<T>;
-
    /// Returns the builder as a non-mutable `Any` reference.
    fn as_any(&self) -> &Any {
        self
@@ -267,22 +266,19 @@ impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveArrayBuilder<T> {
        self
    }
 
+    /// Returns the boxed builder as a box of `Any`.
+ fn into_box_any(self: Box) -> Box { + self + } + /// Returns the number of array slots in the builder fn len(&self) -> usize { self.values_builder.len } - /// Builds the `PrimitiveArray` and reset this builder. - fn finish(&mut self) -> PrimitiveArray { - let len = self.len(); - let null_bit_buffer = self.bitmap_builder.finish(); - let data = ArrayData::builder(T::get_data_type()) - .len(len) - .null_count(len - bit_util::count_set_bits(null_bit_buffer.data())) - .add_buffer(self.values_builder.finish()) - .null_bit_buffer(null_bit_buffer) - .build(); - PrimitiveArray::::from(data) + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) } } @@ -329,6 +325,23 @@ impl PrimitiveArrayBuilder { self.values_builder.push_slice(v)?; Ok(()) } + + /// Builds the `PrimitiveArray` and reset this builder. + pub fn finish(&mut self) -> PrimitiveArray { + let len = self.len(); + let null_bit_buffer = self.bitmap_builder.finish(); + let null_count = len - bit_util::count_set_bits(null_bit_buffer.data()); + let mut builder = ArrayData::builder(T::get_data_type()) + .len(len) + .add_buffer(self.values_builder.finish()); + if null_count > 0 { + builder = builder + .null_count(null_count) + .null_bit_buffer(null_bit_buffer); + } + let data = builder.build(); + PrimitiveArray::::from(data) + } } /// Array builder for `ListArray` @@ -357,8 +370,6 @@ impl ArrayBuilder for ListArrayBuilder where T: 'static, { - type ArrayType = ListArray; - /// Returns the builder as an non-mutable `Any` reference. fn as_any(&self) -> &Any { self @@ -369,13 +380,45 @@ where self } + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + /// Returns the number of array slots in the builder fn len(&self) -> usize { self.len } + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + +impl ListArrayBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to push values into the child array builder, + /// but you must call `append` to delimit each distinct list value. + pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + /// Finish the current variable-length list array slot + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.offsets_builder + .push(self.values_builder.len() as i32)?; + self.bitmap_builder.push(is_valid)?; + self.len += 1; + Ok(()) + } + /// Builds the `ListArray` and reset this builder. - fn finish(&mut self) -> ListArray { + pub fn finish(&mut self) -> ListArray { let len = self.len(); self.len = 0; let values_arr = self @@ -401,33 +444,12 @@ where } } -impl ListArrayBuilder { - /// Returns the child array builder as a mutable reference. - /// - /// This mutable reference can be used to push values into the child array builder, - /// but you must call `append` to delimit each distinct list value. 
- pub fn values(&mut self) -> &mut T { - &mut self.values_builder - } - - /// Finish the current variable-length list array slot - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.offsets_builder - .push(self.values_builder.len() as i32)?; - self.bitmap_builder.push(is_valid)?; - self.len += 1; - Ok(()) - } -} - /// Array builder for `BinaryArray` pub struct BinaryArrayBuilder { builder: ListArrayBuilder, } impl ArrayBuilder for BinaryArrayBuilder { - type ArrayType = BinaryArray; - /// Returns the builder as an non-mutable `Any` reference. fn as_any(&self) -> &Any { self @@ -438,14 +460,19 @@ impl ArrayBuilder for BinaryArrayBuilder { self } + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + /// Returns the number of array slots in the builder fn len(&self) -> usize { self.builder.len() } - /// Builds the `BinaryArray` and reset this builder. - fn finish(&mut self) -> BinaryArray { - BinaryArray::from(self.builder.finish()) + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) } } @@ -481,6 +508,179 @@ impl BinaryArrayBuilder { pub fn append(&mut self, is_valid: bool) -> Result<()> { self.builder.append(is_valid) } + + /// Append a null value to the array. + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `BinaryArray` and reset this builder. + pub fn finish(&mut self) -> BinaryArray { + BinaryArray::from(self.builder.finish()) + } +} + +/// Array builder for Struct types. +/// +/// Note that callers should make sure that methods of all the child field builders are +/// properly called to maintain the consistency of the data structure. +pub struct StructArrayBuilder { + fields: Vec, + field_anys: Vec>, + field_builders: Vec>, + bitmap_builder: BooleanBufferBuilder, + len: usize, +} + +impl ArrayBuilder for StructArrayBuilder { + /// Returns the number of array slots in the builder. + /// + /// Note that this always return the first child field builder's length, and it is + /// the caller's responsibility to maintain the consistency that all the child field + /// builder should have the equal number of elements. + fn len(&self) -> usize { + self.len + } + + /// Builds the array. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + /// Returns the builder as an non-mutable `Any` reference. + /// + /// This is most useful when one wants to call non-mutable APIs on a specific builder + /// type. In this case, one can first cast this into a `Any`, and then use + /// `downcast_ref` to get a reference on the specific builder. + fn as_any(&self) -> &Any { + self + } + + /// Returns the builder as an mutable `Any` reference. + /// + /// This is most useful when one wants to call mutable APIs on a specific builder + /// type. In this case, one can first cast this into a `Any`, and then use + /// `downcast_mut` to get a reference on the specific builder. + fn as_any_mut(&mut self) -> &mut Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } +} + +impl StructArrayBuilder { + pub fn new(fields: Vec, builders: Vec>) -> Self { + let mut field_anys = Vec::with_capacity(builders.len()); + let mut field_builders = Vec::with_capacity(builders.len()); + + // Create and maintain two references for each of the input builder. 
We need the + // extra `Any` reference because we need to cast the builder to a specific type + // in `field_builder()` by calling `downcast_mut`. + for f in builders.into_iter() { + let raw_f = Box::into_raw(f); + let raw_f_copy = raw_f; + unsafe { + field_anys.push(Box::from_raw(raw_f).into_box_any()); + field_builders.push(Box::from_raw(raw_f_copy)); + } + } + + Self { + fields, + field_anys, + field_builders, + bitmap_builder: BooleanBufferBuilder::new(0), + len: 0, + } + } + + pub fn from_schema(schema: Schema, capacity: usize) -> Self { + let fields = schema.fields(); + let mut builders = Vec::with_capacity(fields.len()); + for f in schema.fields() { + builders.push(Self::from_field(f.clone(), capacity)); + } + Self::new(schema.fields, builders) + } + + fn from_field(f: Field, capacity: usize) -> Box { + match f.data_type() { + DataType::Boolean => Box::new(BooleanBuilder::new(capacity)), + DataType::Int8 => Box::new(Int8Builder::new(capacity)), + DataType::Int16 => Box::new(Int16Builder::new(capacity)), + DataType::Int32 => Box::new(Int32Builder::new(capacity)), + DataType::Int64 => Box::new(Int64Builder::new(capacity)), + DataType::UInt8 => Box::new(UInt8Builder::new(capacity)), + DataType::UInt16 => Box::new(UInt16Builder::new(capacity)), + DataType::UInt32 => Box::new(UInt32Builder::new(capacity)), + DataType::UInt64 => Box::new(UInt64Builder::new(capacity)), + DataType::Float32 => Box::new(Float32Builder::new(capacity)), + DataType::Float64 => Box::new(Float64Builder::new(capacity)), + DataType::Utf8 => Box::new(BinaryArrayBuilder::new(capacity)), + DataType::Struct(fields) => { + let schema = Schema::new(fields.clone()); + Box::new(Self::from_schema(schema, capacity)) + } + t @ _ => panic!("Data type {:?} is not currently supported", t), + } + } + + /// Returns a mutable reference to the child field builder at index `i`. + /// Result will be `None` if the input type `T` provided doesn't match the actual + /// field builder's type. + pub fn field_builder(&mut self, i: usize) -> Option<&mut T> { + self.field_anys[i].downcast_mut::() + } + + /// Returns the number of fields for the struct this builder is building. + pub fn num_fields(&self) -> usize { + self.field_builders.len() + } + + /// Appends an element (either null or non-null) to the struct. The actual elements + /// should be appended for each child sub-array in a consistent way. + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.bitmap_builder.push(is_valid)?; + self.len += 1; + Ok(()) + } + + /// Appends a null element to the struct. + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `StructArray` and reset this builder. + pub fn finish(&mut self) -> StructArray { + let mut child_data = Vec::with_capacity(self.field_builders.len()); + for f in &mut self.field_builders { + let arr = f.finish(); + child_data.push(arr.data()); + } + + let null_bit_buffer = self.bitmap_builder.finish(); + let null_count = self.len - bit_util::count_set_bits(null_bit_buffer.data()); + let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) + .len(self.len) + .child_data(child_data); + if null_count > 0 { + builder = builder + .null_count(null_count) + .null_bit_buffer(null_bit_buffer); + } + StructArray::from(builder.build()) + } +} + +impl Drop for StructArrayBuilder { + fn drop(&mut self) { + // To avoid double drop on the field array builders. 
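+        // (`field_anys` and `field_builders` alias the same boxed builders,
+        // each created from one raw pointer in `new`, so letting both vectors
+        // drop would free every child builder twice; forgetting one of the
+        // two aliases leaves a single owner.)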
+ let builders = ::std::mem::replace(&mut self.field_builders, Vec::new()); + ::std::mem::forget(builders); + } } #[cfg(test)] @@ -488,6 +688,7 @@ mod tests { use super::*; use crate::array::Array; + use crate::bitmap::Bitmap; #[test] fn test_builder_i32_empty() { @@ -983,4 +1184,178 @@ mod tests { assert_eq!(5, binary_array.value_offset(2)); assert_eq!(5, binary_array.value_length(2)); } + + #[test] + fn test_struct_array_builder() { + let string_builder = BinaryArrayBuilder::new(4); + let int_builder = Int32Builder::new(4); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Utf8, false)); + field_builders.push(Box::new(string_builder) as Box); + fields.push(Field::new("f2", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + + let mut builder = StructArrayBuilder::new(fields, field_builders); + assert_eq!(2, builder.num_fields()); + + let string_builder = builder + .field_builder::(0) + .expect("builder at field 0 should be binary builder"); + string_builder.push_string("joe").unwrap(); + string_builder.append_null().unwrap(); + string_builder.append_null().unwrap(); + string_builder.push_string("mark").unwrap(); + + let int_builder = builder + .field_builder::(1) + .expect("builder at field 1 should be int builder"); + int_builder.push(1).unwrap(); + int_builder.push(2).unwrap(); + int_builder.push_null().unwrap(); + int_builder.push(4).unwrap(); + + builder.append(true).unwrap(); + builder.append(true).unwrap(); + builder.append_null().unwrap(); + builder.append(true).unwrap(); + + let arr = builder.finish(); + + let struct_data = arr.data(); + assert_eq!(4, struct_data.len()); + assert_eq!(1, struct_data.null_count()); + assert_eq!( + &Some(Bitmap::from(Buffer::from(&[11_u8]))), + struct_data.null_bitmap() + ); + + let expected_string_data = ArrayData::builder(DataType::Utf8) + .len(4) + .null_count(2) + .null_bit_buffer(Buffer::from(&[9_u8])) + .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice())) + .add_buffer(Buffer::from("joemark".as_bytes())) + .build(); + + let expected_int_data = ArrayData::builder(DataType::Int32) + .len(4) + .null_count(1) + .null_bit_buffer(Buffer::from(&[11_u8])) + .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice())) + .build(); + + assert_eq!(expected_string_data, arr.column(0).data()); + + // TODO: implement equality for ArrayData + assert_eq!(expected_int_data.len(), arr.column(1).data().len()); + assert_eq!( + expected_int_data.null_count(), + arr.column(1).data().null_count() + ); + assert_eq!( + expected_int_data.null_bitmap(), + arr.column(1).data().null_bitmap() + ); + let expected_value_buf = expected_int_data.buffers()[0].clone(); + let actual_value_buf = arr.column(1).data().buffers()[0].clone(); + for i in 0..expected_int_data.len() { + if !expected_int_data.is_null(i) { + assert_eq!( + expected_value_buf.data()[i * 4..(i + 1) * 4], + actual_value_buf.data()[i * 4..(i + 1) * 4] + ); + } + } + } + + #[test] + fn test_struct_array_builder_finish() { + let int_builder = Int32Builder::new(10); + let bool_builder = BooleanBuilder::new(10); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + fields.push(Field::new("f2", DataType::Boolean, false)); + field_builders.push(Box::new(bool_builder) as Box); + + let mut builder = StructArrayBuilder::new(fields, field_builders); + builder + .field_builder::(0) + .unwrap() + 
.push_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + .unwrap(); + builder + .field_builder::(1) + .unwrap() + .push_slice(&[ + false, true, false, true, false, true, false, true, false, true, + ]) + .unwrap(); + + let arr = builder.finish(); + assert_eq!(10, arr.len()); + assert_eq!(0, builder.len()); + + builder + .field_builder::(0) + .unwrap() + .push_slice(&[1, 3, 5, 7, 9]) + .unwrap(); + builder + .field_builder::(1) + .unwrap() + .push_slice(&[false, true, false, true, false]) + .unwrap(); + + let arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, builder.len()); + } + + #[test] + fn test_struct_array_builder_from_schema() { + let mut fields = Vec::new(); + fields.push(Field::new("f1", DataType::Float32, false)); + fields.push(Field::new("f2", DataType::Utf8, false)); + let mut sub_fields = Vec::new(); + sub_fields.push(Field::new("g1", DataType::Int32, false)); + sub_fields.push(Field::new("g2", DataType::Boolean, false)); + let struct_type = DataType::Struct(sub_fields); + fields.push(Field::new("f3", struct_type, false)); + + let mut builder = StructArrayBuilder::from_schema(Schema::new(fields), 5); + assert_eq!(3, builder.num_fields()); + assert!(builder.field_builder::(0).is_some()); + assert!(builder.field_builder::(1).is_some()); + assert!(builder.field_builder::(2).is_some()); + } + + #[test] + #[should_panic(expected = "Data type List(Int64) is not currently supported")] + fn test_struct_array_builder_from_schema_unsupported_type() { + let mut fields = Vec::new(); + fields.push(Field::new("f1", DataType::Int16, false)); + let list_type = DataType::List(Box::new(DataType::Int64)); + fields.push(Field::new("f2", list_type, false)); + + let _ = StructArrayBuilder::from_schema(Schema::new(fields), 5); + } + + #[test] + fn test_struct_array_builder_field_builder_type_mismatch() { + let int_builder = Int32Builder::new(10); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + + let mut builder = StructArrayBuilder::new(fields, field_builders); + assert!(builder.field_builder::(0).is_none()); + } + } diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index b9c46fc3217cc..10be0abe96081 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -46,7 +46,7 @@ use std::sync::Arc; use csv as csv_crate; -use crate::array::{ArrayRef, BinaryArray}; +use crate::array::ArrayRef; use crate::builder::*; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -138,20 +138,14 @@ impl Reader { &DataType::Float32 => self.build_primitive_array::(rows, i), &DataType::Float64 => self.build_primitive_array::(rows, i), &DataType::Utf8 => { - let values_builder: UInt8Builder = UInt8Builder::new(rows.len()); - let mut list_builder = ListArrayBuilder::new(values_builder); + let mut builder = BinaryArrayBuilder::new(rows.len()); for row_index in 0..rows.len() { match rows[row_index].get(*i) { - Some(s) => { - list_builder.values().push_slice(s.as_bytes()).unwrap(); - list_builder.append(true).unwrap(); - } - _ => { - list_builder.append(false).unwrap(); - } + Some(s) => builder.push_string(s).unwrap(), + _ => builder.append(false).unwrap(), } } - Ok(Arc::new(BinaryArray::from(list_builder.finish())) as ArrayRef) + Ok(Arc::new(builder.finish()) as ArrayRef) } other => Err(ArrowError::ParseError(format!( "Unsupported data type {:?}", @@ -196,7 +190,7 @@ impl Reader { _ => builder.push_null()?, } } - 
Ok(Arc::new(builder.finish()) as ArrayRef) + Ok(Arc::new(builder.finish())) } } diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 49e06eb0969b2..0627b4523a1ce 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -350,7 +350,7 @@ impl fmt::Display for Field { /// layout. #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Schema { - fields: Vec, + pub(crate) fields: Vec, } impl Schema { From a1ea48b51982e9ac13b28728edf8e009527eea2e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 7 Jan 2019 20:14:13 -0700 Subject: [PATCH 042/203] ARROW-4185: [Rust] Change directory before running Rust examples on Windows Author: Andy Grove Closes #3341 from andygrove/ARROW-4185 and squashes the following commits: c9fa73e Change directory before running Rust examples --- ci/rust-build-main.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat index 6ef451204d45a..ac5c9e7589245 100644 --- a/ci/rust-build-main.bat +++ b/ci/rust-build-main.bat @@ -36,6 +36,7 @@ cargo test --target %TARGET% --release || exit /B @echo @echo Run example (release) @echo --------------------- +cd arrow cargo run --example builders --target %TARGET% --release || exit /B cargo run --example dynamic_types --target %TARGET% --release || exit /B cargo run --example read_csv --target %TARGET% --release || exit /B From 1143942bc5264d89a343031e522ffc5aa7abf7b3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 7 Jan 2019 22:51:10 -0700 Subject: [PATCH 043/203] ARROW-4042: [Rust] Rename BinaryArray::get_value to value This PR makes BinaryArray consistent with PrimitiveArray (and with the C++ implementation) Author: Andy Grove Closes #3343 from andygrove/ARROW-4042 and squashes the following commits: 861d09e Rename BinaryArray::get_value to value for consistency with PrimitiveArray --- rust/arrow/examples/read_csv.rs | 2 +- rust/arrow/src/array.rs | 16 ++++++++-------- rust/arrow/src/builder.rs | 12 ++++++------ rust/arrow/src/csv/reader.rs | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/rust/arrow/examples/read_csv.rs b/rust/arrow/examples/read_csv.rs index 147d2f9c23845..fd15e333bcfc9 100644 --- a/rust/arrow/examples/read_csv.rs +++ b/rust/arrow/examples/read_csv.rs @@ -59,7 +59,7 @@ fn main() { .unwrap(); for i in 0..batch.num_rows() { - let city_name: String = String::from_utf8(city.get_value(i).to_vec()).unwrap(); + let city_name: String = String::from_utf8(city.value(i).to_vec()).unwrap(); println!( "City: {}, Latitude: {}, Longitude: {}", diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index 5184b66426399..f8272eb007db6 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -470,7 +470,7 @@ pub struct BinaryArray { impl BinaryArray { /// Returns the element at index `i` as a byte slice. - pub fn get_value(&self, i: usize) -> &[u8] { + pub fn value(&self, i: usize) -> &[u8] { assert!(i < self.data.len(), "BinaryArray out of bounds access"); let offset = i.checked_add(self.data.offset()).unwrap(); unsafe { @@ -486,7 +486,7 @@ impl BinaryArray { /// /// Note this doesn't do any bound checking, for performance reason. 
pub fn get_string(&self, i: usize) -> String { - let slice = self.get_value(i); + let slice = self.value(i); unsafe { String::from_utf8_unchecked(Vec::from(slice)) } } @@ -951,13 +951,13 @@ mod tests { let binary_array = BinaryArray::from(array_data); assert_eq!(3, binary_array.len()); assert_eq!(0, binary_array.null_count()); - assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.get_value(0)); + assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); assert_eq!("hello", binary_array.get_string(0)); - assert_eq!([] as [u8; 0], binary_array.get_value(1)); + assert_eq!([] as [u8; 0], binary_array.value(1)); assert_eq!("", binary_array.get_string(1)); assert_eq!( [b'p', b'a', b'r', b'q', b'u', b'e', b't'], - binary_array.get_value(2) + binary_array.value(2) ); assert_eq!("parquet", binary_array.get_string(2)); assert_eq!(5, binary_array.value_offset(2)); @@ -977,7 +977,7 @@ mod tests { let binary_array = BinaryArray::from(array_data); assert_eq!( [b'p', b'a', b'r', b'q', b'u', b'e', b't'], - binary_array.get_value(1) + binary_array.value(1) ); assert_eq!("parquet", binary_array.get_string(1)); assert_eq!(5, binary_array.value_offset(0)); @@ -1019,7 +1019,7 @@ mod tests { assert_eq!(binary_array1.len(), binary_array2.len()); assert_eq!(binary_array1.null_count(), binary_array2.null_count()); for i in 0..binary_array1.len() { - assert_eq!(binary_array1.get_value(i), binary_array2.get_value(i)); + assert_eq!(binary_array1.value(i), binary_array2.value(i)); assert_eq!(binary_array1.get_string(i), binary_array2.get_string(i)); assert_eq!(binary_array1.value_offset(i), binary_array2.value_offset(i)); assert_eq!(binary_array1.value_length(i), binary_array2.value_length(i)); @@ -1082,7 +1082,7 @@ mod tests { .add_buffer(Buffer::from(&values[..])) .build(); let binary_array = BinaryArray::from(array_data); - binary_array.get_value(4); + binary_array.value(4); } #[test] diff --git a/rust/arrow/src/builder.rs b/rust/arrow/src/builder.rs index b762c516331eb..a0bb43c7dee53 100644 --- a/rust/arrow/src/builder.rs +++ b/rust/arrow/src/builder.rs @@ -1133,11 +1133,11 @@ mod tests { assert_eq!(3, binary_array.len()); assert_eq!(0, binary_array.null_count()); - assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.get_value(0)); + assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); assert_eq!("hello", binary_array.get_string(0)); - assert_eq!([] as [u8; 0], binary_array.get_value(1)); + assert_eq!([] as [u8; 0], binary_array.value(1)); assert_eq!("", binary_array.get_string(1)); - assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.get_value(2)); + assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); assert_eq!("world", binary_array.get_string(2)); assert_eq!(5, binary_array.value_offset(2)); assert_eq!(5, binary_array.value_length(2)); @@ -1175,11 +1175,11 @@ mod tests { assert_eq!(3, binary_array.len()); assert_eq!(0, binary_array.null_count()); - assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.get_value(0)); + assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); assert_eq!("hello", binary_array.get_string(0)); - assert_eq!([] as [u8; 0], binary_array.get_value(1)); + assert_eq!([] as [u8; 0], binary_array.value(1)); assert_eq!("", binary_array.get_string(1)); - assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.get_value(2)); + assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); assert_eq!("world", binary_array.get_string(2)); assert_eq!(5, binary_array.value_offset(2)); assert_eq!(5, binary_array.value_length(2)); 
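For reference, a minimal usage sketch (not part of the diff) of the renamed
accessor; it assumes only the `BinaryArrayBuilder` API exercised in the tests
above:

```rust
extern crate arrow;

use arrow::builder::BinaryArrayBuilder;

fn main() {
    let mut builder = BinaryArrayBuilder::new(2);
    builder.push_string("hello").unwrap();
    builder.push_string("world").unwrap();
    let array = builder.finish();

    // `value` (formerly `get_value`) returns the raw bytes of element `i`;
    // `get_string` copies them into an owned `String`.
    assert_eq!([b'h', b'e', b'l', b'l', b'o'], array.value(0));
    assert_eq!("world", array.get_string(1));
}
```

The `get_string` helper keeps its old name; only the byte-slice accessor was
renamed to match `PrimitiveArray`.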
diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index 10be0abe96081..57c7dde1b250d 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -231,7 +231,7 @@ mod tests { .downcast_ref::() .unwrap(); - let city_name: String = String::from_utf8(city.get_value(13).to_vec()).unwrap(); + let city_name: String = String::from_utf8(city.value(13).to_vec()).unwrap(); assert_eq!("Aberdeen, Aberdeen City, UK", city_name); } From 2057859744cb2ada93fc97838e09eb954963dc00 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 8 Jan 2019 11:03:17 +0100 Subject: [PATCH 044/203] ARROW-4188: [Rust] Move Rust README to top level rust directory Author: Andy Grove Closes #3342 from andygrove/ARROW-4188 and squashes the following commits: fedcd7bc split README between top level and arrow level b68f77cb Merge branch 'master' into ARROW-4188 e6dbd87f add badges back f2ee7e05 Move Rust README to top level rust directory --- rust/README.md | 50 ++++++++++++++++++++++++++++++++++++++++++++ rust/arrow/README.md | 22 ------------------- 2 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 rust/README.md diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000000000..8fe7885de068c --- /dev/null +++ b/rust/README.md @@ -0,0 +1,50 @@ + + +# Native Rust implementation of Apache Arrow + +## The Rust implementation of Arrow consists of the following crates + +- Arrow [(README)](arrow/README.md) +- Parquet [(README)](parquet/README.md) + +## Run Tests + +Parquet support in Arrow requires data to test against, this data is in a +git submodule. To pull down this data run the following: + +```bash +git submodule update --init +``` + +The data can then be found in `cpp/submodules/parquet_testing/data`. +Create a new environment variable called `PARQUET_TEST_DATA` to point +to this location and then `cargo test` as usual. + +## Code Formatting + +Our CI uses `rustfmt` to check code formatting. Although the project is +built and tested against nightly rust we use the stable version of +`rustfmt`. So before submitting a PR be sure to run the following +and check for lint issues: + +```bash +cargo +stable fmt --all -- --check +``` + diff --git a/rust/arrow/README.md b/rust/arrow/README.md index cbfd4dd684a0f..9df2dd2e9e26f 100644 --- a/rust/arrow/README.md +++ b/rust/arrow/README.md @@ -57,28 +57,6 @@ cargo run --example dynamic_types cargo run --example read_csv ``` -## Run Tests - -Parquet support in Arrow requires data to test against, this data is in a -git submodule. To pull down this data run the following: - -```bash -git submodule update --init -``` - -The data can then be found in `cpp/submodules/parquet_testing/data`. -Create a new environment variable called `PARQUET_TEST_DATA` to point -to this location and then `cargo test` as usual. - -Our CI uses `rustfmt` to check code formatting. Although the project is -built and tested against nightly rust we use the stable version of -`rustfmt`. 
So before submitting a PR be sure to run the following -and check for lint issues: - -```bash -cargo +stable fmt --all -- --check -``` - # Publishing to crates.io An Arrow committer can publish this crate after an official project release has From 55848a36edb5ea5e0765068ef5f09d07d09d4898 Mon Sep 17 00:00:00 2001 From: Pindikura Ravindra Date: Tue, 8 Jan 2019 16:13:18 +0530 Subject: [PATCH 045/203] ARROW-4104: [Java] fix a race condition in AllocationManager (#3246) --- .../java/org/apache/arrow/memory/AllocationManager.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java index 687674f951b89..c10d246013290 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java @@ -230,7 +230,7 @@ public boolean transferBalance(final BufferLedger target) { // since two balance transfers out from the allocator manager could cause incorrect // accounting, we need to ensure // that this won't happen by synchronizing on the allocator manager instance. - synchronized (this) { + synchronized (AllocationManager.this) { if (owningLedger != this) { return true; } @@ -310,7 +310,7 @@ public int decrement(int decrement) { allocator.assertOpen(); final int outcome; - synchronized (this) { + synchronized (AllocationManager.this) { outcome = bufRefCnt.addAndGet(-decrement); if (outcome == 0) { lDestructionTime = System.nanoTime(); @@ -411,7 +411,7 @@ public int getSize() { * @return Amount of accounted(owned) memory associated with this ledger. */ public int getAccountedSize() { - synchronized (this) { + synchronized (AllocationManager.this) { if (owningLedger == this) { return size; } else { From 8704f8bd98f1edcf1f9ecc51d6fb3b4b5b4ecb88 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Tue, 8 Jan 2019 22:32:13 +0900 Subject: [PATCH 046/203] ARROW-4183: [Ruby] Add Arrow::Struct as an element of Arrow::StructArray Returning Arrow::Array by Arrow::StructArray#[] is deprecated. It'll return Arrow::Struct in the next release. It's for consistency. All Arrow::Array#[] implementations should return an element. 
Author: Kouhei Sutou Closes #3338 from kou/ruby-struct and squashes the following commits: a0561954 Add Arrow::Struct as an element of Arrow::StructArray --- .../lib/arrow/struct-array-builder.rb | 9 ++- ruby/red-arrow/lib/arrow/struct-array.rb | 34 ++++++++ ruby/red-arrow/lib/arrow/struct.rb | 68 ++++++++++++++++ .../test/test-struct-array-builder.rb | 47 +++++++---- ruby/red-arrow/test/test-struct-array.rb | 58 +++++++++---- ruby/red-arrow/test/test-struct.rb | 81 +++++++++++++++++++ 6 files changed, 263 insertions(+), 34 deletions(-) create mode 100644 ruby/red-arrow/lib/arrow/struct.rb create mode 100644 ruby/red-arrow/test/test-struct.rb diff --git a/ruby/red-arrow/lib/arrow/struct-array-builder.rb b/ruby/red-arrow/lib/arrow/struct-array-builder.rb index 883ce84da7de7..52f75aab46d35 100644 --- a/ruby/red-arrow/lib/arrow/struct-array-builder.rb +++ b/ruby/red-arrow/lib/arrow/struct-array-builder.rb @@ -73,13 +73,20 @@ def append_value(*args) value.each_with_index do |sub_value, i| self[i].append_value(sub_value) end + when Arrow::Struct + append_value_raw + value.values.each_with_index do |sub_value, i| + self[i].append_value(sub_value) + end when Hash append_value_raw value.each do |name, sub_value| self[name].append_value(sub_value) end else - message = "struct value must be nil, Array or Hash: #{value.inspect}" + message = + "struct value must be nil, Array, " + + "Arrow::Struct or Hash: #{value.inspect}" raise ArgumentError, message end else diff --git a/ruby/red-arrow/lib/arrow/struct-array.rb b/ruby/red-arrow/lib/arrow/struct-array.rb index 4f9834c5d330f..e55a507868f1a 100644 --- a/ruby/red-arrow/lib/arrow/struct-array.rb +++ b/ruby/red-arrow/lib/arrow/struct-array.rb @@ -15,10 +15,44 @@ # specific language governing permissions and limitations # under the License. +require "arrow/struct" + module Arrow class StructArray def [](i) + warn("Use #{self.class}\#find_field instead. " + + "This will returns Arrow::Struct instead of Arrow::Array " + + "since 0.13.0.") get_field(i) end + + def get_value(i) + Struct.new(self, i) + end + + def find_field(index_or_name) + case index_or_name + when String, Symbol + name = index_or_name + (@name_to_field ||= build_name_to_field)[name.to_s] + else + index = index_or_name + cached_fields[index] + end + end + + private + def cached_fields + @fields ||= fields + end + + def build_name_to_field + name_to_field = {} + field_arrays = cached_fields + value_data_type.fields.each_with_index do |field, i| + name_to_field[field.name] = field_arrays[i] + end + name_to_field + end end end diff --git a/ruby/red-arrow/lib/arrow/struct.rb b/ruby/red-arrow/lib/arrow/struct.rb new file mode 100644 index 0000000000000..4ae12b871e49e --- /dev/null +++ b/ruby/red-arrow/lib/arrow/struct.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class Struct + attr_accessor :index + def initialize(array, index) + @array = array + @index = index + end + + def [](field_name_or_field_index) + field = @array.find_field(field_name_or_field_index) + return nil if field.nil? + field[@index] + end + + def fields + @array.value_data_type.fields + end + + def values + @array.fields.collect do |field| + field[@index] + end + end + + def to_a + values + end + + def to_h + attributes = {} + field_arrays = @array.fields + fields.each_with_index do |field, i| + attributes[field.name] = field_arrays[i][@index] + end + attributes + end + + def respond_to_missing?(name, include_private) + return true if @array.find_field(name) + super + end + + def method_missing(name, *args, &block) + if args.empty? + field = @array.find_field(name) + return field[@index] if field + end + super + end + end +end diff --git a/ruby/red-arrow/test/test-struct-array-builder.rb b/ruby/red-arrow/test/test-struct-array-builder.rb index 205564c816c30..42e1ded78e318 100644 --- a/ruby/red-arrow/test/test-struct-array-builder.rb +++ b/ruby/red-arrow/test/test-struct-array-builder.rb @@ -31,8 +31,8 @@ def setup [nil], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -44,8 +44,23 @@ def setup [1], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, + ]) + end + + test("Arrow::Struct") do + source_array = Arrow::StructArray.new(@data_type, [[true, 1]]) + struct = source_array.get_value(0) + @builder.append_value(struct) + array = @builder.finish + assert_equal([ + [true], + [1], + ], + [ + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -57,8 +72,8 @@ def setup [1], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end end @@ -72,8 +87,8 @@ def setup [nil], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -85,8 +100,8 @@ def setup [1], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -98,8 +113,8 @@ def setup [1], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -115,8 +130,8 @@ def setup [nil, 1, 2], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end @@ -137,8 +152,8 @@ def setup [1, nil, 3], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end end diff --git a/ruby/red-arrow/test/test-struct-array.rb b/ruby/red-arrow/test/test-struct-array.rb index 986b0a9db1696..5a00434713a33 100644 --- a/ruby/red-arrow/test/test-struct-array.rb +++ b/ruby/red-arrow/test/test-struct-array.rb @@ -31,27 +31,51 @@ class StructArrayTest < Test::Unit::TestCase [1, nil, 2], ], [ - array[0].to_a, - array[1].to_a, + array.find_field(0).to_a, + array.find_field(1).to_a, ]) end end - test("#[]") do - type = Arrow::StructDataType.new([ - Arrow::Field.new("field1", :boolean), - Arrow::Field.new("field2", :uint64), - ]) - builder = Arrow::StructArrayBuilder.new(type) - builder.append - builder.get_field_builder(0).append(true) - builder.get_field_builder(1).append(1) - builder.append - builder.get_field_builder(0).append(false) - builder.get_field_builder(1).append(2) - array = builder.finish + sub_test_case("instance methods") do + def setup + @data_type = 
Arrow::StructDataType.new(visible: {type: :boolean}, + count: {type: :uint64}) + @values = [ + [true, 1], + [false, 2], + ] + @array = Arrow::StructArray.new(@data_type, @values) + end - assert_equal([[true, false], [1, 2]], - [array[0].to_a, array[1].to_a]) + test("#[]") do + notify("TODO: Returns Arrow::Struct instead.") + assert_equal([[true, false], [1, 2]], + [@array[0].to_a, @array[1].to_a]) + end + + sub_test_case("#find_field") do + test("Integer") do + assert_equal([ + [true, false], + [1, 2], + ], + [ + @array.find_field(0).to_a, + @array.find_field(1).to_a, + ]) + end + + test("String, Symbol") do + assert_equal([ + [true, false], + [1, 2], + ], + [ + @array.find_field("visible").to_a, + @array.find_field(:count).to_a, + ]) + end + end end end diff --git a/ruby/red-arrow/test/test-struct.rb b/ruby/red-arrow/test/test-struct.rb new file mode 100644 index 0000000000000..412549c7dfb34 --- /dev/null +++ b/ruby/red-arrow/test/test-struct.rb @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class StructTest < Test::Unit::TestCase + def setup + @data_type = Arrow::StructDataType.new(visible: {type: :boolean}, + count: {type: :uint64}) + @values = [ + [true, 1], + [false, 2], + ] + @array = Arrow::StructArray.new(@data_type, @values) + @struct = @array.get_value(0) + end + + sub_test_case("#[]") do + test("Integer") do + assert_equal(true, @struct[0]) + end + + test("String") do + assert_equal(true, @struct["visible"]) + end + + test("Symbol") do + assert_equal(true, @struct[:visible]) + end + end + + test("#fields") do + assert_equal(@data_type.fields, + @struct.fields) + end + + test("#values") do + assert_equal([true, 1], + @struct.values) + end + + test("#to_a") do + assert_equal([true, 1], + @struct.to_a) + end + + test("#to_h") do + assert_equal({ + "visible" => true, + "count" => 1, + }, + @struct.to_h) + end + + test("#respond_to_missing?") do + assert_equal([ + true, + false, + ], + [ + @struct.respond_to?(:visible), + @struct.respond_to?(:nonexistent), + ]) + end + + test("#method_missing?") do + assert_equal(1, @struct.count) + end +end From af07f75c1f692d1ed4cea93d358ff1acda6a1771 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 8 Jan 2019 06:45:13 -0700 Subject: [PATCH 047/203] ARROW-4060: [Rust] Add parquet arrow converter. This is the first step of adding an arrow reader and writer for parquet-rs. This commit contains a converter which converts parquet schema to arrow schema. Copied from this pr https://github.com/sunchao/parquet-rs/pull/185. 
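For orientation, a minimal sketch (not part of the patch) of the conversion
entry point, mirroring the tests added below; the two-field message type is
illustrative only:

```rust
extern crate parquet;

use std::rc::Rc;

use parquet::reader::parquet_to_arrow_schema;
use parquet::schema::{parser::parse_message_type, types::SchemaDescriptor};

fn main() {
    let message_type = "
        message test_schema {
            REQUIRED INT32 id;
            OPTIONAL BINARY name (UTF8);
        }
    ";
    let parquet_group_type = parse_message_type(message_type).unwrap();
    let parquet_schema = SchemaDescriptor::new(Rc::new(parquet_group_type));
    let arrow_schema = parquet_to_arrow_schema(Rc::new(parquet_schema)).unwrap();

    // REQUIRED maps to a non-nullable arrow field, OPTIONAL to a nullable one.
    assert_eq!(2, arrow_schema.fields().len());
}
```

Column-pruned conversion goes through `parquet_to_arrow_schema_by_columns`,
which takes an iterator of leaf column indices instead of converting the full
descriptor.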
Author: Renjie Liu Closes #3279 from liurenjie1024/rust-arrow-schema-converter and squashes the following commits: 1bfa00f Resolve conflict 8806b16 Add parquet arrow converter --- rust/parquet/src/errors.rs | 6 + rust/parquet/src/lib.rs | 1 + rust/parquet/src/reader/mod.rs | 25 + rust/parquet/src/reader/schema.rs | 779 ++++++++++++++++++++++++++++++ rust/parquet/src/schema/types.rs | 14 +- 5 files changed, 824 insertions(+), 1 deletion(-) create mode 100644 rust/parquet/src/reader/mod.rs create mode 100644 rust/parquet/src/reader/schema.rs diff --git a/rust/parquet/src/errors.rs b/rust/parquet/src/errors.rs index a5532c1eb66dc..abfbda9dba9f2 100644 --- a/rust/parquet/src/errors.rs +++ b/rust/parquet/src/errors.rs @@ -50,6 +50,12 @@ quick_error! { display("EOF: {}", message) description(message) } + /// Arrow error. + /// Returned when reading into arrow or writing from arrow. + ArrowError(message: String) { + display("Arrow: {}", message) + description(message) + } } } diff --git a/rust/parquet/src/lib.rs b/rust/parquet/src/lib.rs index 75c56f5054f19..cad85ecde317c 100644 --- a/rust/parquet/src/lib.rs +++ b/rust/parquet/src/lib.rs @@ -37,5 +37,6 @@ pub mod column; pub mod compression; mod encodings; pub mod file; +pub mod reader; pub mod record; pub mod schema; diff --git a/rust/parquet/src/reader/mod.rs b/rust/parquet/src/reader/mod.rs new file mode 100644 index 0000000000000..fe580c5e92b37 --- /dev/null +++ b/rust/parquet/src/reader/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [Apache Arrow](http://arrow.apache.org/) is a cross-language development platform for +//! in-memory data. +//! +//! This mod provides API for converting between arrow and parquet. + +pub mod schema; + +pub use self::schema::{parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns}; diff --git a/rust/parquet/src/reader/schema.rs b/rust/parquet/src/reader/schema.rs new file mode 100644 index 0000000000000..68fd867a821cd --- /dev/null +++ b/rust/parquet/src/reader/schema.rs @@ -0,0 +1,779 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Provides API for converting parquet schema to arrow schema and vice versa. +//! +//! The main interfaces for converting parquet schema to arrow schema are +//! `parquet_to_arrow_schema` and `parquet_to_arrow_schema_by_columns`. +//! +//! The interfaces for converting arrow schema to parquet schema is coming. + +use std::{collections::HashSet, rc::Rc}; + +use crate::basic::{LogicalType, Repetition, Type as PhysicalType}; +use crate::errors::{ParquetError::ArrowError, Result}; +use crate::schema::types::{SchemaDescPtr, Type, TypePtr}; + +use arrow::datatypes::{DataType, Field, Schema}; + +/// Convert parquet schema to arrow schema. +pub fn parquet_to_arrow_schema(parquet_schema: SchemaDescPtr) -> Result { + parquet_to_arrow_schema_by_columns(parquet_schema.clone(), 0..parquet_schema.columns().len()) +} + +/// Convert parquet schema to arrow schema, only preserving some leaf columns. +pub fn parquet_to_arrow_schema_by_columns( + parquet_schema: SchemaDescPtr, + column_indices: T, +) -> Result +where + T: IntoIterator, +{ + let mut base_nodes = Vec::new(); + let mut base_nodes_set = HashSet::new(); + let mut leaves = HashSet::new(); + + for c in column_indices { + let column = parquet_schema.column(c).self_type() as *const Type; + let root = parquet_schema.get_column_root_ptr(c); + let root_raw_ptr = root.clone().as_ref() as *const Type; + + leaves.insert(column); + if !base_nodes_set.contains(&root_raw_ptr) { + base_nodes.push(root); + base_nodes_set.insert(root_raw_ptr); + } + } + + let leaves = Rc::new(leaves); + base_nodes + .into_iter() + .map(|t| ParquetTypeConverter::new(t, leaves.clone()).to_field()) + .collect::>>>() + .map(|result| result.into_iter().filter_map(|f| f).collect::>()) + .map(|fields| Schema::new(fields)) +} + +/// This struct is used to group methods and data structures used to convert parquet +/// schema together. +struct ParquetTypeConverter { + schema: TypePtr, + /// This is the columns that need to be converted to arrow schema. + columns_to_convert: Rc>, +} + +impl ParquetTypeConverter { + fn new(schema: TypePtr, columns_to_convert: Rc>) -> Self { + Self { + schema, + columns_to_convert, + } + } + + fn clone_with_schema(&self, other: TypePtr) -> Self { + Self { + schema: other, + columns_to_convert: self.columns_to_convert.clone(), + } + } +} + +impl ParquetTypeConverter { + // Public interfaces. + + /// Converts parquet schema to arrow data type. + /// + /// This function discards schema name. + /// + /// If this schema is a primitive type and not included in the leaves, the result is + /// Ok(None). + /// + /// If this schema is a group type and none of its children is reserved in the + /// conversion, the result is Ok(None). + fn to_data_type(&self) -> Result> { + match self.schema.as_ref() { + Type::PrimitiveType { .. } => self.to_primitive_type(), + Type::GroupType { .. } => self.to_group_type(), + } + } + + /// Converts parquet schema to arrow field. + /// + /// This method is roughly the same as + /// [`to_data_type`](`ParquetTypeConverter::to_data_type`), except it reserves schema + /// name. + fn to_field(&self) -> Result> { + self.to_data_type() + .map(|opt| opt.map(|dt| Field::new(self.schema.name(), dt, self.is_nullable()))) + } + + // Utility functions. + + /// Checks whether this schema is nullable. 
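+    ///
+    /// Note that REPEATED is reported as nullable here as well; repeated
+    /// fields are wrapped in a list type during conversion.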
+ fn is_nullable(&self) -> bool { + let basic_info = self.schema.get_basic_info(); + if basic_info.has_repetition() { + match basic_info.repetition() { + Repetition::OPTIONAL => true, + Repetition::REPEATED => true, + Repetition::REQUIRED => false, + } + } else { + false + } + } + + fn is_repeated(&self) -> bool { + let basic_info = self.schema.get_basic_info(); + + basic_info.has_repetition() && basic_info.repetition() == Repetition::REPEATED + } + + fn is_self_included(&self) -> bool { + self.columns_to_convert + .contains(&(self.schema.as_ref() as *const Type)) + } + + // Functions for primitive types. + + /// Entry point for converting parquet primitive type to arrow type. + /// + /// This function takes care of repetition. + fn to_primitive_type(&self) -> Result> { + if self.is_self_included() { + self.to_primitive_type_inner().map(|dt| { + if self.is_repeated() { + Some(DataType::List(Box::new(dt))) + } else { + Some(dt) + } + }) + } else { + Ok(None) + } + } + + /// Converting parquet primitive type to arrow data type. + fn to_primitive_type_inner(&self) -> Result { + match self.schema.get_physical_type() { + PhysicalType::BOOLEAN => Ok(DataType::Boolean), + PhysicalType::INT32 => self.to_int32(), + PhysicalType::INT64 => self.to_int64(), + PhysicalType::FLOAT => Ok(DataType::Float32), + PhysicalType::DOUBLE => Ok(DataType::Float64), + PhysicalType::BYTE_ARRAY => self.to_byte_array(), + other => Err(ArrowError(format!( + "Unable to convert parquet type {}", + other + ))), + } + } + + fn to_int32(&self) -> Result { + match self.schema.get_basic_info().logical_type() { + LogicalType::NONE => Ok(DataType::Int32), + LogicalType::UINT_8 => Ok(DataType::UInt8), + LogicalType::UINT_16 => Ok(DataType::UInt16), + LogicalType::UINT_32 => Ok(DataType::UInt32), + LogicalType::INT_8 => Ok(DataType::Int8), + LogicalType::INT_16 => Ok(DataType::Int16), + LogicalType::INT_32 => Ok(DataType::Int32), + other => Err(ArrowError(format!( + "Unable to convert parquet logical type {}", + other + ))), + } + } + + fn to_int64(&self) -> Result { + match self.schema.get_basic_info().logical_type() { + LogicalType::NONE => Ok(DataType::Int64), + LogicalType::INT_64 => Ok(DataType::Int64), + LogicalType::UINT_64 => Ok(DataType::UInt64), + other => Err(ArrowError(format!( + "Unable to convert parquet logical type {}", + other + ))), + } + } + + fn to_byte_array(&self) -> Result { + match self.schema.get_basic_info().logical_type() { + LogicalType::UTF8 => Ok(DataType::Utf8), + other => Err(ArrowError(format!( + "Unable to convert parquet logical type {}", + other + ))), + } + } + + // Functions for group types. + + /// Entry point for converting parquet group type. + /// + /// This function takes care of logical type and repetition. + fn to_group_type(&self) -> Result> { + if self.is_repeated() { + self.to_struct() + .map(|opt| opt.map(|dt| DataType::List(Box::new(dt)))) + } else { + match self.schema.get_basic_info().logical_type() { + LogicalType::LIST => self.to_list(), + _ => self.to_struct(), + } + } + } + + /// Converts a parquet group type to arrow struct. + fn to_struct(&self) -> Result> { + match self.schema.as_ref() { + Type::PrimitiveType { .. 
} => panic!( + "{:?} is a struct type, and can't be processed as primitive.", + self.schema + ), + Type::GroupType { + basic_info: _, + fields, + } => fields + .iter() + .map(|field_ptr| self.clone_with_schema(field_ptr.clone()).to_field()) + .collect::>>>() + .map(|result| result.into_iter().filter_map(|f| f).collect::>()) + .map(|fields| { + if fields.is_empty() { + None + } else { + Some(DataType::Struct(fields)) + } + }), + } + } + + /// Converts a parquet list to arrow list. + /// + /// To fully understand this algorithm, please refer to + /// [parquet doc](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md). + fn to_list(&self) -> Result> { + match self.schema.as_ref() { + Type::PrimitiveType { .. } => panic!( + "{:?} is a list type and can't be processed as primitive.", + self.schema + ), + Type::GroupType { + basic_info: _, + fields, + } if fields.len() == 1 => { + let list_item = fields.first().unwrap(); + let item_converter = self.clone_with_schema(list_item.clone()); + + let item_type = match list_item.as_ref() { + Type::PrimitiveType { .. } => { + if item_converter.is_repeated() { + item_converter.to_primitive_type_inner().map(|dt| Some(dt)) + } else { + Err(ArrowError( + "Primitive element type of list must be repeated.".to_string(), + )) + } + } + Type::GroupType { + basic_info: _, + fields, + } => { + if fields.len() > 1 { + item_converter.to_struct() + } else if fields.len() == 1 + && list_item.name() != "array" + && list_item.name() != format!("{}_tuple", self.schema.name()) + { + let nested_item = fields.first().unwrap(); + let nested_item_converter = self.clone_with_schema(nested_item.clone()); + + nested_item_converter.to_data_type() + } else { + item_converter.to_struct() + } + } + }; + + item_type.map(|opt| opt.map(|dt| DataType::List(Box::new(dt)))) + } + _ => Err(ArrowError( + "Group element type of list can only contain one field.".to_string(), + )), + } + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + + use crate::schema::{parser::parse_message_type, types::SchemaDescriptor}; + + use arrow::datatypes::{DataType, Field}; + + use super::{parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns}; + + #[test] + fn test_flat_primitives() { + let message_type = " + message test_schema { + REQUIRED BOOLEAN boolean; + REQUIRED INT32 int8 (INT_8); + REQUIRED INT32 int16 (INT_16); + REQUIRED INT32 int32; + REQUIRED INT64 int64 ; + OPTIONAL DOUBLE double; + OPTIONAL FLOAT float; + OPTIONAL BINARY string (UTF8); + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + + let parquet_schema = SchemaDescriptor::new(Rc::new(parquet_group_type)); + let converted_arrow_schema = parquet_to_arrow_schema(Rc::new(parquet_schema)).unwrap(); + + let arrow_fields = vec![ + Field::new("boolean", DataType::Boolean, false), + Field::new("int8", DataType::Int8, false), + Field::new("int16", DataType::Int16, false), + Field::new("int32", DataType::Int32, false), + Field::new("int64", DataType::Int64, false), + Field::new("double", DataType::Float64, true), + Field::new("float", DataType::Float32, true), + Field::new("string", DataType::Utf8, true), + ]; + + assert_eq!(&arrow_fields, converted_arrow_schema.fields()); + } + + #[test] + fn test_duplicate_fields() { + let message_type = " + message test_schema { + REQUIRED BOOLEAN boolean; + REQUIRED INT32 int8 (INT_8); + } + "; + + let parquet_group_type = parse_message_type(message_type).unwrap(); + + let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type))); + let 
converted_arrow_schema = parquet_to_arrow_schema(parquet_schema.clone()).unwrap();
+
+        let arrow_fields = vec![
+            Field::new("boolean", DataType::Boolean, false),
+            Field::new("int8", DataType::Int8, false),
+        ];
+        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
+
+        let converted_arrow_schema =
+            parquet_to_arrow_schema_by_columns(parquet_schema.clone(), vec![0usize, 1usize])
+                .unwrap();
+        assert_eq!(&arrow_fields, converted_arrow_schema.fields());
+    }
+
+    #[test]
+    fn test_parquet_lists() {
+        let mut arrow_fields = Vec::new();
+
+        // LIST encoding example taken from parquet-format/LogicalTypes.md
+        let message_type = "
+        message test_schema {
+          REQUIRED GROUP my_list (LIST) {
+            REPEATED GROUP list {
+              OPTIONAL BINARY element (UTF8);
+            }
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED GROUP list {
+              REQUIRED BINARY element (UTF8);
+            }
+          }
+          OPTIONAL GROUP array_of_arrays (LIST) {
+            REPEATED GROUP list {
+              REQUIRED GROUP element (LIST) {
+                REPEATED GROUP list {
+                  REQUIRED INT32 element;
+                }
+              }
+            }
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED GROUP element {
+              REQUIRED BINARY str (UTF8);
+            }
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED INT32 element;
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED GROUP element {
+              REQUIRED BINARY str (UTF8);
+              REQUIRED INT32 num;
+            }
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED GROUP array {
+              REQUIRED BINARY str (UTF8);
+            }
+
+          }
+          OPTIONAL GROUP my_list (LIST) {
+            REPEATED GROUP my_list_tuple {
+              REQUIRED BINARY str (UTF8);
+            }
+          }
+          REPEATED INT32 name;
+        }
+        ";
+
+        // // List<String> (list non-null, elements nullable)
+        // required group my_list (LIST) {
+        //   repeated group list {
+        //     optional binary element (UTF8);
+        //   }
+        // }
+        {
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(DataType::Utf8)),
+                false,
+            ));
+        }
+
+        // // List<String> (list nullable, elements non-null)
+        // optional group my_list (LIST) {
+        //   repeated group list {
+        //     required binary element (UTF8);
+        //   }
+        // }
+        {
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(DataType::Utf8)),
+                true,
+            ));
+        }
+
+        // Element types can be nested structures. For example, a list of lists:
+        //
+        // // List<List<Integer>>
+        // optional group array_of_arrays (LIST) {
+        //   repeated group list {
+        //     required group element (LIST) {
+        //       repeated group list {
+        //         required int32 element;
+        //       }
+        //     }
+        //   }
+        // }
+        {
+            let arrow_inner_list = DataType::List(Box::new(DataType::Int32));
+            arrow_fields.push(Field::new(
+                "array_of_arrays",
+                DataType::List(Box::new(arrow_inner_list)),
+                true,
+            ));
+        }
+
+        // // List<String> (list nullable, elements non-null)
+        // optional group my_list (LIST) {
+        //   repeated group element {
+        //     required binary str (UTF8);
+        //   };
+        // }
+        {
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(DataType::Utf8)),
+                true,
+            ));
+        }
+
+        // // List<Integer> (nullable list, non-null elements)
+        // optional group my_list (LIST) {
+        //   repeated int32 element;
+        // }
+        {
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(DataType::Int32)),
+                true,
+            ));
+        }
+
+        // // List<Tuple<String, Integer>> (nullable list, non-null elements)
+        // optional group my_list (LIST) {
+        //   repeated group element {
+        //     required binary str (UTF8);
+        //     required int32 num;
+        //   };
+        // }
+        {
+            let arrow_struct = DataType::Struct(vec![
+                Field::new("str", DataType::Utf8, false),
+                Field::new("num", DataType::Int32, false),
+            ]);
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(arrow_struct)),
+                true,
+            ));
+        }
+
+        // // List<OneTuple<String>> (nullable list, non-null elements)
+        // optional group my_list (LIST) {
+        //   repeated group array {
+        //     required binary str (UTF8);
+        //   };
+        // }
+        // Special case: group is named array
+        {
+            let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(arrow_struct)),
+                true,
+            ));
+        }
+
+        // // List<OneTuple<String>> (nullable list, non-null elements)
+        // optional group my_list (LIST) {
+        //   repeated group my_list_tuple {
+        //     required binary str (UTF8);
+        //   };
+        // }
+        // Special case: group name ends in _tuple
+        {
+            let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
+            arrow_fields.push(Field::new(
+                "my_list",
+                DataType::List(Box::new(arrow_struct)),
+                true,
+            ));
+        }
+
+        // One-level encoding: Only allows required lists with required cells
+        //   repeated value_type name
+        {
+            arrow_fields.push(Field::new(
+                "name",
+                DataType::List(Box::new(DataType::Int32)),
+                true,
+            ));
+        }
+
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+
+        let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type)));
+        let converted_arrow_schema = parquet_to_arrow_schema(parquet_schema.clone()).unwrap();
+        let converted_fields = converted_arrow_schema.fields();
+
+        assert_eq!(arrow_fields.len(), converted_fields.len());
+        for i in 0..arrow_fields.len() {
+            assert_eq!(arrow_fields[i], converted_fields[i]);
+        }
+    }
+
+    #[test]
+    fn test_nested_schema() {
+        let mut arrow_fields = Vec::new();
+        {
+            let group1_fields = vec![
+                Field::new("leaf1", DataType::Boolean, false),
+                Field::new("leaf2", DataType::Int32, false),
+            ];
+            let group1_struct = Field::new("group1", DataType::Struct(group1_fields), false);
+            arrow_fields.push(group1_struct);
+
+            let leaf3_field = Field::new("leaf3", DataType::Int64, false);
+            arrow_fields.push(leaf3_field);
+        }
+
+        let message_type = "
+        message test_schema {
+          REQUIRED GROUP group1 {
+            REQUIRED BOOLEAN leaf1;
+            REQUIRED INT32 leaf2;
+          }
+          REQUIRED INT64 leaf3;
+        }
+        ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+
+        let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type)));
+        let converted_arrow_schema = parquet_to_arrow_schema(parquet_schema.clone()).unwrap();
+        let converted_fields = converted_arrow_schema.fields();
+
+        assert_eq!(arrow_fields.len(), converted_fields.len());
+        for i in 0..arrow_fields.len() {
+            assert_eq!(arrow_fields[i], converted_fields[i]);
+        }
+    }
+
+    #[test]
+    fn test_nested_schema_partial() {
+        let mut arrow_fields = Vec::new();
+        {
+            let group1_fields = vec![Field::new("leaf1", DataType::Int64, false)];
+            let group1 = Field::new("group1", DataType::Struct(group1_fields), false);
+            arrow_fields.push(group1);
+
+            let group2_fields = vec![Field::new("leaf4", DataType::Int64, false)];
+            let group2 = Field::new("group2", DataType::Struct(group2_fields), false);
+            arrow_fields.push(group2);
+
+            arrow_fields.push(Field::new("leaf5", DataType::Int64, false));
+        }
+
+        let message_type = "
+        message test_schema {
+          REQUIRED GROUP group1 {
+            REQUIRED INT64 leaf1;
+            REQUIRED INT64 leaf2;
+          }
+          REQUIRED GROUP group2 {
+            REQUIRED INT64 leaf3;
+            REQUIRED INT64 leaf4;
+          }
+          REQUIRED INT64 leaf5;
+        }
+        ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+
+        // Expected partial arrow schema (columns 0, 3, 4):
+        // required group group1 {
+        //   required int64 leaf1;
+        // }
+        // required group group2 {
+        //   required int64 leaf4;
+        // }
+        // required int64 leaf5;
+
+        let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type)));
+        let converted_arrow_schema =
+            parquet_to_arrow_schema_by_columns(parquet_schema.clone(), vec![0, 3, 4]).unwrap();
+        let converted_fields = converted_arrow_schema.fields();
+
+        assert_eq!(arrow_fields.len(), converted_fields.len());
+        for i in 0..arrow_fields.len() {
+            assert_eq!(arrow_fields[i], converted_fields[i]);
+        }
+    }
+
+    #[test]
+    fn test_nested_schema_partial_ordering() {
+        let mut arrow_fields = Vec::new();
+        {
+            let group2_fields = vec![Field::new("leaf4", DataType::Int64, false)];
+            let group2 = Field::new("group2", DataType::Struct(group2_fields), false);
+            arrow_fields.push(group2);
+
+            arrow_fields.push(Field::new("leaf5", DataType::Int64, false));
+
+            let group1_fields = vec![Field::new("leaf1", DataType::Int64, false)];
+            let group1 = Field::new("group1", DataType::Struct(group1_fields), false);
+            arrow_fields.push(group1);
+        }
+
+        let message_type = "
+        message test_schema {
+          REQUIRED GROUP group1 {
+            REQUIRED INT64 leaf1;
+            REQUIRED INT64 leaf2;
+          }
+          REQUIRED GROUP group2 {
+            REQUIRED INT64 leaf3;
+            REQUIRED INT64 leaf4;
+          }
+          REQUIRED INT64 leaf5;
+        }
+        ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+
+        // Expected partial arrow schema (columns 3, 4, 0):
+        // required group group1 {
+        //   required int64 leaf1;
+        // }
+        // required group group2 {
+        //   required int64 leaf4;
+        // }
+        // required int64 leaf5;
+
+        let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type)));
+        let converted_arrow_schema =
+            parquet_to_arrow_schema_by_columns(parquet_schema.clone(), vec![3, 4, 0]).unwrap();
+        let converted_fields = converted_arrow_schema.fields();
+
+        assert_eq!(arrow_fields.len(), converted_fields.len());
+        for i in 0..arrow_fields.len() {
+            assert_eq!(arrow_fields[i], converted_fields[i]);
+        }
+    }
+
+    #[test]
+    fn test_repeated_nested_schema() {
+        let mut arrow_fields = Vec::new();
+        {
+            arrow_fields.push(Field::new("leaf1", DataType::Int32, true));
+
+            let inner_group_list = Field::new(
+                "innerGroup",
+                DataType::List(Box::new(DataType::Struct(vec![Field::new(
+                    "leaf3",
+                    DataType::Int32,
+                    true,
+                )]))),
+                true,
+            );
+
+            let outer_group_list = Field::new(
+                "outerGroup",
+                DataType::List(Box::new(DataType::Struct(vec![
+                    Field::new("leaf2", DataType::Int32, true),
+                    inner_group_list,
+                ]))),
+                true,
+            );
+            arrow_fields.push(outer_group_list);
+        }
+
+        let message_type = "
+        message test_schema {
+          OPTIONAL INT32 leaf1;
+          REPEATED GROUP outerGroup {
+            OPTIONAL INT32 leaf2;
+            REPEATED GROUP innerGroup {
+              OPTIONAL INT32 leaf3;
+            }
+          }
+        }
+        ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+
+        let parquet_schema = Rc::new(SchemaDescriptor::new(Rc::new(parquet_group_type)));
+        let converted_arrow_schema = parquet_to_arrow_schema(parquet_schema.clone()).unwrap();
+        let converted_fields = converted_arrow_schema.fields();
+
+        assert_eq!(arrow_fields.len(), converted_fields.len());
+        for i in 0..arrow_fields.len() {
+            assert_eq!(arrow_fields[i], converted_fields[i]);
+        }
+    }
+}
diff --git a/rust/parquet/src/schema/types.rs b/rust/parquet/src/schema/types.rs
index 30ee9f60e1a3e..aa314d6100183 100644
--- a/rust/parquet/src/schema/types.rs
+++ b/rust/parquet/src/schema/types.rs
@@ -741,19 +741,31 @@ impl SchemaDescriptor {
 
     /// Returns column root [`Type`](`::schema::types::Type`) for a field position.
     pub fn get_column_root(&self, i: usize) -> &Type {
+        let result = self.column_root_of(i);
+        result.as_ref()
+    }
+
+    /// Returns column root [`Type`](`::schema::types::Type`) pointer for a field position.
+    pub fn get_column_root_ptr(&self, i: usize) -> TypePtr {
+        let result = self.column_root_of(i);
+        result.clone()
+    }
+
+    fn column_root_of(&self, i: usize) -> &Rc<Type> {
         assert!(
             i < self.leaves.len(),
             "Index out of bound: {} not in [0, {})",
             i,
             self.leaves.len()
         );
+
         let result = self.leaf_to_base.get(&i);
         assert!(
             result.is_some(),
             "Expected a value for index {} but found None",
             i
         );
-        result.unwrap().as_ref()
+        result.unwrap()
     }
 
     /// Returns schema as [`Type`](`::schema::types::Type`).
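As an aside, a minimal usage sketch of the two root accessors from the hunk above. This is hedged: it assumes the patched rust/parquet crate as shown, and the schema string and variable names below are made up for illustration, not taken from the patch:

    use std::rc::Rc;
    use parquet::schema::parser::parse_message_type;
    use parquet::schema::types::SchemaDescriptor;

    // Hypothetical two-column schema: leaf 0 is `id`, leaf 1 is `name.first`.
    let message_type = "
    message example {
      REQUIRED INT32 id;
      REQUIRED GROUP name {
        REQUIRED BINARY first (UTF8);
      }
    }
    ";
    let parquet_type = Rc::new(parse_message_type(message_type).unwrap());
    let descr = SchemaDescriptor::new(parquet_type);

    // Both calls resolve the root of leaf 1, i.e. the group `name`; only the
    // _ptr variant returns an owned Rc<Type> that can outlive a borrow of `descr`.
    let root_ref = descr.get_column_root(1);
    let root_ptr = descr.get_column_root_ptr(1);
    assert_eq!(root_ref.name(), root_ptr.name());

The owned variant is what a caller needs when it must keep a column's root type around without holding the descriptor borrowed, which the shared `column_root_of` helper makes cheap (an `Rc` clone rather than a deep copy).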
From 4f2f53336f2293eea33235e86e41aa9f08e98a1a Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 8 Jan 2019 15:02:14 +0100 Subject: [PATCH 048/203] ARROW-4178: [C++] Fix TSan and UBSan errors Author: Antoine Pitrou Closes #3334 from pitrou/ARROW-4178-tsan-ubsan-fixes and squashes the following commits: b836f733 ARROW-4178: Fix TSan and UBSan errors --- cpp/build-support/run-test.sh | 4 ++ cpp/build-support/tsan-suppressions.txt | 19 +++++++ cpp/build-support/ubsan-suppressions.txt | 16 ++++++ cpp/src/arrow/compare.cc | 10 +++- cpp/src/arrow/compute/kernels/cast.cc | 1 + cpp/src/arrow/csv/column-builder.cc | 4 +- cpp/src/arrow/io/file-test.cc | 4 +- cpp/src/arrow/io/readahead-test.cc | 56 +++++++++++++++++-- cpp/src/arrow/util/bit-stream-utils.h | 3 +- cpp/src/arrow/util/bit-util-test.cc | 2 + cpp/src/arrow/util/decimal-test.cc | 4 +- cpp/src/arrow/util/decimal.cc | 16 ++++-- cpp/src/arrow/util/int-util.h | 16 ++++++ cpp/src/arrow/util/macros.h | 9 +++ cpp/src/arrow/util/parsing.h | 5 +- cpp/src/arrow/util/thread-pool-test.cc | 3 +- cpp/src/parquet/arrow/reader.cc | 70 +++++++++++++++--------- cpp/src/parquet/bloom_filter.h | 4 +- cpp/src/parquet/column_reader-test.cc | 4 +- cpp/src/parquet/encoding-internal.h | 7 ++- cpp/src/parquet/types.h | 3 +- cpp/src/parquet/util/memory.cc | 7 ++- cpp/src/parquet/util/memory.h | 1 + 23 files changed, 213 insertions(+), 55 deletions(-) create mode 100644 cpp/build-support/tsan-suppressions.txt create mode 100644 cpp/build-support/ubsan-suppressions.txt diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 656ab7bd3b805..6b1c09efb4d8d 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -80,6 +80,10 @@ function setup_sanitizers() { TSAN_OPTIONS="$TSAN_OPTIONS history_size=7" export TSAN_OPTIONS + UBSAN_OPTIONS="$UBSAN_OPTIONS print_stacktrace=1" + UBSAN_OPTIONS="$UBSAN_OPTIONS suppressions=$ROOT/build-support/ubsan-suppressions.txt" + export UBSAN_OPTIONS + # Enable leak detection even under LLVM 3.4, where it was disabled by default. # This flag only takes effect when running an ASAN build. # ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1" diff --git a/cpp/build-support/tsan-suppressions.txt b/cpp/build-support/tsan-suppressions.txt new file mode 100644 index 0000000000000..ce897c8591188 --- /dev/null +++ b/cpp/build-support/tsan-suppressions.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
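+
+# Suppression entries (per the ThreadSanitizer docs) are one per line, in the
+# form "check_type:pattern", e.g. "race:SymbolName" or "thread:library.so".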
+ +# Thread leak in CUDA +thread:libcuda.so diff --git a/cpp/build-support/ubsan-suppressions.txt b/cpp/build-support/ubsan-suppressions.txt new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/cpp/build-support/ubsan-suppressions.txt @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 2f4f5d16364f1..efc8ad82faf93 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -324,7 +324,15 @@ static bool IsEqualPrimitive(const PrimitiveArray& left, const PrimitiveArray& r right_data = right.values()->data() + right.offset() * byte_width; } - if (left.null_count() > 0) { + if (byte_width == 0) { + // Special case 0-width data, as the data pointers may be null + for (int64_t i = 0; i < left.length(); ++i) { + if (left.IsNull(i) != right.IsNull(i)) { + return false; + } + } + return true; + } else if (left.null_count() > 0) { for (int64_t i = 0; i < left.length(); ++i) { const bool left_null = left.IsNull(i); const bool right_null = right.IsNull(i); diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc index 15746d4c9965e..092aebc8c3d2e 100644 --- a/cpp/src/arrow/compute/kernels/cast.cc +++ b/cpp/src/arrow/compute/kernels/cast.cc @@ -404,6 +404,7 @@ struct is_float_truncate< template struct CastFunctor::value>::type> { + ARROW_DISABLE_UBSAN("float-cast-overflow") void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { using in_type = typename I::c_type; diff --git a/cpp/src/arrow/csv/column-builder.cc b/cpp/src/arrow/csv/column-builder.cc index 28cbad47580e8..1f37046798fd7 100644 --- a/cpp/src/arrow/csv/column-builder.cc +++ b/cpp/src/arrow/csv/column-builder.cc @@ -305,12 +305,12 @@ Status InferringColumnBuilder::TryConvertChunk(size_t chunk_index) { void InferringColumnBuilder::Insert(int64_t block_index, const std::shared_ptr& parser) { - DCHECK_NE(converter_, nullptr); - // Create a slot for the new chunk and spawn a task to convert it size_t chunk_index = static_cast(block_index); { std::lock_guard lock(mutex_); + + DCHECK_NE(converter_, nullptr); if (chunks_.size() <= chunk_index) { chunks_.resize(chunk_index + 1); } diff --git a/cpp/src/arrow/io/file-test.cc b/cpp/src/arrow/io/file-test.cc index 6d780c0940eba..f329ae9d504e5 100644 --- a/cpp/src/arrow/io/file-test.cc +++ b/cpp/src/arrow/io/file-test.cc @@ -468,10 +468,10 @@ class MyMemoryPool : public MemoryPool { int64_t bytes_allocated() const override { return -1; } - int64_t num_allocations() const { return num_allocations_; } + int64_t num_allocations() const { return num_allocations_.load(); } private: - int64_t num_allocations_; + std::atomic num_allocations_; }; TEST_F(TestReadableFile, 
CustomMemoryPool) { diff --git a/cpp/src/arrow/io/readahead-test.cc b/cpp/src/arrow/io/readahead-test.cc index b7f404f666983..6575e898590d8 100644 --- a/cpp/src/arrow/io/readahead-test.cc +++ b/cpp/src/arrow/io/readahead-test.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,51 @@ using internal::checked_cast; namespace io { namespace internal { +class LockedInputStream : public InputStream { + public: + explicit LockedInputStream(const std::shared_ptr& stream) + : stream_(stream) {} + + Status Close() override { + std::lock_guard lock(mutex_); + return stream_->Close(); + } + + bool closed() const override { + std::lock_guard lock(mutex_); + return stream_->closed(); + } + + Status Tell(int64_t* position) const override { + std::lock_guard lock(mutex_); + return stream_->Tell(position); + } + + Status Read(int64_t nbytes, int64_t* bytes_read, void* buffer) override { + std::lock_guard lock(mutex_); + return stream_->Read(nbytes, bytes_read, buffer); + } + + Status Read(int64_t nbytes, std::shared_ptr* out) override { + std::lock_guard lock(mutex_); + return stream_->Read(nbytes, out); + } + + bool supports_zero_copy() const override { + std::lock_guard lock(mutex_); + return stream_->supports_zero_copy(); + } + + util::string_view Peek(int64_t nbytes) const override { + std::lock_guard lock(mutex_); + return stream_->Peek(nbytes); + } + + protected: + std::shared_ptr stream_; + mutable std::mutex mutex_; +}; + static void sleep_for(double seconds) { std::this_thread::sleep_for( std::chrono::nanoseconds(static_cast(seconds * 1e9))); @@ -57,13 +103,13 @@ static void busy_wait(double seconds, std::function predicate) { } } -std::shared_ptr DataReader(const std::string& data) { +std::shared_ptr DataReader(const std::string& data) { std::shared_ptr buffer; ABORT_NOT_OK(Buffer::FromString(data, &buffer)); - return std::make_shared(buffer); + return std::make_shared(std::make_shared(buffer)); } -static int64_t WaitForPosition(const RandomAccessFile& file, int64_t expected, +static int64_t WaitForPosition(const FileInterface& file, int64_t expected, double seconds = 0.2) { int64_t pos = -1; busy_wait(seconds, [&]() -> bool { @@ -73,12 +119,12 @@ static int64_t WaitForPosition(const RandomAccessFile& file, int64_t expected, return pos; } -static void AssertEventualPosition(const RandomAccessFile& file, int64_t expected) { +static void AssertEventualPosition(const FileInterface& file, int64_t expected) { int64_t pos = WaitForPosition(file, expected); ASSERT_EQ(pos, expected) << "File didn't reach expected position"; } -static void AssertPosition(const RandomAccessFile& file, int64_t expected) { +static void AssertPosition(const FileInterface& file, int64_t expected) { int64_t pos = -1; ABORT_NOT_OK(file.Tell(&pos)); ASSERT_EQ(pos, expected) << "File didn't reach expected position"; diff --git a/cpp/src/arrow/util/bit-stream-utils.h b/cpp/src/arrow/util/bit-stream-utils.h index ae62a7ff1e2b3..ad86ee87c9fda 100644 --- a/cpp/src/arrow/util/bit-stream-utils.h +++ b/cpp/src/arrow/util/bit-stream-utils.h @@ -397,7 +397,8 @@ inline bool BitReader::GetVlqInt(int32_t* v) { } inline bool BitWriter::PutZigZagVlqInt(int32_t v) { - uint32_t u = (v << 1) ^ (v >> 31); + // Note negative left shift is undefined + uint32_t u = (static_cast(v) << 1) ^ (v >> 31); return PutVlqInt(u); } diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index 5f181e9b7b14c..b12e2ecf9eef9 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ 
b/cpp/src/arrow/util/bit-util-test.cc @@ -756,7 +756,9 @@ static void TestZigZag(int32_t v) { TEST(BitStreamUtil, ZigZag) { TestZigZag(0); TestZigZag(1); + TestZigZag(1234); TestZigZag(-1); + TestZigZag(-1234); TestZigZag(std::numeric_limits::max()); TestZigZag(-std::numeric_limits::max()); } diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index 94c270280ea3c..5925d98d9d8d5 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -417,8 +417,8 @@ TEST(Decimal128Test, TestFromBigEndian) { auto negated = -value; little_endian = negated.ToBytes(); std::reverse(little_endian.begin(), little_endian.end()); - // Convert all of the bytes since we have to include the sign bit - ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out)); + // The sign bit is looked up in the MSB + ASSERT_OK(Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1, &out)); ASSERT_EQ(negated, out); // Take the complement and convert to big endian diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index f6e110561b275..c980e2a9e773c 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -29,11 +29,15 @@ #include "arrow/status.h" #include "arrow/util/bit-util.h" #include "arrow/util/decimal.h" +#include "arrow/util/int-util.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" namespace arrow { +using internal::SafeLeftShift; +using internal::SafeSignedAdd; + static const Decimal128 ScaleMultipliers[] = { Decimal128(0LL), Decimal128(10LL), @@ -405,7 +409,7 @@ Decimal128& Decimal128::Negate() { low_bits_ = ~low_bits_ + 1; high_bits_ = ~high_bits_; if (low_bits_ == 0) { - ++high_bits_; + high_bits_ = SafeSignedAdd(high_bits_, 1); } return *this; } @@ -414,9 +418,9 @@ Decimal128& Decimal128::Abs() { return *this < 0 ? 
Negate() : *this; } Decimal128& Decimal128::operator+=(const Decimal128& right) { const uint64_t sum = low_bits_ + right.low_bits_; - high_bits_ += right.high_bits_; + high_bits_ = SafeSignedAdd(high_bits_, right.high_bits_); if (sum < low_bits_) { - ++high_bits_; + high_bits_ = SafeSignedAdd(high_bits_, 1); } low_bits_ = sum; return *this; @@ -454,7 +458,7 @@ Decimal128& Decimal128::operator&=(const Decimal128& right) { Decimal128& Decimal128::operator<<=(uint32_t bits) { if (bits != 0) { if (bits < 64) { - high_bits_ <<= bits; + high_bits_ = SafeLeftShift(high_bits_, bits); high_bits_ |= (low_bits_ >> (64 - bits)); low_bits_ <<= bits; } else if (bits < 128) { @@ -925,7 +929,7 @@ Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal12 } else { high = -1 * (is_negative && length < kMaxDecimalBytes); // Shift left enough bits to make room for the incoming int64_t - high <<= high_bits_offset * CHAR_BIT; + high = SafeLeftShift(high, high_bits_offset * CHAR_BIT); // Preserve the upper bits by inplace OR-ing the int64_t high |= high_bits; } @@ -943,7 +947,7 @@ Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal12 // Sign extend the low bits if necessary low = -1 * (is_negative && length < 8); // Shift left enough bits to make room for the incoming int64_t - low <<= low_bits_offset * CHAR_BIT; + low = SafeLeftShift(low, low_bits_offset * CHAR_BIT); // Preserve the upper bits by inplace OR-ing the int64_t low |= low_bits; } diff --git a/cpp/src/arrow/util/int-util.h b/cpp/src/arrow/util/int-util.h index 66d389e5f40cf..d3ae09f75cfa6 100644 --- a/cpp/src/arrow/util/int-util.h +++ b/cpp/src/arrow/util/int-util.h @@ -19,6 +19,7 @@ #define ARROW_UTIL_INT_UTIL_H #include +#include #include "arrow/util/visibility.h" @@ -67,6 +68,21 @@ template ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length, const int32_t* transpose_map); +/// Signed addition with well-defined behaviour on overflow (as unsigned) +template +SignedInt SafeSignedAdd(SignedInt u, SignedInt v) { + using UnsignedInt = typename std::make_unsigned::type; + return static_cast(static_cast(u) + + static_cast(v)); +} + +/// Signed left shift with well-defined behaviour on negative numbers or overflow +template +SignedInt SafeLeftShift(SignedInt u, Shift shift) { + using UnsignedInt = typename std::make_unsigned::type; + return static_cast(static_cast(u) << shift); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index f4c58f4030afd..ab258252695ab 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -113,6 +113,15 @@ #endif #endif // !defined(MANUALLY_ALIGNED_STRUCT) +// ---------------------------------------------------------------------- +// Convenience macro disabling a particular UBSan check in a function + +#if defined(__clang__) +#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature))) +#else +#define ARROW_DISABLE_UBSAN(feature) +#endif + // ---------------------------------------------------------------------- // From googletest // (also in parquet-cpp) diff --git a/cpp/src/arrow/util/parsing.h b/cpp/src/arrow/util/parsing.h index 46d0f7c322b46..23e7061ac8738 100644 --- a/cpp/src/arrow/util/parsing.h +++ b/cpp/src/arrow/util/parsing.h @@ -335,7 +335,10 @@ class StringToSignedIntConverterMixin { if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) { return false; } - *out = static_cast(-static_cast(unsigned_value)); + // To avoid both 
compiler warnings (with unsigned negation) + // and undefined behaviour (with signed negation overflow), + // use the expanded formula for 2's complement negation. + *out = static_cast(~unsigned_value + 1); } else { if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) { return false; diff --git a/cpp/src/arrow/util/thread-pool-test.cc b/cpp/src/arrow/util/thread-pool-test.cc index 22a8db21fd280..c0deb20ccdde1 100644 --- a/cpp/src/arrow/util/thread-pool-test.cc +++ b/cpp/src/arrow/util/thread-pool-test.cc @@ -298,7 +298,8 @@ TEST_F(TestThreadPool, Submit) { // Test fork safety on Unix -#if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)) +#if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ + defined(THREAD_SANITIZER)) TEST_F(TestThreadPool, ForkSafety) { pid_t child_pid; int child_status; diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index b5905fddff489..58c703f7fe068 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -29,6 +29,7 @@ #include "arrow/api.h" #include "arrow/util/bit-util.h" +#include "arrow/util/int-util.h" #include "arrow/util/logging.h" #include "arrow/util/thread-pool.h" @@ -76,6 +77,8 @@ namespace parquet { namespace arrow { using ::arrow::BitUtil::BytesForBits; +using ::arrow::BitUtil::FromBigEndian; +using ::arrow::internal::SafeLeftShift; template using ArrayType = typename ::arrow::TypeTraits::ArrayType; @@ -1098,8 +1101,6 @@ struct TransferFunctor< }; static uint64_t BytesToInteger(const uint8_t* bytes, int32_t start, int32_t stop) { - using ::arrow::BitUtil::FromBigEndian; - const int32_t length = stop - start; DCHECK_GE(length, 0); @@ -1155,37 +1156,54 @@ static constexpr int32_t kMaxDecimalBytes = 16; /// \brief Convert a sequence of big-endian bytes to one int64_t (high bits) and one /// uint64_t (low bits). -static void BytesToIntegerPair(const uint8_t* bytes, - const int32_t total_number_of_bytes_used, int64_t* high, - uint64_t* low) { - DCHECK_GE(total_number_of_bytes_used, kMinDecimalBytes); - DCHECK_LE(total_number_of_bytes_used, kMaxDecimalBytes); - - /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the - /// sign bit. - const bool is_negative = static_cast(bytes[0]) < 0; +static void BytesToIntegerPair(const uint8_t* bytes, const int32_t length, + int64_t* out_high, uint64_t* out_low) { + DCHECK_GE(length, kMinDecimalBytes); + DCHECK_LE(length, kMaxDecimalBytes); - /// Sign extend the low bits if necessary - *low = UINT64_MAX * (is_negative && total_number_of_bytes_used < 8); - *high = -1 * (is_negative && total_number_of_bytes_used < kMaxDecimalBytes); + // XXX This code is copied from Decimal::FromBigEndian - /// Stop byte of the high bytes - const int32_t high_bits_offset = std::max(0, total_number_of_bytes_used - 8); + int64_t high, low; + + // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the + // sign bit. + const bool is_negative = static_cast(bytes[0]) < 0; - /// Shift left enough bits to make room for the incoming int64_t - *high <<= high_bits_offset * CHAR_BIT; + // 1. 
Extract the high bytes + // Stop byte of the high bytes + const int32_t high_bits_offset = std::max(0, length - 8); + const auto high_bits = BytesToInteger(bytes, 0, high_bits_offset); - /// Preserve the upper bits by inplace OR-ing the int64_t - *high |= BytesToInteger(bytes, 0, high_bits_offset); + if (high_bits_offset == 8) { + // Avoid undefined shift by 64 below + high = high_bits; + } else { + high = -1 * (is_negative && length < kMaxDecimalBytes); + // Shift left enough bits to make room for the incoming int64_t + high = SafeLeftShift(high, high_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + high |= high_bits; + } - /// Stop byte of the low bytes - const int32_t low_bits_offset = std::min(total_number_of_bytes_used, 8); + // 2. Extract the low bytes + // Stop byte of the low bytes + const int32_t low_bits_offset = std::min(length, 8); + const auto low_bits = BytesToInteger(bytes, high_bits_offset, length); - /// Shift left enough bits to make room for the incoming uint64_t - *low <<= low_bits_offset * CHAR_BIT; + if (low_bits_offset == 8) { + // Avoid undefined shift by 64 below + low = low_bits; + } else { + // Sign extend the low bits if necessary + low = -1 * (is_negative && length < 8); + // Shift left enough bits to make room for the incoming int64_t + low = SafeLeftShift(low, low_bits_offset * CHAR_BIT); + // Preserve the upper bits by inplace OR-ing the int64_t + low |= low_bits; + } - /// Preserve the upper bits by inplace OR-ing the uint64_t - *low |= BytesToInteger(bytes, high_bits_offset, total_number_of_bytes_used); + *out_high = high; + *out_low = static_cast(low); } static inline void RawBytesToDecimalBytes(const uint8_t* value, int32_t byte_width, diff --git a/cpp/src/parquet/bloom_filter.h b/cpp/src/parquet/bloom_filter.h index 0078051b49735..a66fc8d1b080c 100644 --- a/cpp/src/parquet/bloom_filter.h +++ b/cpp/src/parquet/bloom_filter.h @@ -155,11 +155,13 @@ class PARQUET_EXPORT BlockSplitBloomFilter : public BloomFilter { static uint32_t OptimalNumOfBits(uint32_t ndv, double fpp) { DCHECK(fpp > 0.0 && fpp < 1.0); const double m = -8.0 * ndv / log(1 - pow(fpp, 1.0 / 8)); - uint32_t num_bits = static_cast(m); + uint32_t num_bits; // Handle overflow. 
if (m < 0 || m > kMaximumBloomFilterBytes << 3) { num_bits = static_cast(kMaximumBloomFilterBytes << 3); + } else { + num_bits = static_cast(m); } // Round up to lower bound diff --git a/cpp/src/parquet/column_reader-test.cc b/cpp/src/parquet/column_reader-test.cc index 60f2be2362510..0475ca591de02 100644 --- a/cpp/src/parquet/column_reader-test.cc +++ b/cpp/src/parquet/column_reader-test.cc @@ -102,7 +102,7 @@ class TestPrimitiveReader : public ::testing::Test { &vresult[0] + total_values_read, &values_read)); total_values_read += static_cast(values_read); batch_actual += batch; - batch_size = std::max(batch_size * 2, 4096); + batch_size = std::min(1 << 24, std::max(batch_size * 2, 4096)); } while (batch > 0); ASSERT_EQ(num_levels_, batch_actual); @@ -147,7 +147,7 @@ class TestPrimitiveReader : public ::testing::Test { total_values_read += batch - static_cast(null_count); batch_actual += batch; levels_actual += static_cast(levels_read); - batch_size = std::max(batch_size * 2, 4096); + batch_size = std::min(1 << 24, std::max(batch_size * 2, 4096)); } while ((batch > 0) || (levels_read > 0)); ASSERT_EQ(num_levels_, levels_actual); diff --git a/cpp/src/parquet/encoding-internal.h b/cpp/src/parquet/encoding-internal.h index e2dfc2380ddcf..8fbfb402a7fb1 100644 --- a/cpp/src/parquet/encoding-internal.h +++ b/cpp/src/parquet/encoding-internal.h @@ -83,7 +83,10 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values, if (data_size < bytes_to_decode) { ParquetException::EofException(); } - memcpy(out, data, bytes_to_decode); + // If bytes_to_decode == 0, data could be null + if (bytes_to_decode > 0) { + memcpy(out, data, bytes_to_decode); + } return bytes_to_decode; } @@ -382,7 +385,7 @@ template inline void DictionaryDecoder::SetDict(Decoder* dictionary) { int num_dictionary_values = dictionary->values_left(); dictionary_.Resize(num_dictionary_values); - dictionary->Decode(&dictionary_[0], num_dictionary_values); + dictionary->Decode(dictionary_.data(), num_dictionary_values); } template <> diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 1812f5547abc2..2bc51e7dc7902 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -160,7 +160,8 @@ struct ByteArray { }; inline bool operator==(const ByteArray& left, const ByteArray& right) { - return left.len == right.len && 0 == std::memcmp(left.ptr, right.ptr, left.len); + return left.len == right.len && + (left.len == 0 || std::memcmp(left.ptr, right.ptr, left.len) == 0); } inline bool operator!=(const ByteArray& left, const ByteArray& right) { diff --git a/cpp/src/parquet/util/memory.cc b/cpp/src/parquet/util/memory.cc index 6251f1c85c085..b3f83bdfdfd32 100644 --- a/cpp/src/parquet/util/memory.cc +++ b/cpp/src/parquet/util/memory.cc @@ -233,8 +233,11 @@ void InMemoryOutputStream::Write(const uint8_t* data, int64_t length) { PARQUET_THROW_NOT_OK(buffer_->Resize(new_capacity)); capacity_ = new_capacity; } - memcpy(Head(), data, length); - size_ += length; + // If length == 0, data may be null + if (length > 0) { + memcpy(Head(), data, length); + size_ += length; + } } int64_t InMemoryOutputStream::Tell() { return size_; } diff --git a/cpp/src/parquet/util/memory.h b/cpp/src/parquet/util/memory.h index 8677e6b9dacbc..d63ed84dd7ead 100644 --- a/cpp/src/parquet/util/memory.h +++ b/cpp/src/parquet/util/memory.h @@ -66,6 +66,7 @@ class PARQUET_EXPORT Vector { void Swap(Vector& v); inline T& operator[](int64_t i) const { return data_[i]; } + T* data() { return data_; } const T* data() const { return 
data_; } private: From d6ddcbf1566be6afb0e123589adfb5e5d60e3a4c Mon Sep 17 00:00:00 2001 From: Pindikura Ravindra Date: Tue, 8 Jan 2019 09:32:38 -0600 Subject: [PATCH 049/203] ARROW-3701: [Gandiva] add op for decimal 128 The code changes are complete. However, the perf in the non-fast code path is slow - I'll debug and fix that. Author: Pindikura Ravindra Author: praveenbingo Closes #2942 from pravindra/decimal2 and squashes the following commits: 0f7e78a76 ARROW-3701: off gandiva tests in py 2.7 613524602 ARROW-3701: fix format error c0fddfbc6 ARROW-3701: fix python unresolved symbol db8581162 ARROW-3701: added a comment regarding structs. 194c4377a ARROW-3701: revert surefire version 5d07b79e2 ARROW-3701: Address review comments 36691c1c7 ARROW-3701: add benchmark for large decimals 75f7ac9d4 ARROW-3701: misc cleanups 59db4603d ARROW-3701: Fix java checkstyle issue 8a227ec9c ARROW-3701: Workaround for jni JIT issue 9cbd4ab59 ARROW-3701: switch to surefire 2.19 for dbg ecaff4631 ARROW-3701: Enable decimal tests 54a210511 ARROW-3701: Support for decimal literal and null b76a3ec1b ARROW-3701: First decimal function --- .travis.yml | 3 +- cpp/src/arrow/util/decimal-test.cc | 104 +++++ cpp/src/arrow/util/decimal.cc | 97 ++++- cpp/src/arrow/util/decimal.h | 19 + cpp/src/gandiva/CMakeLists.txt | 4 + cpp/src/gandiva/arrow.h | 11 + cpp/src/gandiva/decimal_full.h | 75 ++++ cpp/src/gandiva/decimal_ir.cc | 405 ++++++++++++++++++ cpp/src/gandiva/decimal_ir.h | 171 ++++++++ cpp/src/gandiva/decimal_type_util.cc | 80 ++++ cpp/src/gandiva/decimal_type_util.h | 90 ++++ cpp/src/gandiva/decimal_type_util_test.cc | 58 +++ cpp/src/gandiva/engine.cc | 9 +- cpp/src/gandiva/engine.h | 2 + cpp/src/gandiva/expression_registry.cc | 4 +- cpp/src/gandiva/function_ir_builder.cc | 81 ++++ cpp/src/gandiva/function_ir_builder.h | 64 +++ cpp/src/gandiva/function_registry.cc | 19 +- .../gandiva/function_registry_arithmetic.cc | 2 + cpp/src/gandiva/function_registry_common.h | 1 + cpp/src/gandiva/function_signature.h | 18 +- cpp/src/gandiva/jni/CMakeLists.txt | 2 +- .../gandiva/jni/expression_registry_helper.cc | 7 +- cpp/src/gandiva/jni/jni_common.cc | 6 + cpp/src/gandiva/literal_holder.h | 5 +- cpp/src/gandiva/llvm_generator.cc | 168 +++++--- cpp/src/gandiva/llvm_generator.h | 9 +- cpp/src/gandiva/llvm_types.cc | 1 + cpp/src/gandiva/llvm_types.h | 25 +- cpp/src/gandiva/lvalue.h | 35 +- cpp/src/gandiva/precompiled/CMakeLists.txt | 12 +- cpp/src/gandiva/precompiled/decimal_ops.cc | 219 ++++++++++ cpp/src/gandiva/precompiled/decimal_ops.h | 37 ++ .../gandiva/precompiled/decimal_ops_test.cc | 75 ++++ .../gandiva/precompiled/decimal_wrapper.cc | 43 ++ cpp/src/gandiva/projector.cc | 6 +- cpp/src/gandiva/proto/Types.proto | 8 + cpp/src/gandiva/tests/CMakeLists.txt | 8 +- cpp/src/gandiva/tests/decimal_single_test.cc | 224 ++++++++++ cpp/src/gandiva/tests/decimal_test.cc | 237 ++++++++++ cpp/src/gandiva/tests/generate_data.h | 20 + cpp/src/gandiva/tests/micro_benchmarks.cc | 126 +++++- cpp/src/gandiva/tests/test_util.h | 14 + cpp/src/gandiva/tests/timed_evaluate.h | 4 +- cpp/src/gandiva/tree_expr_builder.cc | 10 + cpp/src/gandiva/tree_expr_builder.h | 3 + cpp/valgrind.supp | 6 + java/gandiva/pom.xml | 7 +- .../evaluator/ConfigurationBuilder.java | 32 -- .../gandiva/evaluator/DecimalTypeUtil.java | 86 ++++ .../gandiva/evaluator/ExpressionRegistry.java | 5 +- .../arrow/gandiva/evaluator/Filter.java | 16 +- .../arrow/gandiva/evaluator/JniLoader.java | 148 +++++++ .../arrow/gandiva/evaluator/JniWrapper.java | 93 +--- 
.../arrow/gandiva/evaluator/Projector.java | 20 +- .../arrow/gandiva/expression/DecimalNode.java | 54 +++ .../arrow/gandiva/expression/TreeBuilder.java | 4 + .../gandiva/evaluator/BaseEvaluatorTest.java | 15 + .../evaluator/DecimalTypeUtilTest.java | 89 ++++ .../evaluator/ProjectorDecimalTest.java | 157 +++++++ python/pyarrow/gandiva.pyx | 10 + 61 files changed, 3128 insertions(+), 235 deletions(-) create mode 100644 cpp/src/gandiva/decimal_full.h create mode 100644 cpp/src/gandiva/decimal_ir.cc create mode 100644 cpp/src/gandiva/decimal_ir.h create mode 100644 cpp/src/gandiva/decimal_type_util.cc create mode 100644 cpp/src/gandiva/decimal_type_util.h create mode 100644 cpp/src/gandiva/decimal_type_util_test.cc create mode 100644 cpp/src/gandiva/function_ir_builder.cc create mode 100644 cpp/src/gandiva/function_ir_builder.h create mode 100644 cpp/src/gandiva/precompiled/decimal_ops.cc create mode 100644 cpp/src/gandiva/precompiled/decimal_ops.h create mode 100644 cpp/src/gandiva/precompiled/decimal_ops_test.cc create mode 100644 cpp/src/gandiva/precompiled/decimal_wrapper.cc create mode 100644 cpp/src/gandiva/tests/decimal_single_test.cc create mode 100644 cpp/src/gandiva/tests/decimal_test.cc create mode 100644 java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java create mode 100644 java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java create mode 100644 java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java create mode 100644 java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java create mode 100644 java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java diff --git a/.travis.yml b/.travis.yml index ffbb691f652f5..8532cc7f3b662 100644 --- a/.travis.yml +++ b/.travis.yml @@ -121,7 +121,6 @@ matrix: - ARROW_TRAVIS_COVERAGE=1 - ARROW_TRAVIS_PYTHON_DOCS=1 - ARROW_TRAVIS_PYTHON_JVM=1 - - ARROW_TRAVIS_PYTHON_GANDIVA=1 - ARROW_TRAVIS_OPTIONAL_INSTALL=1 - ARROW_BUILD_WARNING_LEVEL=CHECKIN # TODO(wesm): Run the benchmarks outside of Travis @@ -138,6 +137,8 @@ matrix: - export PLASMA_VALGRIND=0 - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 2.7 || travis_terminate 1 - export PLASMA_VALGRIND=1 + # Gandiva tests are not enabled with python 2.7 + - ARROW_TRAVIS_PYTHON_GANDIVA=1 - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 || travis_terminate 1 - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh - name: "[OS X] C++ w/ XCode 8.3" diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index 5925d98d9d8d5..73ac48cf88f20 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -466,4 +466,108 @@ TEST(Decimal128Test, TestToInteger) { ASSERT_RAISES(Invalid, invalid_int64.ToInteger(&out2)); } +TEST(Decimal128Test, GetWholeAndFraction) { + Decimal128 value("123456"); + Decimal128 whole; + Decimal128 fraction; + int32_t out; + + value.GetWholeAndFraction(0, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(123456, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(0, out); + + value.GetWholeAndFraction(1, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(12345, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(6, out); + + value.GetWholeAndFraction(5, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(1, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(23456, out); + + value.GetWholeAndFraction(7, &whole, &fraction); + 
ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(0, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(123456, out); +} + +TEST(Decimal128Test, GetWholeAndFractionNegative) { + Decimal128 value("-123456"); + Decimal128 whole; + Decimal128 fraction; + int32_t out; + + value.GetWholeAndFraction(0, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(-123456, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(0, out); + + value.GetWholeAndFraction(1, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(-12345, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(-6, out); + + value.GetWholeAndFraction(5, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(-1, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(-23456, out); + + value.GetWholeAndFraction(7, &whole, &fraction); + ASSERT_OK(whole.ToInteger(&out)); + ASSERT_EQ(0, out); + ASSERT_OK(fraction.ToInteger(&out)); + ASSERT_EQ(-123456, out); +} + +TEST(Decimal128Test, IncreaseScale) { + Decimal128 result; + int32_t out; + + result = Decimal128("1234").IncreaseScaleBy(3); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(1234000, out); + + result = Decimal128("-1234").IncreaseScaleBy(3); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(-1234000, out); +} + +TEST(Decimal128Test, ReduceScaleAndRound) { + Decimal128 result; + int32_t out; + + result = Decimal128("123456").ReduceScaleBy(1, false); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(12345, out); + + result = Decimal128("123456").ReduceScaleBy(1, true); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(12346, out); + + result = Decimal128("123451").ReduceScaleBy(1, true); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(12345, out); + + result = Decimal128("-123789").ReduceScaleBy(2, true); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(-1238, out); + + result = Decimal128("-123749").ReduceScaleBy(2, true); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(-1237, out); + + result = Decimal128("-123750").ReduceScaleBy(2, true); + ASSERT_OK(result.ToInteger(&out)); + ASSERT_EQ(-1238, out); +} + } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index c980e2a9e773c..8d6c06931a8f8 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -39,7 +39,7 @@ using internal::SafeLeftShift; using internal::SafeSignedAdd; static const Decimal128 ScaleMultipliers[] = { - Decimal128(0LL), + Decimal128(1LL), Decimal128(10LL), Decimal128(100LL), Decimal128(1000LL), @@ -79,6 +79,47 @@ static const Decimal128 ScaleMultipliers[] = { Decimal128(542101086242752217LL, 68739955140067328ULL), Decimal128(5421010862427522170LL, 687399551400673280ULL)}; +static const Decimal128 ScaleMultipliersHalf[] = { + Decimal128(0ULL), + Decimal128(5ULL), + Decimal128(50ULL), + Decimal128(500ULL), + Decimal128(5000ULL), + Decimal128(50000ULL), + Decimal128(500000ULL), + Decimal128(5000000ULL), + Decimal128(50000000ULL), + Decimal128(500000000ULL), + Decimal128(5000000000ULL), + Decimal128(50000000000ULL), + Decimal128(500000000000ULL), + Decimal128(5000000000000ULL), + Decimal128(50000000000000ULL), + Decimal128(500000000000000ULL), + Decimal128(5000000000000000ULL), + Decimal128(50000000000000000ULL), + Decimal128(500000000000000000ULL), + Decimal128(5000000000000000000ULL), + Decimal128(2LL, 13106511852580896768ULL), + Decimal128(27LL, 1937910009842106368ULL), + Decimal128(271LL, 932356024711512064ULL), + Decimal128(2710LL, 9323560247115120640ULL), + Decimal128(27105LL, 
1001882102603448320ULL),
+    Decimal128(271050LL, 10018821026034483200ULL),
+    Decimal128(2710505LL, 7954489891797073920ULL),
+    Decimal128(27105054LL, 5757922623132532736ULL),
+    Decimal128(271050543LL, 2238994010196672512ULL),
+    Decimal128(2710505431LL, 3943196028257173504ULL),
+    Decimal128(27105054312LL, 2538472135152631808ULL),
+    Decimal128(271050543121LL, 6937977277816766464ULL),
+    Decimal128(2710505431213LL, 14039540557039009792ULL),
+    Decimal128(27105054312137LL, 11268197054423236608ULL),
+    Decimal128(271050543121376LL, 2001506101975056384ULL),
+    Decimal128(2710505431213761LL, 1568316946041012224ULL),
+    Decimal128(27105054312137610LL, 15683169460410122240ULL),
+    Decimal128(271050543121376108LL, 9257742014424809472ULL),
+    Decimal128(2710505431213761085LL, 343699775700336640ULL)};
+
 static constexpr uint64_t kIntMask = 0xFFFFFFFF;
 static constexpr auto kCarryBit = static_cast<uint64_t>(1) << static_cast<uint64_t>(32);
@@ -888,6 +929,60 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
   return Status::OK();
 }
 
+void Decimal128::GetWholeAndFraction(int scale, Decimal128* whole,
+                                     Decimal128* fraction) const {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 38);
+
+  Decimal128 multiplier(ScaleMultipliers[scale]);
+  DCHECK_OK(Divide(multiplier, whole, fraction));
+}
+
+const Decimal128& Decimal128::GetScaleMultiplier(int32_t scale) {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 38);
+
+  return ScaleMultipliers[scale];
+}
+
+Decimal128 Decimal128::IncreaseScaleBy(int32_t increase_by) const {
+  DCHECK_GE(increase_by, 0);
+  DCHECK_LE(increase_by, 38);
+
+  return (*this) * ScaleMultipliers[increase_by];
+}
+
+Decimal128 Decimal128::ReduceScaleBy(int32_t reduce_by, bool round) const {
+  DCHECK_GE(reduce_by, 0);
+  DCHECK_LE(reduce_by, 38);
+
+  Decimal128 divisor(ScaleMultipliers[reduce_by]);
+  Decimal128 result;
+  Decimal128 remainder;
+  DCHECK_OK(Divide(divisor, &result, &remainder));
+  if (round) {
+    auto divisor_half = ScaleMultipliersHalf[reduce_by];
+    if (remainder.Abs() >= divisor_half) {
+      if (result > 0) {
+        result += 1;
+      } else {
+        result -= 1;
+      }
+    }
+  }
+  return result;
+}
+
+int32_t Decimal128::CountLeadingBinaryZeros() const {
+  DCHECK_GE(*this, Decimal128(0));
+
+  if (high_bits_ == 0) {
+    return BitUtil::CountLeadingZeros(low_bits_) + 64;
+  } else {
+    return BitUtil::CountLeadingZeros(static_cast<uint64_t>(high_bits_));
+  }
+}
+
 // Helper function used by Decimal128::FromBigEndian
 static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
   // We don't bounds check the length here because this is called by
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index f59a4a42abed6..5734fa0d5a57a 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -139,9 +139,28 @@ class ARROW_EXPORT Decimal128 {
   /// \return error status if the length is an invalid value
   static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* out);
 
+  /// \brief separate the integer and fractional parts for the given scale.
+  void GetWholeAndFraction(int32_t scale, Decimal128* whole, Decimal128* fraction) const;
+
+  /// \brief Scale multiplier for given scale value.
+  static const Decimal128& GetScaleMultiplier(int32_t scale);
+
   /// \brief Convert Decimal128 from one scale to another
   Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const;
 
+  /// \brief Scale up.
+  Decimal128 IncreaseScaleBy(int32_t increase_by) const;
+
+  /// \brief Scale down.
+ /// - If 'round' is true, the right-most digits are dropped and the result value is + /// rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits + /// (>= 10^reduce_by / 2). + /// - If 'round' is false, the right-most digits are simply dropped. + Decimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const; + + /// \brief count the number of leading binary zeroes. + int32_t CountLeadingBinaryZeros() const; + /// \brief Convert to a signed integer template > Status ToInteger(T* out) const { diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 90fe7cf8c9c57..e743b0e041cb8 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -46,6 +46,8 @@ set(SRC_FILES annotator.cc bitmap_accumulator.cc configuration.cc context_helper.cc + decimal_ir.cc + decimal_type_util.cc engine.cc date_utils.cc expr_decomposer.cc @@ -54,6 +56,7 @@ set(SRC_FILES annotator.cc expression_registry.cc exported_funcs_registry.cc filter.cc + function_ir_builder.cc function_registry.cc function_registry_arithmetic.cc function_registry_datetime.cc @@ -175,6 +178,7 @@ ADD_GANDIVA_TEST(lru_cache_test) ADD_GANDIVA_TEST(to_date_holder_test) ADD_GANDIVA_TEST(simple_arena_test) ADD_GANDIVA_TEST(like_holder_test) +ADD_GANDIVA_TEST(decimal_type_util_test) if (ARROW_GANDIVA_JAVA) add_subdirectory(jni) diff --git a/cpp/src/gandiva/arrow.h b/cpp/src/gandiva/arrow.h index ea283523a56dc..cc2bd9a10294b 100644 --- a/cpp/src/gandiva/arrow.h +++ b/cpp/src/gandiva/arrow.h @@ -35,6 +35,9 @@ using ArrayPtr = std::shared_ptr; using DataTypePtr = std::shared_ptr; using DataTypeVector = std::vector; +using Decimal128TypePtr = std::shared_ptr; +using Decimal128TypeVector = std::vector; + using FieldPtr = std::shared_ptr; using FieldVector = std::vector; @@ -48,6 +51,14 @@ using ArrayDataVector = std::vector; using Status = arrow::Status; using StatusCode = arrow::StatusCode; +static inline bool is_decimal_128(DataTypePtr type) { + if (type->id() == arrow::Type::DECIMAL) { + auto decimal_type = arrow::internal::checked_cast(type.get()); + return decimal_type->byte_width() == 16; + } else { + return false; + } +} } // namespace gandiva #endif // GANDIVA_EXPR_ARROW_H diff --git a/cpp/src/gandiva/decimal_full.h b/cpp/src/gandiva/decimal_full.h new file mode 100644 index 0000000000000..3b84da1c03584 --- /dev/null +++ b/cpp/src/gandiva/decimal_full.h @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DECIMAL_FULL_H +#define DECIMAL_FULL_H + +#include +#include +#include +#include "arrow/util/decimal.h" + +namespace gandiva { + +using Decimal128 = arrow::Decimal128; + +/// Represents a 128-bit decimal value along with its precision and scale. 
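+/// For example (illustrative, not from this patch): the decimal 1.23 held at
+/// precision 5 and scale 2 stores value() == 123; value_ keeps the unscaled integer.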
+class Decimal128Full { + public: + Decimal128Full(int64_t high_bits, uint64_t low_bits, int32_t precision, int32_t scale) + : value_(high_bits, low_bits), precision_(precision), scale_(scale) {} + + Decimal128Full(std::string value, int32_t precision, int32_t scale) + : value_(value), precision_(precision), scale_(scale) {} + + Decimal128Full(const Decimal128& value, int32_t precision, int32_t scale) + : value_(value), precision_(precision), scale_(scale) {} + + Decimal128Full(int32_t precision, int32_t scale) + : value_(0), precision_(precision), scale_(scale) {} + + uint32_t scale() const { return scale_; } + + uint32_t precision() const { return precision_; } + + const arrow::Decimal128& value() const { return value_; } + + inline std::string ToString() const { + return value_.ToString(0) + "," + std::to_string(precision_) + "," + + std::to_string(scale_); + } + + friend std::ostream& operator<<(std::ostream& os, const Decimal128Full& dec) { + os << dec.ToString(); + return os; + } + + private: + Decimal128 value_; + + int32_t precision_; + int32_t scale_; +}; + +inline bool operator==(const Decimal128Full& left, const Decimal128Full& right) { + return left.value() == right.value() && left.precision() == right.precision() && + left.scale() == right.scale(); +} + +} // namespace gandiva + +#endif // DECIMAL_FULL_H diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc new file mode 100644 index 0000000000000..38b35a64b293f --- /dev/null +++ b/cpp/src/gandiva/decimal_ir.cc @@ -0,0 +1,405 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "arrow/status.h" +#include "gandiva/decimal_ir.h" +#include "gandiva/decimal_type_util.h" + +// Algorithms adapted from Apache Impala + +namespace gandiva { + +#define ADD_TRACE_32(msg, value) \ + if (enable_ir_traces_) { \ + AddTrace32(msg, value); \ + } +#define ADD_TRACE_128(msg, value) \ + if (enable_ir_traces_) { \ + AddTrace128(msg, value); \ + } + +const char* DecimalIR::kScaleMultipliersName = "gandivaScaleMultipliers"; + +/// Populate globals required by decimal IR. +/// TODO: can this be done just once ? +void DecimalIR::AddGlobals(Engine* engine) { + auto types = engine->types(); + + // populate vector : [ 1, 10, 100, 1000, ..] 
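+  // (Entry i of the table is 10^i: each constant is parsed as an i128 from a
+  // decimal string that grows by one trailing "0" per loop iteration.)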
+ std::string value = "1"; + std::vector scale_multipliers; + for (int i = 0; i < DecimalTypeUtil::kMaxPrecision + 1; ++i) { + auto multiplier = + llvm::ConstantInt::get(llvm::Type::getInt128Ty(*engine->context()), value, 10); + scale_multipliers.push_back(multiplier); + value.append("0"); + } + + auto array_type = + llvm::ArrayType::get(types->i128_type(), DecimalTypeUtil::kMaxPrecision + 1); + auto initializer = llvm::ConstantArray::get( + array_type, llvm::ArrayRef(scale_multipliers)); + + auto globalScaleMultipliers = new llvm::GlobalVariable( + *engine->module(), array_type, true /*constant*/, + llvm::GlobalValue::LinkOnceAnyLinkage, initializer, kScaleMultipliersName); + globalScaleMultipliers->setAlignment(16); +} + +// Lookup intrinsic functions +void DecimalIR::InitializeIntrinsics() { + sadd_with_overflow_fn_ = llvm::Intrinsic::getDeclaration( + module(), llvm::Intrinsic::sadd_with_overflow, types()->i128_type()); + DCHECK_NE(sadd_with_overflow_fn_, nullptr); + + smul_with_overflow_fn_ = llvm::Intrinsic::getDeclaration( + module(), llvm::Intrinsic::smul_with_overflow, types()->i128_type()); + DCHECK_NE(smul_with_overflow_fn_, nullptr); + + i128_with_overflow_struct_type_ = + sadd_with_overflow_fn_->getFunctionType()->getReturnType(); +} + +// CPP: return kScaleMultipliers[scale] +llvm::Value* DecimalIR::GetScaleMultiplier(llvm::Value* scale) { + auto const_array = module()->getGlobalVariable(kScaleMultipliersName); + auto ptr = ir_builder()->CreateGEP(const_array, {types()->i32_constant(0), scale}); + return ir_builder()->CreateLoad(ptr); +} + +// CPP: x <= y ? y : x +llvm::Value* DecimalIR::GetHigherScale(llvm::Value* x_scale, llvm::Value* y_scale) { + llvm::Value* le = ir_builder()->CreateICmpSLE(x_scale, y_scale); + return ir_builder()->CreateSelect(le, y_scale, x_scale); +} + +// CPP: return (increase_scale_by <= 0) ? +// in_value : in_value * GetScaleMultiplier(increase_scale_by) +llvm::Value* DecimalIR::IncreaseScale(llvm::Value* in_value, + llvm::Value* increase_scale_by) { + llvm::Value* le_zero = + ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0)); + // then block + auto then_lambda = [&] { return in_value; }; + + // else block + auto else_lambda = [&] { + llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by); + return ir_builder()->CreateMul(in_value, multiplier); + }; + + return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda); +} + +// CPP: return (increase_scale_by <= 0) ? +// {in_value,false} : {in_value * GetScaleMultiplier(increase_scale_by),true} +// +// The return value also indicates if there was an overflow while increasing the scale. +DecimalIR::ValueWithOverflow DecimalIR::IncreaseScaleWithOverflowCheck( + llvm::Value* in_value, llvm::Value* increase_scale_by) { + llvm::Value* le_zero = + ir_builder()->CreateICmpSLE(increase_scale_by, types()->i32_constant(0)); + + // then block + auto then_lambda = [&] { + ValueWithOverflow ret{in_value, types()->false_constant()}; + return ret.AsStruct(this); + }; + + // else block + auto else_lambda = [&] { + llvm::Value* multiplier = GetScaleMultiplier(increase_scale_by); + return ir_builder()->CreateCall(smul_with_overflow_fn_, {in_value, multiplier}); + }; + + auto ir_struct = + BuildIfElse(le_zero, i128_with_overflow_struct_type_, then_lambda, else_lambda); + return ValueWithOverflow::MakeFromStruct(this, ir_struct); +} + +// CPP: return (reduce_scale_by <= 0) ? 
+// in_value : in_value / GetScaleMultiplier(reduce_scale_by) +// +// ReduceScale cannot cause an overflow. +llvm::Value* DecimalIR::ReduceScale(llvm::Value* in_value, llvm::Value* reduce_scale_by) { + auto le_zero = ir_builder()->CreateICmpSLE(reduce_scale_by, types()->i32_constant(0)); + // then block + auto then_lambda = [&] { return in_value; }; + + // else block + auto else_lambda = [&] { + // TODO : handle rounding. + llvm::Value* multiplier = GetScaleMultiplier(reduce_scale_by); + return ir_builder()->CreateSDiv(in_value, multiplier); + }; + + return BuildIfElse(le_zero, types()->i128_type(), then_lambda, else_lambda); +} + +/// @brief Fast-path for add +/// Adjust x and y to the same scale, and add them. +llvm::Value* DecimalIR::AddFastPath(const ValueFull& x, const ValueFull& y) { + auto higher_scale = GetHigherScale(x.scale(), y.scale()); + ADD_TRACE_32("AddFastPath : higher_scale", higher_scale); + + // CPP : x_scaled = IncreaseScale(x_value, higher_scale - x_scale) + auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale()); + auto x_scaled = IncreaseScale(x.value(), x_delta); + ADD_TRACE_128("AddFastPath : x_scaled", x_scaled); + + // CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale) + auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale()); + auto y_scaled = IncreaseScale(y.value(), y_delta); + ADD_TRACE_128("AddFastPath : y_scaled", y_scaled); + + auto sum = ir_builder()->CreateAdd(x_scaled, y_scaled); + ADD_TRACE_128("AddFastPath : sum", sum); + return sum; +} + +// @brief Add with overflow check. +/// Adjust x and y to the same scale, add them, and reduce sum to output scale. +/// If there is an overflow, the sum is set to 0. +DecimalIR::ValueWithOverflow DecimalIR::AddWithOverflowCheck(const ValueFull& x, + const ValueFull& y, + const ValueFull& out) { + auto higher_scale = GetHigherScale(x.scale(), y.scale()); + ADD_TRACE_32("AddWithOverflowCheck : higher_scale", higher_scale); + + // CPP : x_scaled = IncreaseScale(x_value, higher_scale - x.scale()) + auto x_delta = ir_builder()->CreateSub(higher_scale, x.scale()); + auto x_scaled = IncreaseScaleWithOverflowCheck(x.value(), x_delta); + ADD_TRACE_128("AddWithOverflowCheck : x_scaled", x_scaled.value()); + + // CPP : y_scaled = IncreaseScale(y_value, higher_scale - y_scale) + auto y_delta = ir_builder()->CreateSub(higher_scale, y.scale()); + auto y_scaled = IncreaseScaleWithOverflowCheck(y.value(), y_delta); + ADD_TRACE_128("AddWithOverflowCheck : y_scaled", y_scaled.value()); + + // CPP : sum = x_scaled + y_scaled + auto sum_ir_struct = ir_builder()->CreateCall(sadd_with_overflow_fn_, + {x_scaled.value(), y_scaled.value()}); + auto sum = ValueWithOverflow::MakeFromStruct(this, sum_ir_struct); + ADD_TRACE_128("AddWithOverflowCheck : sum", sum.value()); + + // CPP : overflow ? 0 : sum / GetScaleMultiplier(max_scale - out_scale) + auto overflow = GetCombinedOverflow({x_scaled, y_scaled, sum}); + ADD_TRACE_32("AddWithOverflowCheck : overflow", overflow); + auto then_lambda = [&] { + // if there is an overflow, the value returned won't be used. so, save the division. + return types()->i128_constant(0); + }; + auto else_lambda = [&] { + auto reduce_scale_by = ir_builder()->CreateSub(higher_scale, out.scale()); + return ReduceScale(sum.value(), reduce_scale_by); + }; + auto sum_descaled = + BuildIfElse(overflow, types()->i128_type(), then_lambda, else_lambda); + return ValueWithOverflow(sum_descaled, overflow); +} + +// This is pretty complex, so use CPP fns. 
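+// (Concretely: split each 128-bit operand into two 64-bit halves, call the
+// precompiled add_large_decimal128_decimal128 helper, then reassemble the
+// returned halves into an i128.)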
+llvm::Value* DecimalIR::AddLarge(const ValueFull& x, const ValueFull& y,
+                                 const ValueFull& out) {
+  std::vector<llvm::Value*> args;
+
+  auto x_split = ValueSplit::MakeFromInt128(this, x.value());
+  args.push_back(x_split.high());
+  args.push_back(x_split.low());
+  args.push_back(x.precision());
+  args.push_back(x.scale());
+
+  auto y_split = ValueSplit::MakeFromInt128(this, y.value());
+  args.push_back(y_split.high());
+  args.push_back(y_split.low());
+  args.push_back(y.precision());
+  args.push_back(y.scale());
+
+  args.push_back(out.precision());
+  args.push_back(out.scale());
+
+  auto split = ir_builder()->CreateCall(
+      module()->getFunction("add_large_decimal128_decimal128"), args);
+
+  auto sum = ValueSplit::MakeFromStruct(this, split).AsInt128(this);
+  ADD_TRACE_128("AddLarge : sum", sum);
+  return sum;
+}
+
+/// The output scale/precision cannot be arbitrary values. The algorithm here depends
+/// on them being the same as computed in DecimalTypeUtil::GetResultType.
+/// TODO: enforce this.
+Status DecimalIR::BuildAdd() {
+  // Create fn prototype :
+  // int128_t
+  // add_decimal128_decimal128(int128_t x_value, int32_t x_precision, int32_t x_scale,
+  //                           int128_t y_value, int32_t y_precision, int32_t y_scale,
+  //                           int32_t out_precision, int32_t out_scale)
+  auto i32 = types()->i32_type();
+  auto i128 = types()->i128_type();
+  auto function = BuildFunction("add_decimal128_decimal128", i128,
+                                {
+                                    {"x_value", i128},
+                                    {"x_precision", i32},
+                                    {"x_scale", i32},
+                                    {"y_value", i128},
+                                    {"y_precision", i32},
+                                    {"y_scale", i32},
+                                    {"out_precision", i32},
+                                    {"out_scale", i32},
+                                });
+
+  auto arg_iter = function->arg_begin();
+  ValueFull x(&arg_iter[0], &arg_iter[1], &arg_iter[2]);
+  ValueFull y(&arg_iter[3], &arg_iter[4], &arg_iter[5]);
+  ValueFull out(nullptr, &arg_iter[6], &arg_iter[7]);
+
+  auto entry = llvm::BasicBlock::Create(*context(), "entry", function);
+  ir_builder()->SetInsertPoint(entry);
+
+  // CPP :
+  // if (out_precision < 38) {
+  //   return AddFastPath(x, y)
+  // } else {
+  //   ret = AddWithOverflowCheck(x, y)
+  //   if (ret.overflow)
+  //     return AddLarge(x, y)
+  //   else
+  //     return ret.value;
+  // }
+  llvm::Value* lt_max_precision = ir_builder()->CreateICmpSLT(
+      out.precision(), types()->i32_constant(DecimalTypeUtil::kMaxPrecision));
+  auto then_lambda = [&] {
+    // fast-path add
+    return AddFastPath(x, y);
+  };
+  auto else_lambda = [&] {
+    if (kUseOverflowIntrinsics) {
+      // do the add and check if there was overflow
+      auto ret = AddWithOverflowCheck(x, y, out);
+
+      // if there is an overflow, switch to the AddLarge codepath.
+      return BuildIfElse(ret.overflow(), types()->i128_type(),
+                         [&] { return AddLarge(x, y, out); },
+                         [&] { return ret.value(); });
+    } else {
+      return AddLarge(x, y, out);
+    }
+  };
+  auto value =
+      BuildIfElse(lt_max_precision, types()->i128_type(), then_lambda, else_lambda);
+
+  // return the result
+  ir_builder()->CreateRet(value);
+  return Status::OK();
+}
+
+Status DecimalIR::AddFunctions(Engine* engine) {
+  auto decimal_ir = std::make_shared<DecimalIR>(engine);
+
+  // Populate global variables used by decimal operations.
+  decimal_ir->AddGlobals(engine);
+
+  // Lookup intrinsic functions
+  decimal_ir->InitializeIntrinsics();
+
+  // build "add"
+  return decimal_ir->BuildAdd();
+}
+
+// Do a bitwise-or of all the overflow bits.
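+// i.e. the result is true iff any of the scale adjustments, or the addition itself,
+// overflowed.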
+llvm::Value* DecimalIR::GetCombinedOverflow(
+    std::vector<ValueWithOverflow> vec) {
+  llvm::Value* res = types()->false_constant();
+  for (auto& val : vec) {
+    res = ir_builder()->CreateOr(res, val.overflow());
+  }
+  return res;
+}
+
+DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromInt128(DecimalIR* decimal_ir,
+                                                            llvm::Value* in) {
+  auto builder = decimal_ir->ir_builder();
+  auto types = decimal_ir->types();
+
+  auto high = builder->CreateLShr(in, types->i128_constant(64));
+  high = builder->CreateTrunc(high, types->i64_type());
+  auto low = builder->CreateTrunc(in, types->i64_type());
+  return ValueSplit(high, low);
+}
+
+/// Convert IR struct {%i64, %i64} to cpp class ValueSplit
+DecimalIR::ValueSplit DecimalIR::ValueSplit::MakeFromStruct(DecimalIR* decimal_ir,
+                                                            llvm::Value* dstruct) {
+  auto builder = decimal_ir->ir_builder();
+  auto high = builder->CreateExtractValue(dstruct, 0);
+  auto low = builder->CreateExtractValue(dstruct, 1);
+  return DecimalIR::ValueSplit(high, low);
+}
+
+llvm::Value* DecimalIR::ValueSplit::AsInt128(DecimalIR* decimal_ir) const {
+  auto builder = decimal_ir->ir_builder();
+  auto types = decimal_ir->types();
+
+  auto value = builder->CreateSExt(high_, types->i128_type());
+  value = builder->CreateShl(value, types->i128_constant(64));
+  value = builder->CreateAdd(value, builder->CreateZExt(low_, types->i128_type()));
+  return value;
+}
+
+/// Convert IR struct {%i128, %i1} to cpp class ValueWithOverflow
+DecimalIR::ValueWithOverflow DecimalIR::ValueWithOverflow::MakeFromStruct(
+    DecimalIR* decimal_ir, llvm::Value* dstruct) {
+  auto builder = decimal_ir->ir_builder();
+  auto value = builder->CreateExtractValue(dstruct, 0);
+  auto overflow = builder->CreateExtractValue(dstruct, 1);
+  return DecimalIR::ValueWithOverflow(value, overflow);
+}
+
+/// Convert to IR struct {%i128, %i1}
+llvm::Value* DecimalIR::ValueWithOverflow::AsStruct(DecimalIR* decimal_ir) const {
+  auto builder = decimal_ir->ir_builder();
+
+  auto undef = llvm::UndefValue::get(decimal_ir->i128_with_overflow_struct_type_);
+  auto struct_val = builder->CreateInsertValue(undef, value(), 0);
+  return builder->CreateInsertValue(struct_val, overflow(), 1);
+}
+
+/// debug traces
+void DecimalIR::AddTrace(const std::string& fmt, std::vector<llvm::Value*> args) {
+  DCHECK(enable_ir_traces_);
+
+  auto ir_str = ir_builder()->CreateGlobalStringPtr(fmt);
+  args.insert(args.begin(), ir_str);
+  ir_builder()->CreateCall(module()->getFunction("printf"), args, "trace");
+}
+
+void DecimalIR::AddTrace32(const std::string& msg, llvm::Value* value) {
+  AddTrace("DECIMAL_IR_TRACE:: " + msg + " %d\n", {value});
+}
+
+void DecimalIR::AddTrace128(const std::string& msg, llvm::Value* value) {
+  // convert i128 into two i64s for printing
+  auto split = ValueSplit::MakeFromInt128(this, value);
+  AddTrace("DECIMAL_IR_TRACE:: " + msg + " %llx:%llx (%lld:%llu)\n",
+           {split.high(), split.low(), split.high(), split.low()});
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/decimal_ir.h b/cpp/src/gandiva/decimal_ir.h
new file mode 100644
index 0000000000000..fae762c362d94
--- /dev/null
+++ b/cpp/src/gandiva/decimal_ir.h
@@ -0,0 +1,171 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_DECIMAL_ADD_IR_BUILDER_H
+#define GANDIVA_DECIMAL_ADD_IR_BUILDER_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gandiva/function_ir_builder.h"
+
+namespace gandiva {
+
+/// @brief Decimal IR functions
+class DecimalIR : public FunctionIRBuilder {
+ public:
+  explicit DecimalIR(Engine* engine)
+      : FunctionIRBuilder(engine), enable_ir_traces_(false) {}
+
+  /// Build decimal IR functions and add them to the engine.
+  static Status AddFunctions(Engine* engine);
+
+  void EnableTraces() { enable_ir_traces_ = true; }
+
+ private:
+  /// The intrinsic fn for divide with small divisors is about 10x slower, so not
+  /// using these.
+  static const bool kUseOverflowIntrinsics = false;
+
+  // Holder for an i128 value, along with its scale and precision.
+  class ValueFull {
+   public:
+    ValueFull(llvm::Value* value, llvm::Value* precision, llvm::Value* scale)
+        : value_(value), precision_(precision), scale_(scale) {}
+
+    llvm::Value* value() const { return value_; }
+    llvm::Value* precision() const { return precision_; }
+    llvm::Value* scale() const { return scale_; }
+
+   private:
+    llvm::Value* value_;
+    llvm::Value* precision_;
+    llvm::Value* scale_;
+  };
+
+  // Holder for an i128 value, and a boolean indicating overflow.
+  class ValueWithOverflow {
+   public:
+    ValueWithOverflow(llvm::Value* value, llvm::Value* overflow)
+        : value_(value), overflow_(overflow) {}
+
+    // Make from IR struct
+    static ValueWithOverflow MakeFromStruct(DecimalIR* decimal_ir, llvm::Value* dstruct);
+
+    // Build a corresponding IR struct
+    llvm::Value* AsStruct(DecimalIR* decimal_ir) const;
+
+    llvm::Value* value() const { return value_; }
+    llvm::Value* overflow() const { return overflow_; }
+
+   private:
+    llvm::Value* value_;
+    llvm::Value* overflow_;
+  };
+
+  // Holder for an i128 value that is split into two i64s
+  class ValueSplit {
+   public:
+    ValueSplit(llvm::Value* high, llvm::Value* low) : high_(high), low_(low) {}
+
+    // Make from i128 value
+    static ValueSplit MakeFromInt128(DecimalIR* decimal_ir, llvm::Value* in);
+
+    // Make from IR struct
+    static ValueSplit MakeFromStruct(DecimalIR* decimal_ir, llvm::Value* dstruct);
+
+    // Combine the two parts into an i128
+    llvm::Value* AsInt128(DecimalIR* decimal_ir) const;
+
+    llvm::Value* high() const { return high_; }
+    llvm::Value* low() const { return low_; }
+
+   private:
+    llvm::Value* high_;
+    llvm::Value* low_;
+  };
+
+  // Add global variables to the module.
+  static void AddGlobals(Engine* engine);
+
+  // Initialize intrinsic functions that are used by decimal operations.
+  void InitializeIntrinsics();
+
+  // Create IR builder for decimal add function.
+  static Status MakeAdd(Engine* engine, std::shared_ptr<DecimalIR>* out);
+
+  // Get the multiplier for the specified scale (i.e. 10^scale)
+  llvm::Value* GetScaleMultiplier(llvm::Value* scale);
+
+  // Get the higher of the two scales
+  llvm::Value* GetHigherScale(llvm::Value* x_scale, llvm::Value* y_scale);
+
+  // Increase scale of 'in_value' by 'increase_scale_by'.
+  // - If 'increase_scale_by' is <= 0, does nothing.
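+  // - e.g. with increase_scale_by = 2, the i128 value 12345 becomes 1234500
+  //   (the caller interprets this as going from scale s to scale s + 2).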
+  llvm::Value* IncreaseScale(llvm::Value* in_value, llvm::Value* increase_scale_by);
+
+  // Similar to IncreaseScale, but also checks if there is an overflow.
+  ValueWithOverflow IncreaseScaleWithOverflowCheck(llvm::Value* in_value,
+                                                   llvm::Value* increase_scale_by);
+
+  // Reduce scale of 'in_value' by 'reduce_scale_by'.
+  // - If 'reduce_scale_by' is <= 0, does nothing.
+  llvm::Value* ReduceScale(llvm::Value* in_value, llvm::Value* reduce_scale_by);
+
+  // Fast path of add: guaranteed no overflow
+  llvm::Value* AddFastPath(const ValueFull& x, const ValueFull& y);
+
+  // Similar to AddFastPath, but checks if there's an overflow.
+  ValueWithOverflow AddWithOverflowCheck(const ValueFull& x, const ValueFull& y,
+                                         const ValueFull& out);
+
+  // Do addition of large integers (both positive and negative).
+  llvm::Value* AddLarge(const ValueFull& x, const ValueFull& y, const ValueFull& out);
+
+  // Get the combined overflow (logical or).
+  llvm::Value* GetCombinedOverflow(std::vector<ValueWithOverflow> values);
+
+  // Build the function for adding decimals.
+  Status BuildAdd();
+
+  // Add a trace in IR code.
+  void AddTrace(const std::string& fmt, std::vector<llvm::Value*> args);
+
+  // Add a trace msg along with a 32-bit integer.
+  void AddTrace32(const std::string& msg, llvm::Value* value);
+
+  // Add a trace msg along with a 128-bit integer.
+  void AddTrace128(const std::string& msg, llvm::Value* value);
+
+  // name of the global variable having the array of scale multipliers.
+  static const char* kScaleMultipliersName;
+
+  // Intrinsic functions
+  llvm::Function* sadd_with_overflow_fn_;
+  llvm::Function* smul_with_overflow_fn_;
+
+  // struct { i128: value, i1: overflow}
+  llvm::Type* i128_with_overflow_struct_type_;
+
+  // if set to true, ir traces are enabled. Useful for debugging.
+  bool enable_ir_traces_;
+};
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_DECIMAL_ADD_IR_BUILDER_H
diff --git a/cpp/src/gandiva/decimal_type_util.cc b/cpp/src/gandiva/decimal_type_util.cc
new file mode 100644
index 0000000000000..0ebfe661ce63d
--- /dev/null
+++ b/cpp/src/gandiva/decimal_type_util.cc
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/logging.h"
+
+namespace gandiva {
+
+constexpr int32_t DecimalTypeUtil::kMaxDecimal32Precision;
+constexpr int32_t DecimalTypeUtil::kMaxDecimal64Precision;
+constexpr int32_t DecimalTypeUtil::kMaxPrecision;
+
+constexpr int32_t DecimalTypeUtil::kMaxScale;
+constexpr int32_t DecimalTypeUtil::kMinAdjustedScale;
+
+#define DCHECK_TYPE(type)                        \
+  {                                              \
+    DCHECK_GE(type->scale(), 0);                 \
+    DCHECK_LE(type->precision(), kMaxPrecision); \
+  }
+
+// Implementation of decimal rules.
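+// Worked example for kOpAdd (matches the cases in decimal_type_util_test.cc):
+//   decimal(30, 10) + decimal(30, 10)
+//     => scale = max(10, 10) = 10
+//        precision = max(30 - 10, 30 - 10) + 10 + 1 = 31, i.e. decimal(31, 10)
+//   decimal(38, 10) + decimal(38, 38)
+//     => the naive result is decimal(67, 38); 67 exceeds kMaxPrecision (38), so
+//        MakeAdjustedType drops the excess 29 digits from the scale, bounded below
+//        by kMinAdjustedScale, giving decimal(38, 9).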
+Status DecimalTypeUtil::GetResultType(Op op, const Decimal128TypeVector& in_types,
+                                      Decimal128TypePtr* out_type) {
+  DCHECK_EQ(in_types.size(), 2);
+
+  *out_type = nullptr;
+  auto t1 = in_types[0];
+  auto t2 = in_types[1];
+  DCHECK_TYPE(t1);
+  DCHECK_TYPE(t2);
+
+  int32_t s1 = t1->scale();
+  int32_t s2 = t2->scale();
+  int32_t p1 = t1->precision();
+  int32_t p2 = t2->precision();
+  int32_t result_scale;
+  int32_t result_precision;
+
+  switch (op) {
+    case kOpAdd:
+    case kOpSubtract:
+      result_scale = std::max(s1, s2);
+      result_precision = std::max(p1 - s1, p2 - s2) + result_scale + 1;
+      break;
+
+    case kOpMultiply:
+      result_scale = s1 + s2;
+      result_precision = p1 + p2 + 1;
+      break;
+
+    case kOpDivide:
+      result_scale = std::max(kMinAdjustedScale, s1 + p2 + 1);
+      result_precision = p1 - s1 + s2 + result_scale;
+      break;
+
+    case kOpMod:
+      result_scale = std::max(s1, s2);
+      result_precision = std::min(p1 - s1, p2 - s2) + result_scale;
+      break;
+  }
+  *out_type = MakeAdjustedType(result_precision, result_scale);
+  return Status::OK();
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/decimal_type_util.h b/cpp/src/gandiva/decimal_type_util.h
new file mode 100644
index 0000000000000..2c095c159bba0
--- /dev/null
+++ b/cpp/src/gandiva/decimal_type_util.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Adapted from Apache Impala
+
+#ifndef GANDIVA_DECIMAL_TYPE_SQL_H
+#define GANDIVA_DECIMAL_TYPE_SQL_H
+
+#include <algorithm>
+#include <memory>
+
+#include "gandiva/arrow.h"
+
+namespace gandiva {
+
+/// @brief Handles conversion of scale/precision for operations on decimal types.
+/// TODO : do validations for all of these.
+class DecimalTypeUtil {
+ public:
+  enum Op {
+    kOpAdd,
+    kOpSubtract,
+    kOpMultiply,
+    kOpDivide,
+    kOpMod,
+  };
+
+  /// The maximum precision representable by a 4-byte decimal
+  static constexpr int32_t kMaxDecimal32Precision = 9;
+
+  /// The maximum precision representable by an 8-byte decimal
+  static constexpr int32_t kMaxDecimal64Precision = 18;
+
+  /// The maximum precision representable by a 16-byte decimal
+  static constexpr int32_t kMaxPrecision = 38;
+
+  // The maximum scale representable.
+  static constexpr int32_t kMaxScale = kMaxPrecision;
+
+  // When operating on decimal inputs, the integer part of the output can exceed the
+  // max precision. In such cases, the scale can be reduced, up to a minimum of
+  // kMinAdjustedScale.
+  // * There is no strong reason for 6, but both SQLServer and Impala use 6 too.
+  static constexpr int32_t kMinAdjustedScale = 6;
+
+  // For the specified operation and input scale/precision, determine the output
+  // scale/precision.
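+  // (if the computed precision exceeds kMaxPrecision, the result type is adjusted
+  // via MakeAdjustedType below, trading scale digits for integer digits)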
+  static Status GetResultType(Op op, const Decimal128TypeVector& in_types,
+                              Decimal128TypePtr* out_type);
+
+  static Decimal128TypePtr MakeType(int32_t precision, int32_t scale);
+
+ private:
+  static Decimal128TypePtr MakeAdjustedType(int32_t precision, int32_t scale);
+};
+
+inline Decimal128TypePtr DecimalTypeUtil::MakeType(int32_t precision, int32_t scale) {
+  return std::dynamic_pointer_cast<arrow::Decimal128Type>(
+      arrow::decimal(precision, scale));
+}
+
+// Reduce the scale if possible so that precision stays <= kMaxPrecision
+inline Decimal128TypePtr DecimalTypeUtil::MakeAdjustedType(int32_t precision,
+                                                           int32_t scale) {
+  if (precision > kMaxPrecision) {
+    int32_t min_scale = std::min(scale, kMinAdjustedScale);
+    int32_t delta = precision - kMaxPrecision;
+    precision = kMaxPrecision;
+    scale = std::max(scale - delta, min_scale);
+  }
+  return MakeType(precision, scale);
+}
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_DECIMAL_TYPE_SQL_H
diff --git a/cpp/src/gandiva/decimal_type_util_test.cc b/cpp/src/gandiva/decimal_type_util_test.cc
new file mode 100644
index 0000000000000..a593990638af5
--- /dev/null
+++ b/cpp/src/gandiva/decimal_type_util_test.cc
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Adapted from Apache Impala
+
+#include <gtest/gtest.h>
+
+#include "gandiva/decimal_type_util.h"
+#include "tests/test_util.h"
+
+namespace gandiva {
+
+#define DECIMAL_TYPE(p, s) DecimalTypeUtil::MakeType(p, s)
+
+Decimal128TypePtr DoOp(DecimalTypeUtil::Op op, Decimal128TypePtr d1,
+                       Decimal128TypePtr d2) {
+  Decimal128TypePtr ret_type;
+  EXPECT_OK(DecimalTypeUtil::GetResultType(op, {d1, d2}, &ret_type));
+  return ret_type;
+}
+
+TEST(DecimalResultTypes, Basic) {
+  EXPECT_ARROW_TYPE_EQUALS(
+      DECIMAL_TYPE(31, 10),
+      DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 10), DECIMAL_TYPE(30, 10)));
+
+  EXPECT_ARROW_TYPE_EQUALS(
+      DECIMAL_TYPE(32, 6),
+      DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 6), DECIMAL_TYPE(30, 5)));
+
+  EXPECT_ARROW_TYPE_EQUALS(
+      DECIMAL_TYPE(38, 9),
+      DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(30, 10), DECIMAL_TYPE(38, 10)));
+
+  EXPECT_ARROW_TYPE_EQUALS(
+      DECIMAL_TYPE(38, 9),
+      DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(38, 10), DECIMAL_TYPE(38, 38)));
+
+  EXPECT_ARROW_TYPE_EQUALS(
+      DECIMAL_TYPE(38, 6),
+      DoOp(DecimalTypeUtil::kOpAdd, DECIMAL_TYPE(38, 10), DECIMAL_TYPE(38, 2)));
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index da7a6d886c0e0..9aaafea8e498e 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include "gandiva/decimal_ir.h"
 #include "gandiva/exported_funcs_registry.h"
 
 namespace gandiva {
@@ -94,6 +95,10 @@ Status Engine::Make(std::shared_ptr<Configuration> config,
   auto status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path());
   ARROW_RETURN_NOT_OK(status);
 
+  // Add decimal functions
+  status = DecimalIR::AddFunctions(engine_obj.get());
+  ARROW_RETURN_NOT_OK(status);
+
   *engine = std::move(engine_obj);
   return Status::OK();
 }
@@ -183,7 +188,7 @@ Status Engine::FinalizeModule(bool optimise_ir, bool dump_ir) {
 
   // run the optimiser
   llvm::PassManagerBuilder pass_builder;
-  pass_builder.OptLevel = 2;
+  pass_builder.OptLevel = 3;
   pass_builder.populateModulePassManager(*pass_manager);
   pass_manager->run(*module_);
 
@@ -222,7 +227,7 @@ void Engine::DumpIR(std::string prefix) {
   std::string str;
 
   llvm::raw_string_ostream stream(str);
-  module_->print(stream, NULL);
+  module_->print(stream, nullptr);
   std::cout << "====" << prefix << "===" << str << "\n";
 }
 
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index f377ebc38d3ef..16b5a56ebdb36 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -37,6 +37,8 @@
 
 namespace gandiva {
 
+class FunctionIRBuilder;
+
 /// \brief LLVM Execution engine wrapper.
 class Engine {
  public:
diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc
index fb5a45e779926..1a087c96f33bd 100644
--- a/cpp/src/gandiva/expression_registry.cc
+++ b/cpp/src/gandiva/expression_registry.cc
@@ -136,10 +136,12 @@ void ExpressionRegistry::AddArrowTypesToVector(arrow::Type::type& type,
     case arrow::Type::type::NA:
       vector.push_back(arrow::null());
       break;
+    case arrow::Type::type::DECIMAL:
+      vector.push_back(arrow::decimal(0, 0));
+      break;
     case arrow::Type::type::FIXED_SIZE_BINARY:
     case arrow::Type::type::MAP:
     case arrow::Type::type::INTERVAL:
-    case arrow::Type::type::DECIMAL:
    case arrow::Type::type::LIST:
     case arrow::Type::type::STRUCT:
     case arrow::Type::type::UNION:
diff --git a/cpp/src/gandiva/function_ir_builder.cc b/cpp/src/gandiva/function_ir_builder.cc
new file mode 100644
index 0000000000000..194273933cd15
--- /dev/null
+++ b/cpp/src/gandiva/function_ir_builder.cc
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_ir_builder.h"
+
+namespace gandiva {
+
+llvm::Value* FunctionIRBuilder::BuildIfElse(llvm::Value* condition,
+                                            llvm::Type* return_type,
+                                            std::function<llvm::Value*()> then_func,
+                                            std::function<llvm::Value*()> else_func) {
+  llvm::IRBuilder<>* builder = ir_builder();
+  llvm::Function* function = builder->GetInsertBlock()->getParent();
+  DCHECK_NE(function, nullptr);
+
+  // Create blocks for the then, else and merge cases.
+  llvm::BasicBlock* then_bb = llvm::BasicBlock::Create(*context(), "then", function);
+  llvm::BasicBlock* else_bb = llvm::BasicBlock::Create(*context(), "else", function);
+  llvm::BasicBlock* merge_bb = llvm::BasicBlock::Create(*context(), "merge", function);
+
+  builder->CreateCondBr(condition, then_bb, else_bb);
+
+  // Emit the then block.
+  builder->SetInsertPoint(then_bb);
+  auto then_value = then_func();
+  builder->CreateBr(merge_bb);
+
+  // refresh then_bb for phi (could have changed due to code generation of then_value).
+  then_bb = builder->GetInsertBlock();
+
+  // Emit the else block.
+  builder->SetInsertPoint(else_bb);
+  auto else_value = else_func();
+  builder->CreateBr(merge_bb);
+
+  // refresh else_bb for phi (could have changed due to code generation of else_value).
+  else_bb = builder->GetInsertBlock();
+
+  // Emit the merge block.
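+  // The phi node below merges the two incoming values; which one is used at runtime
+  // depends on whether control arrived from the then block or the else block.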
+  builder->SetInsertPoint(merge_bb);
+  llvm::PHINode* result_value = builder->CreatePHI(return_type, 2, "res_value");
+  result_value->addIncoming(then_value, then_bb);
+  result_value->addIncoming(else_value, else_bb);
+  return result_value;
+}
+
+llvm::Function* FunctionIRBuilder::BuildFunction(const std::string& function_name,
+                                                 llvm::Type* return_type,
+                                                 std::vector<NamedArg> in_args) {
+  std::vector<llvm::Type*> arg_types;
+  for (auto& arg : in_args) {
+    arg_types.push_back(arg.type);
+  }
+  auto prototype = llvm::FunctionType::get(return_type, arg_types, false /*isVarArg*/);
+  auto function = llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage,
+                                         function_name, module());
+
+  uint32_t i = 0;
+  for (auto& fn_arg : function->args()) {
+    DCHECK_LT(i, in_args.size());
+    fn_arg.setName(in_args[i].name);
+    ++i;
+  }
+  return function;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/function_ir_builder.h b/cpp/src/gandiva/function_ir_builder.h
new file mode 100644
index 0000000000000..7d6003a62d5bf
--- /dev/null
+++ b/cpp/src/gandiva/function_ir_builder.h
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef GANDIVA_FUNCTION_IR_BUILDER_H
+#define GANDIVA_FUNCTION_IR_BUILDER_H
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gandiva/engine.h"
+#include "gandiva/gandiva_aliases.h"
+#include "gandiva/llvm_types.h"
+
+namespace gandiva {
+
+/// @brief Base class for building IR functions.
+class FunctionIRBuilder {
+ public:
+  explicit FunctionIRBuilder(Engine* engine) : engine_(engine) {}
+  virtual ~FunctionIRBuilder() = default;
+
+ protected:
+  LLVMTypes* types() { return engine_->types(); }
+  llvm::Module* module() { return engine_->module(); }
+  llvm::LLVMContext* context() { return engine_->context(); }
+  llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); }
+
+  /// Build an if-else block.
+  llvm::Value* BuildIfElse(llvm::Value* condition, llvm::Type* return_type,
+                           std::function<llvm::Value*()> then_func,
+                           std::function<llvm::Value*()> else_func);
+
+  struct NamedArg {
+    std::string name;
+    llvm::Type* type;
+  };
+
+  /// Build llvm fn.
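+  /// (declares an empty function with external linkage and the given named
+  /// arguments; the caller is responsible for emitting the body)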
+  llvm::Function* BuildFunction(const std::string& function_name, llvm::Type* return_type,
+                                std::vector<NamedArg> in_args);
+
+ private:
+  Engine* engine_;
+};
+
+}  // namespace gandiva
+
+#endif  // GANDIVA_FUNCTION_IR_BUILDER_H
diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index 83d80b4988690..452cb6339954c 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -29,23 +29,6 @@
 
 namespace gandiva {
 
-using arrow::binary;
-using arrow::boolean;
-using arrow::date64;
-using arrow::float32;
-using arrow::float64;
-using arrow::int16;
-using arrow::int32;
-using arrow::int64;
-using arrow::int8;
-using arrow::uint16;
-using arrow::uint32;
-using arrow::uint64;
-using arrow::uint8;
-using arrow::utf8;
-using std::iterator;
-using std::vector;
-
 FunctionRegistry::iterator FunctionRegistry::begin() const {
   return &(*pc_registry_.begin());
 }
@@ -89,7 +72,7 @@ SignatureMap FunctionRegistry::InitPCMap() {
 const NativeFunction* FunctionRegistry::LookupSignature(
     const FunctionSignature& signature) const {
   auto got = pc_registry_map_.find(&signature);
-  return got == pc_registry_map_.end() ? NULL : got->second;
+  return got == pc_registry_map_.end() ? nullptr : got->second;
 }
 
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index 800bc493f0019..c5a798cb4e235 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -57,6 +57,8 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
       BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32),
       BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64),
 
+      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(add, decimal128),
+
       BINARY_RELATIONAL_BOOL_FN(equal),
 
       BINARY_RELATIONAL_BOOL_FN(not_equal),
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 78babce9a7dbf..3ae065a14769d 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -53,6 +53,7 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
 inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); }
 
 inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
+inline DataTypePtr decimal128() { return arrow::decimal(0, 0); }
 
 struct KeyHash {
   std::size_t operator()(const FunctionSignature* k) const { return k->Hash(); }
diff --git a/cpp/src/gandiva/function_signature.h b/cpp/src/gandiva/function_signature.h
index e5dff245b158f..ee82abc367e20 100644
--- a/cpp/src/gandiva/function_signature.h
+++ b/cpp/src/gandiva/function_signature.h
@@ -56,10 +56,22 @@ class FunctionSignature {
   std::string ToString() const;
 
  private:
-  // TODO : for some of the types, this shouldn't match type specific data. eg. for
-  // decimals, this shouldn't match precision/scale.
   bool DataTypeEquals(const DataTypePtr left, const DataTypePtr right) const {
-    return left->Equals(right);
+    if (left->id() == right->id()) {
+      switch (left->id()) {
+        case arrow::Type::DECIMAL: {
+          // For decimal types, the precision/scale isn't part of the signature.
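+          // (e.g. decimal128(38, 10) and decimal128(25, 2) both have a 16-byte
+          // width, so they compare equal here)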
+          auto dleft = arrow::internal::checked_cast<arrow::DecimalType*>(left.get());
+          auto dright = arrow::internal::checked_cast<arrow::DecimalType*>(right.get());
+          return (dleft != NULLPTR) && (dright != NULLPTR) &&
+                 (dleft->byte_width() == dright->byte_width());
+        }
+        default:
+          return left->Equals(right);
+      }
+    } else {
+      return false;
+    }
   }
 
   std::string base_name_;
diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt
index a07d3903a75ac..afc7fadeed4ad 100644
--- a/cpp/src/gandiva/jni/CMakeLists.txt
+++ b/cpp/src/gandiva/jni/CMakeLists.txt
@@ -78,5 +78,5 @@ add_dependencies(gandiva ${GANDIVA_JNI_LIBRARIES})
 # statically linked stdc++ has conflicts with stdc++ loaded by other libraries.
 if (NOT APPLE)
   set_target_properties(gandiva_jni_shared PROPERTIES
-    LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/src/gandiva/jni/symbols.map")
+    LINK_FLAGS "-Wl,--no-as-needed -Wl,--version-script=${CMAKE_SOURCE_DIR}/src/gandiva/jni/symbols.map")
 endif()
diff --git a/cpp/src/gandiva/jni/expression_registry_helper.cc b/cpp/src/gandiva/jni/expression_registry_helper.cc
index 5227329db472a..b5c6880a25cf1 100644
--- a/cpp/src/gandiva/jni/expression_registry_helper.cc
+++ b/cpp/src/gandiva/jni/expression_registry_helper.cc
@@ -121,10 +121,15 @@ void ArrowToProtobuf(DataTypePtr type, types::ExtGandivaType* gandiva_data_type)
     case arrow::Type::type::NA:
       gandiva_data_type->set_type(types::GandivaType::NONE);
       break;
+    case arrow::Type::type::DECIMAL: {
+      gandiva_data_type->set_type(types::GandivaType::DECIMAL);
+      gandiva_data_type->set_precision(0);
+      gandiva_data_type->set_scale(0);
+      break;
+    }
     case arrow::Type::type::FIXED_SIZE_BINARY:
     case arrow::Type::type::MAP:
     case arrow::Type::type::INTERVAL:
-    case arrow::Type::type::DECIMAL:
     case arrow::Type::type::LIST:
     case arrow::Type::type::STRUCT:
     case arrow::Type::type::UNION:
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 639ad361f4a8a..7ad0d6d6ff449 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -381,6 +381,12 @@ NodePtr ProtoTypeToNode(const types::TreeNode& node) {
     return TreeExprBuilder::MakeBinaryLiteral(node.binarynode().value());
   }
 
+  if (node.has_decimalnode()) {
+    std::string value = node.decimalnode().value();
+    gandiva::Decimal128Full literal(value, node.decimalnode().precision(),
+                                    node.decimalnode().scale());
+    return TreeExprBuilder::MakeDecimalLiteral(literal);
+  }
   std::cerr << "Unknown node type in protobuf\n";
   return nullptr;
 }
diff --git a/cpp/src/gandiva/literal_holder.h b/cpp/src/gandiva/literal_holder.h
index 0a65ea2c3e249..ad6afcea1f413 100644
--- a/cpp/src/gandiva/literal_holder.h
+++ b/cpp/src/gandiva/literal_holder.h
@@ -22,11 +22,14 @@
 
 #include <boost/variant.hpp>
 
+#include <string>
+#include "gandiva/decimal_full.h"
+
 namespace gandiva {
 
 using LiteralHolder = boost::variant<bool, float, double, int8_t, int16_t, int32_t, int64_t, uint8_t,
-                                     uint16_t, uint32_t, uint64_t, std::string>;
+                                     uint16_t, uint32_t, uint64_t, std::string, Decimal128Full>;
 
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 50f147b2fc7dd..9ddbe93fa68ff 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -399,6 +399,17 @@ llvm::Value* LLVMGenerator::AddFunctionCall(const std::string& full_name,
   return value;
 }
 
+std::shared_ptr<DecimalLValue> LLVMGenerator::BuildDecimalLValue(llvm::Value* value,
+                                                                 DataTypePtr arrow_type) {
+  // only decimals of size 128-bit supported.
+  DCHECK(is_decimal_128(arrow_type));
+  auto decimal_type =
+      arrow::internal::checked_cast<arrow::Decimal128Type*>(arrow_type.get());
+  return std::make_shared<DecimalLValue>(value, nullptr,
+                                         types()->i32_constant(decimal_type->precision()),
+                                         types()->i32_constant(decimal_type->scale()));
+}
+
 #define ADD_VISITOR_TRACE(...)         \
   if (generator_->enable_ir_traces_) { \
     generator_->AddTrace(__VA_ARGS__); \
@@ -422,20 +433,33 @@ LLVMGenerator::Visitor::Visitor(LLVMGenerator* generator, llvm::Function* functi
 void LLVMGenerator::Visitor::Visit(const VectorReadFixedLenValueDex& dex) {
   llvm::IRBuilder<>* builder = ir_builder();
   llvm::Value* slot_ref = GetBufferReference(dex.DataIdx(), kBufferTypeData, dex.Field());
   llvm::Value* slot_value;
-  if (dex.FieldType()->id() == arrow::Type::BOOL) {
-    slot_value = generator_->GetPackedBitValue(slot_ref, loop_var_);
-  } else {
-    llvm::Value* slot_offset = builder->CreateGEP(slot_ref, loop_var_);
-    slot_value = builder->CreateLoad(slot_offset, dex.FieldName());
-  }
+  std::shared_ptr<LValue> lvalue;
+
+  switch (dex.FieldType()->id()) {
+    case arrow::Type::BOOL:
+      slot_value = generator_->GetPackedBitValue(slot_ref, loop_var_);
+      lvalue = std::make_shared<LValue>(slot_value);
+      break;
+
+    case arrow::Type::DECIMAL: {
+      auto slot_offset = builder->CreateGEP(slot_ref, loop_var_);
+      slot_value = builder->CreateLoad(slot_offset, dex.FieldName());
+      lvalue = generator_->BuildDecimalLValue(slot_value, dex.FieldType());
+      break;
+    }
+
+    default: {
+      auto slot_offset = builder->CreateGEP(slot_ref, loop_var_);
+      slot_value = builder->CreateLoad(slot_offset, dex.FieldName());
+      lvalue = std::make_shared<LValue>(slot_value);
+      break;
+    }
+  }
   ADD_VISITOR_TRACE("visit fixed-len data vector " + dex.FieldName() + " value %T",
                     slot_value);
-  result_.reset(new LValue(slot_value));
+  result_ = lvalue;
 }
 
 void LLVMGenerator::Visitor::Visit(const VectorReadVarLenValueDex& dex) {
@@ -572,6 +596,19 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex& dex) {
       value = types->i64_constant(boost::get<int64_t>(dex.holder()));
       break;
 
+    case arrow::Type::DECIMAL: {
+      // build code for struct
+      auto decimal_value = boost::get<Decimal128Full>(dex.holder());
+      auto int_value =
+          llvm::ConstantInt::get(llvm::Type::getInt128Ty(*generator_->context()),
+                                 decimal_value.value().ToIntegerString(), 10);
+      auto type = arrow::decimal(decimal_value.precision(), decimal_value.scale());
+      auto lvalue = generator_->BuildDecimalLValue(int_value, type);
+      // set it as the l-value and return.
+      result_ = lvalue;
+      return;
+    }
+
     default:
       DCHECK(0);
   }
@@ -589,13 +626,14 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) {
   auto params = BuildParams(dex.function_holder().get(), dex.args(), false,
                             native_function->NeedsContext());
 
+  auto arrow_return_type = dex.func_descriptor()->return_type();
   if (native_function->CanReturnErrors()) {
     // slow path : if a function can return errors, skip invoking the function
     // unless all of the input args are valid. Otherwise, it can cause spurious errors.
 
     llvm::IRBuilder<>* builder = ir_builder();
     LLVMTypes* types = generator_->types();
-    auto arrow_type_id = native_function->signature().ret_type()->id();
+    auto arrow_type_id = arrow_return_type->id();
     auto result_type = types->IRType(arrow_type_id);
 
     // Build combined validity of the args.
@@ -609,7 +647,7 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) {
     auto then_lambda = [&] {
       ADD_VISITOR_TRACE("fn " + function_name +
                         " can return errors : all args valid, invoke fn");
-      return BuildFunctionCall(native_function, &params);
+      return BuildFunctionCall(native_function, arrow_return_type, &params);
     };
 
     // else block
@@ -624,10 +662,10 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) {
       return std::make_shared<LValue>(else_value, else_value_len);
     };
 
-    result_ = BuildIfElse(is_valid, then_lambda, else_lambda, result_type);
+    result_ = BuildIfElse(is_valid, then_lambda, else_lambda, arrow_return_type);
   } else {
     // fast path : invoke function without computing validities.
-    result_ = BuildFunctionCall(native_function, &params);
+    result_ = BuildFunctionCall(native_function, arrow_return_type, &params);
   }
 }
 
@@ -639,7 +677,8 @@ void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex& dex) {
   auto params = BuildParams(dex.function_holder().get(), dex.args(), true,
                             native_function->NeedsContext());
 
-  result_ = BuildFunctionCall(native_function, &params);
+  auto arrow_return_type = dex.func_descriptor()->return_type();
+  result_ = BuildFunctionCall(native_function, arrow_return_type, &params);
 }
 
 void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) {
@@ -659,7 +698,8 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) {
       new llvm::AllocaInst(types->i8_type(), 0, "result_valid", entry_block_);
   params.push_back(result_valid_ptr);
 
-  result_ = BuildFunctionCall(native_function, &params);
+  auto arrow_return_type = dex.func_descriptor()->return_type();
+  result_ = BuildFunctionCall(native_function, arrow_return_type, &params);
 
   // load the result validity and truncate to i1.
   llvm::Value* result_valid_i8 = builder->CreateLoad(result_valid_ptr);
@@ -672,7 +712,6 @@ void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) {
 void LLVMGenerator::Visitor::Visit(const IfDex& dex) {
   ADD_VISITOR_TRACE("visit IfExpression");
   llvm::IRBuilder<>* builder = ir_builder();
-  LLVMTypes* types = generator_->types();
 
   // Evaluate condition.
   LValuePtr if_condition = BuildValueAndValidity(dex.condition_vv());
@@ -714,9 +753,8 @@ void LLVMGenerator::Visitor::Visit(const IfDex& dex) {
   };
 
   // build the if-else condition.
-  auto result_type = types->IRType(dex.result_type()->id());
-  result_ = BuildIfElse(validAndMatched, then_lambda, else_lambda, result_type);
-  if (result_type == types->i8_ptr_type()) {
+  result_ = BuildIfElse(validAndMatched, then_lambda, else_lambda, dex.result_type());
+  if (arrow::is_binary_like(dex.result_type()->id())) {
     ADD_VISITOR_TRACE("IfElse result length %T", result_->length());
   }
   ADD_VISITOR_TRACE("IfElse result value %T", result_->data());
@@ -906,7 +944,7 @@ void LLVMGenerator::Visitor::VisitInExpression(const InExprDexBase& dex) {
 
 LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition,
                                               std::function<LValuePtr()> then_func,
                                               std::function<LValuePtr()> else_func,
-                                              llvm::Type* result_type) {
+                                              DataTypePtr result_type) {
   llvm::IRBuilder<>* builder = ir_builder();
   llvm::LLVMContext* context = generator_->context();
   LLVMTypes* types = generator_->types();
@@ -936,17 +974,31 @@ LValuePtr LLVMGenerator::Visitor::BuildIfElse(llvm::Value* condition,
 
   // Emit the merge block.
   builder->SetInsertPoint(merge_bb);
-  llvm::PHINode* result_value = builder->CreatePHI(result_type, 2, "res_value");
+  auto llvm_type = types->IRType(result_type->id());
+  llvm::PHINode* result_value = builder->CreatePHI(llvm_type, 2, "res_value");
   result_value->addIncoming(then_lvalue->data(), then_bb);
   result_value->addIncoming(else_lvalue->data(), else_bb);
 
-  llvm::PHINode* result_length = nullptr;
-  if (result_type == types->i8_ptr_type()) {
-    result_length = builder->CreatePHI(types->i32_type(), 2, "res_length");
-    result_length->addIncoming(then_lvalue->length(), then_bb);
-    result_length->addIncoming(else_lvalue->length(), else_bb);
+  LValuePtr ret;
+  switch (result_type->id()) {
+    case arrow::Type::STRING: {
+      llvm::PHINode* result_length;
+      result_length = builder->CreatePHI(types->i32_type(), 2, "res_length");
+      result_length->addIncoming(then_lvalue->length(), then_bb);
+      result_length->addIncoming(else_lvalue->length(), else_bb);
+      ret = std::make_shared<LValue>(result_value, result_length);
+      break;
+    }
+
+    case arrow::Type::DECIMAL:
+      ret = generator_->BuildDecimalLValue(result_value, result_type);
+      break;
+
+    default:
+      ret = std::make_shared<LValue>(result_value);
+      break;
   }
-  return std::make_shared<LValue>(result_value, result_length);
+  return ret;
 }
 
 LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair& pair) {
@@ -963,25 +1015,46 @@ LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair&
 }
 
 LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func,
+                                                    DataTypePtr arrow_return_type,
                                                     std::vector<llvm::Value*>* params) {
-  auto arrow_return_type = func->signature().ret_type()->id();
-  auto llvm_return_type = generator_->types()->IRType(arrow_return_type);
-
-  // add extra arg for return length for variable len return types (alloced on stack).
-  llvm::AllocaInst* result_len_ptr = nullptr;
-  if (arrow::is_binary_like(arrow_return_type)) {
-    result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0,
-                                          "result_len", entry_block_);
-    params->push_back(result_len_ptr);
-    has_arena_allocs_ = true;
-  }
+  auto types = generator_->types();
+  auto arrow_return_type_id = arrow_return_type->id();
+  auto llvm_return_type = types->IRType(arrow_return_type_id);
+
+  if (arrow_return_type_id == arrow::Type::DECIMAL) {
+    // For decimal fns, the output precision/scale are passed along as parameters.
+    //
+    // convert from this :
+    //     out = add_decimal(v1, p1, s1, v2, p2, s2)
+    // to:
+    //     out = add_decimal(v1, p1, s1, v2, p2, s2, out_p, out_s)
+
+    // Append the out_precision and out_scale
+    auto ret_lvalue = generator_->BuildDecimalLValue(nullptr, arrow_return_type);
+    params->push_back(ret_lvalue->precision());
+    params->push_back(ret_lvalue->scale());
+
+    // Make the function call
+    auto out = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params);
+    ret_lvalue->set_data(out);
+    return ret_lvalue;
+  } else {
+    // add extra arg for return length for variable len return types (alloced on stack).
+    llvm::AllocaInst* result_len_ptr = nullptr;
+    if (arrow::is_binary_like(arrow_return_type_id)) {
+      result_len_ptr = new llvm::AllocaInst(generator_->types()->i32_type(), 0,
+                                            "result_len", entry_block_);
+      params->push_back(result_len_ptr);
+      has_arena_allocs_ = true;
+    }
 
-  // Make the function call
-  llvm::IRBuilder<>* builder = ir_builder();
-  auto value = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params);
-  auto value_len =
-      (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr);
-  return std::make_shared<LValue>(value, value_len);
+    // Make the function call
+    llvm::IRBuilder<>* builder = ir_builder();
+    auto value = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params);
+    auto value_len =
+        (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr);
+    return std::make_shared<LValue>(value, value_len);
+  }
 }
 
 std::vector<llvm::Value*> LLVMGenerator::Visitor::BuildParams(
@@ -1007,12 +1080,9 @@ std::vector<llvm::Value*> LLVMGenerator::Visitor::BuildParams(
     DexPtr value_expr = pair->value_expr();
     value_expr->Accept(*this);
     LValue& result_ref = *result();
-    params.push_back(result_ref.data());
 
-    // build length (for var len data types)
-    if (result_ref.length() != nullptr) {
-      params.push_back(result_ref.length());
-    }
+    // append all the parameters corresponding to this LValue.
+    result_ref.AppendFunctionParams(&params);
 
     // build validity.
     if (with_validity) {
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 49f209d280d13..937e5acc87b2e 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -119,12 +119,13 @@ class LLVMGenerator {
                                       bool with_validity, bool with_context);
 
     // Generate code to invoke a function call.
-    LValuePtr BuildFunctionCall(const NativeFunction* func,
+    LValuePtr BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type,
                                 std::vector<llvm::Value*>* params);
 
     // Generate code for an if-else condition.
     LValuePtr BuildIfElse(llvm::Value* condition, std::function<LValuePtr()> then_func,
-                          std::function<LValuePtr()> else_func, llvm::Type* result_type);
+                          std::function<LValuePtr()> else_func,
+                          DataTypePtr arrow_return_type);
 
     // Switch to the entry_block and get reference of the validity/value/offsets buffer
     llvm::Value* GetBufferReference(int idx, BufferType buffer_type, FieldPtr field);
@@ -184,6 +185,10 @@ class LLVMGenerator {
   void ClearPackedBitValueIfFalse(llvm::Value* bitmap, llvm::Value* position,
                                   llvm::Value* value);
 
+  // Generate code to build a DecimalLValue with specified value/precision/scale.
+  std::shared_ptr<DecimalLValue> BuildDecimalLValue(llvm::Value* value,
+                                                    DataTypePtr arrow_type);
+
   /// Generate code to make a function call (to a pre-compiled IR function) which takes
   /// 'args' and has a return type 'ret_type'.
   llvm::Value* AddFunctionCall(const std::string& full_name, llvm::Type* ret_type,
diff --git a/cpp/src/gandiva/llvm_types.cc b/cpp/src/gandiva/llvm_types.cc
index 0b89d96e3fb02..18ff627a5651f 100644
--- a/cpp/src/gandiva/llvm_types.cc
+++ b/cpp/src/gandiva/llvm_types.cc
@@ -40,6 +40,7 @@ LLVMTypes::LLVMTypes(llvm::LLVMContext& context) : context_(context) {
       {arrow::Type::type::TIMESTAMP, i64_type()},
       {arrow::Type::type::STRING, i8_ptr_type()},
       {arrow::Type::type::BINARY, i8_ptr_type()},
+      {arrow::Type::type::DECIMAL, i128_type()},
   };
 }
 
diff --git a/cpp/src/gandiva/llvm_types.h b/cpp/src/gandiva/llvm_types.h
index dab47d059f7f2..9cf4dd5d1c850 100644
--- a/cpp/src/gandiva/llvm_types.h
+++ b/cpp/src/gandiva/llvm_types.h
@@ -43,6 +43,8 @@ class LLVMTypes {
 
   llvm::Type* i64_type() { return llvm::Type::getInt64Ty(context_); }
 
+  llvm::Type* i128_type() { return llvm::Type::getInt128Ty(context_); }
+
   llvm::Type* float_type() { return llvm::Type::getFloatTy(context_); }
 
   llvm::Type* double_type() { return llvm::Type::getDoubleTy(context_); }
@@ -53,12 +55,19 @@ class LLVMTypes {
 
   llvm::PointerType* i64_ptr_type() { return llvm::PointerType::get(i64_type(), 0); }
 
-  llvm::PointerType* ptr_type(llvm::Type* base_type) {
-    return llvm::PointerType::get(base_type, 0);
+  llvm::PointerType* i128_ptr_type() { return llvm::PointerType::get(i128_type(), 0); }
+
+  llvm::StructType* i128_split_type() {
+    // struct with high/low bits (see decimal_ops.cc:DecimalSplit)
+    return llvm::StructType::get(context_, {i64_type(), i64_type()}, false);
   }
 
   llvm::Type* void_type() { return llvm::Type::getVoidTy(context_); }
 
+  llvm::PointerType* ptr_type(llvm::Type* base_type) {
+    return llvm::PointerType::get(base_type, 0);
+  }
+
   llvm::Constant* true_constant() {
     return llvm::ConstantInt::get(context_, llvm::APInt(1, 1));
   }
@@ -87,6 +96,18 @@ class LLVMTypes {
     return llvm::ConstantInt::get(context_, llvm::APInt(64, val));
   }
 
+  llvm::Constant* i128_constant(int64_t val) {
+    return llvm::ConstantInt::get(context_, llvm::APInt(128, val));
+  }
+
+  llvm::Constant* i128_zero() {
+    return llvm::ConstantInt::get(context_, llvm::APInt(128, 0));
+  }
+
+  llvm::Constant* i128_one() {
+    return llvm::ConstantInt::get(context_, llvm::APInt(128, 1));
+  }
+
   llvm::Constant* float_constant(float val) {
     return llvm::ConstantFP::get(float_type(), val);
   }
diff --git a/cpp/src/gandiva/lvalue.h b/cpp/src/gandiva/lvalue.h
index 2ff03dcdd9c56..ce5040f6c37a6 100644
--- a/cpp/src/gandiva/lvalue.h
+++ b/cpp/src/gandiva/lvalue.h
@@ -18,9 +18,11 @@
 #ifndef GANDIVA_LVALUE_H
 #define GANDIVA_LVALUE_H
 
-#include "arrow/util/macros.h"
+#include <vector>
 
 #include <llvm/IR/IRBuilder.h>
 
+#include "arrow/util/macros.h"
+#include "gandiva/logging.h"
 namespace gandiva {
 
@@ -30,17 +32,48 @@ class LValue {
  public:
   explicit LValue(llvm::Value* data, llvm::Value* length = NULLPTR,
                   llvm::Value* validity = NULLPTR)
       : data_(data), length_(length), validity_(validity) {}
+  virtual ~LValue() = default;
 
   llvm::Value* data() { return data_; }
   llvm::Value* length() { return length_; }
   llvm::Value* validity() { return validity_; }
 
+  void set_data(llvm::Value* data) { data_ = data; }
+
+  // Append the params required when passing this as a function parameter.
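+  // (the base class appends the data pointer, plus the length for variable-length
+  // types; DecimalLValue overrides this to also append precision and scale)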
+  virtual void AppendFunctionParams(std::vector<llvm::Value*>* params) {
+    params->push_back(data_);
+    if (length_ != NULLPTR) {
+      params->push_back(length_);
+    }
+  }
+
  private:
   llvm::Value* data_;
   llvm::Value* length_;
   llvm::Value* validity_;
 };
 
+class DecimalLValue : public LValue {
+ public:
+  DecimalLValue(llvm::Value* data, llvm::Value* validity, llvm::Value* precision,
+                llvm::Value* scale)
+      : LValue(data, NULLPTR, validity), precision_(precision), scale_(scale) {}
+
+  llvm::Value* precision() { return precision_; }
+  llvm::Value* scale() { return scale_; }
+
+  void AppendFunctionParams(std::vector<llvm::Value*>* params) override {
+    LValue::AppendFunctionParams(params);
+    params->push_back(precision_);
+    params->push_back(scale_);
+  }
+
+ private:
+  llvm::Value* precision_;
+  llvm::Value* scale_;
+};
+
 }  // namespace gandiva
 
 #endif  // GANDIVA_LVALUE_H
diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt
index 21a74bd4916ee..eab0b9007b09e 100644
--- a/cpp/src/gandiva/precompiled/CMakeLists.txt
+++ b/cpp/src/gandiva/precompiled/CMakeLists.txt
@@ -20,12 +20,16 @@ project(gandiva)
 set(PRECOMPILED_SRCS
   arithmetic_ops.cc
   bitmap.cc
+  decimal_ops.cc
+  decimal_wrapper.cc
   extended_math_ops.cc
   hash.cc
   print.cc
   string_ops.cc
   time.cc
-  timestamp_arithmetic.cc)
+  timestamp_arithmetic.cc
+  ../../arrow/status.cc
+  ../../arrow/util/decimal.cc)
 
 # Create bitcode for each of the source files.
 foreach(SRC_FILE ${PRECOMPILED_SRCS})
@@ -35,7 +39,10 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS})
   add_custom_command(
     OUTPUT ${BC_FILE}
     COMMAND ${CLANG_EXECUTABLE}
-    -std=c++11 -emit-llvm -O2 -c ${ABSOLUTE_SRC} -o ${BC_FILE}
+    -std=c++11 -emit-llvm
+    -DNDEBUG  # DCHECK macros not implemented in precompiled code
+    -fno-use-cxa-atexit  # Workaround for unresolved __dso_handle
+    -O3 -c ${ABSOLUTE_SRC} -o ${BC_FILE}
     -I${CMAKE_SOURCE_DIR}/src
     DEPENDS ${SRC_FILE})
   list(APPEND BC_FILES ${BC_FILE})
@@ -77,4 +84,5 @@ if (ARROW_BUILD_TESTS)
   add_precompiled_unit_test(string_ops_test.cc string_ops.cc ../context_helper.cc)
   add_precompiled_unit_test(arithmetic_ops_test.cc arithmetic_ops.cc ../context_helper.cc)
   add_precompiled_unit_test(extended_math_ops_test.cc extended_math_ops.cc ../context_helper.cc)
+  add_precompiled_unit_test(decimal_ops_test.cc decimal_ops.cc ../decimal_type_util.cc)
 endif()
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.cc b/cpp/src/gandiva/precompiled/decimal_ops.cc
new file mode 100644
index 0000000000000..57cb83e222367
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Algorithms adapted from Apache Impala
+
+#include "gandiva/precompiled/decimal_ops.h"
+
+#include <algorithm>
+
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/logging.h"
+
+namespace gandiva {
+namespace decimalops {
+
+static Decimal128 CheckAndIncreaseScale(Decimal128 in, int32_t delta) {
+  return (delta <= 0) ? in : in.IncreaseScaleBy(delta);
+}
+
+static Decimal128 CheckAndReduceScale(Decimal128 in, int32_t delta) {
+  return (delta <= 0) ? in : in.ReduceScaleBy(delta);
+}
+
+/// Adjust x and y to the same scale, and add them.
+static Decimal128 AddFastPath(const Decimal128Full& x, const Decimal128Full& y,
+                              int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+
+  auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale());
+  auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale());
+  return x_scaled + y_scaled;
+}
+
+/// Add x and y, caller has ensured there can be no overflow.
+static Decimal128 AddNoOverflow(const Decimal128Full& x, const Decimal128Full& y,
+                                int32_t out_scale) {
+  auto higher_scale = std::max(x.scale(), y.scale());
+  auto sum = AddFastPath(x, y, out_scale);
+  return CheckAndReduceScale(sum, higher_scale - out_scale);
+}
+
+/// Both x_value and y_value must be >= 0
+static Decimal128 AddLargePositive(const Decimal128Full& x, const Decimal128Full& y,
+                                   int32_t out_scale) {
+  DCHECK_GE(x.value(), 0);
+  DCHECK_GE(y.value(), 0);
+
+  // separate out whole/fractions.
+  Decimal128 x_left, x_right, y_left, y_right;
+  x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right);
+  y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right);
+
+  // Adjust fractional parts to higher scale.
+  auto higher_scale = std::max(x.scale(), y.scale());
+  auto x_right_scaled = CheckAndIncreaseScale(x_right, higher_scale - x.scale());
+  auto y_right_scaled = CheckAndIncreaseScale(y_right, higher_scale - y.scale());
+
+  Decimal128 right;
+  Decimal128 carry_to_left;
+  auto multiplier = Decimal128::GetScaleMultiplier(higher_scale);
+  if (x_right_scaled >= multiplier - y_right_scaled) {
+    right = x_right_scaled - (multiplier - y_right_scaled);
+    carry_to_left = 1;
+  } else {
+    right = x_right_scaled + y_right_scaled;
+    carry_to_left = 0;
+  }
+  right = CheckAndReduceScale(right, higher_scale - out_scale);
+
+  auto left = x_left + y_left + carry_to_left;
+  return (left * Decimal128::GetScaleMultiplier(out_scale)) + right;
+}
+
+/// x_value and y_value cannot be 0, and one must be positive and the other negative.
+static Decimal128 AddLargeNegative(const Decimal128Full& x, const Decimal128Full& y,
+                                   int32_t out_scale) {
+  DCHECK_NE(x.value(), 0);
+  DCHECK_NE(y.value(), 0);
+  DCHECK((x.value() < 0 && y.value() > 0) || (x.value() > 0 && y.value() < 0));
+
+  // separate out whole/fractions.
+  Decimal128 x_left, x_right, y_left, y_right;
+  x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right);
+  y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right);
+
+  // Adjust fractional parts to higher scale.
+  auto higher_scale = std::max(x.scale(), y.scale());
+  x_right = CheckAndIncreaseScale(x_right, higher_scale - x.scale());
+  y_right = CheckAndIncreaseScale(y_right, higher_scale - y.scale());
+
+  // Overflow not possible because one is +ve and the other is -ve.
+  auto left = x_left + y_left;
+  auto right = x_right + y_right;
+
+  // If the whole and fractional parts have different signs, then we need to make the
+  // fractional part have the same sign as the whole part. If either left or right is
+  // zero, then nothing needs to be done.
+  if (left < 0 && right > 0) {
+    left += 1;
+    right -= Decimal128::GetScaleMultiplier(higher_scale);
+  } else if (left > 0 && right < 0) {
+    left -= 1;
+    right += Decimal128::GetScaleMultiplier(higher_scale);
+  }
+  right = CheckAndReduceScale(right, higher_scale - out_scale);
+  return (left * Decimal128::GetScaleMultiplier(out_scale)) + right;
+}
+
+static Decimal128 AddLarge(const Decimal128Full& x, const Decimal128Full& y,
+                           int32_t out_scale) {
+  if (x.value() >= 0 && y.value() >= 0) {
+    // both positive or 0
+    return AddLargePositive(x, y, out_scale);
+  } else if (x.value() <= 0 && y.value() <= 0) {
+    // both negative or 0
+    Decimal128Full x_neg(-x.value(), x.precision(), x.scale());
+    Decimal128Full y_neg(-y.value(), y.precision(), y.scale());
+    return -AddLargePositive(x_neg, y_neg, out_scale);
+  } else {
+    // one positive and the other negative
+    return AddLargeNegative(x, y, out_scale);
+  }
+}
+
+// Suppose we have a number that requires x bits to be represented and we scale it up by
+// 10^scale_by. Let's say now y bits are required to represent it. This function returns
+// the maximum possible y - x for a given 'scale_by'.
+inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
+  // We rely on the following formula:
+  //   bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1
+  // We precompute floor(log2(10^x)) + 1 for x = 0, 1, 2...75, 76
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  static const int32_t floor_log2_plus_one[] = {
+      0,   4,   7,   10,  14,  17,  20,  24,  27,  30,  34,  37,  40,  44,  47,  50,
+      54,  57,  60,  64,  67,  70,  74,  77,  80,  84,  87,  90,  94,  97,  100, 103,
+      107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
+      160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
+      213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
+  return floor_log2_plus_one[scale_by];
+}
+
+// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_by,
+// this function returns the minimum number of leading zeros the result can have.
+inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  int32_t result = num_lz - MaxBitsRequiredIncreaseAfterScaling(scale_by);
+  return result;
+}
+
+// Returns the maximum possible number of bits required to represent num * 10^scale_by.
+inline int32_t MaxBitsRequiredAfterScaling(const Decimal128Full& num, int32_t scale_by) {
+  auto value = num.value();
+  auto value_abs = value.Abs();
+
+  int32_t num_occupied = 128 - value_abs.CountLeadingBinaryZeros();
+  DCHECK_GE(scale_by, 0);
+  DCHECK_LE(scale_by, 76);
+  return num_occupied + MaxBitsRequiredIncreaseAfterScaling(scale_by);
+}
+
+// Returns the minimum number of leading zeros x or y would have after one of them gets
+// scaled up to match the scale of the other one.
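+// e.g. if x has scale 2 and y has scale 5, x is the operand that gets scaled up by
+// 10^3, so its leading-zero count shrinks by at most
+// MaxBitsRequiredIncreaseAfterScaling(3).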
+
+// Returns the minimum number of leading zeros x or y would have after one of them gets
+// scaled up to match the scale of the other one.
+inline int32_t MinLeadingZeros(const Decimal128Full& x, const Decimal128Full& y) {
+  auto x_value = x.value();
+  auto x_value_abs = x_value.Abs();
+
+  auto y_value = y.value();
+  auto y_value_abs = y_value.Abs();
+
+  int32_t x_lz = x_value_abs.CountLeadingBinaryZeros();
+  int32_t y_lz = y_value_abs.CountLeadingBinaryZeros();
+  if (x.scale() < y.scale()) {
+    x_lz = MinLeadingZerosAfterScaling(x_lz, y.scale() - x.scale());
+  } else if (x.scale() > y.scale()) {
+    y_lz = MinLeadingZerosAfterScaling(y_lz, x.scale() - y.scale());
+  }
+  return std::min(x_lz, y_lz);
+}
+
+Decimal128 Add(const Decimal128Full& x, const Decimal128Full& y, int32_t out_precision,
+               int32_t out_scale) {
+  if (out_precision < DecimalTypeUtil::kMaxPrecision) {
+    // fast-path add
+    return AddFastPath(x, y, out_scale);
+  } else {
+    int32_t min_lz = MinLeadingZeros(x, y);
+    if (min_lz >= 3) {
+      // If both numbers have at least MIN_LZ leading zeros, we can add them directly
+      // without the risk of overflow.
+      // We want the result to have at least 2 leading zeros, which ensures that it fits
+      // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at
+      // least 3 leading zeros, then we are guaranteed that the result will have at least
+      // 2 leading zeros.
+      return AddNoOverflow(x, y, out_scale);
+    } else {
+      // slower version: add the whole and fractional parts separately, then combine.
+      return AddLarge(x, y, out_scale);
+    }
+  }
+}
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.h b/cpp/src/gandiva/precompiled/decimal_ops.h
new file mode 100644
index 0000000000000..25f094e4a8faa
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops.h
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DECIMAL_SQL_H
+#define DECIMAL_SQL_H
+
+#include <cstdint>
+#include <string>
+#include "gandiva/decimal_full.h"
+
+namespace gandiva {
+namespace decimalops {
+
+/// Return the sum of 'x' and 'y'.
+/// out_precision and out_scale are passed along for efficiency, they must match
+/// the rules in DecimalTypeUtil::GetResultType.
+Decimal128 Add(const Decimal128Full& x, const Decimal128Full& y, int32_t out_precision,
+               int32_t out_scale);
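+
+/// Usage sketch (illustrative, editor's example): adding 123.456 (38, 3) and
+/// 7.8 (38, 1). DecimalTypeUtil::GetResultType yields (38, 3) for this pair:
+///
+///   Decimal128Full x("123456", 38, 3);  // 123.456
+///   Decimal128Full y("78", 38, 1);      // 7.8
+///   Decimal128 sum = Add(x, y, 38, 3);  // raw value 131256, i.e. 131.256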
+
+}  // namespace decimalops
+}  // namespace gandiva
+
+#endif  // DECIMAL_SQL_H
diff --git a/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
new file mode 100644
index 0000000000000..7daf734509b20
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <string>
+
+#include "arrow/test-util.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/precompiled/decimal_ops.h"
+#include "gandiva/precompiled/types.h"
+
+namespace gandiva {
+
+class TestDecimalSql : public ::testing::Test {
+ protected:
+  static void AddAndVerify(const Decimal128Full& x, const Decimal128Full& y,
+                           const Decimal128Full& expected);
+};
+
+#define EXPECT_DECIMAL_EQ(x, y, expected, actual)                          \
+  EXPECT_EQ(expected, actual) << (x).ToString() << " + " << (y).ToString() \
+                              << " expected : " << expected.ToString()     \
+                              << " actual " << actual.ToString()
+
+void TestDecimalSql::AddAndVerify(const Decimal128Full& x, const Decimal128Full& y,
+                                  const Decimal128Full& expected) {
+  auto t1 = std::make_shared<arrow::Decimal128Type>(x.precision(), x.scale());
+  auto t2 = std::make_shared<arrow::Decimal128Type>(y.precision(), y.scale());
+
+  Decimal128TypePtr out_type;
+  EXPECT_OK(DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, {t1, t2}, &out_type));
+
+  auto out_value = decimalops::Add(x, y, out_type->precision(), out_type->scale());
+  EXPECT_DECIMAL_EQ(x, y, expected,
+                    Decimal128Full(out_value, out_type->precision(), out_type->scale()));
+}
+
+TEST_F(TestDecimalSql, Add) {
+  // fast-path
+  AddAndVerify(Decimal128Full{"201", 30, 3},   // x
+               Decimal128Full{"301", 30, 3},   // y
+               Decimal128Full{"502", 31, 3});  // expected
+
+  // max precision
+  AddAndVerify(Decimal128Full{"09999999999999999999999999999999000000", 38, 5},  // x
+               Decimal128Full{"100", 38, 7},                                     // y
+               Decimal128Full{"99999999999999999999999999999990000010", 38, 6});
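+
+  // Editor's note (illustrative): in the case above, y is 0.0000100 at scale
+  // 7; the sum is computed at the higher scale 7 and then reduced to the
+  // result scale 6. ReduceScaleBy rounds rather than truncates (see the
+  // "rounding" cases in decimal_single_test.cc), giving ...90.000010.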
+
+  // Both -ve
+  AddAndVerify(Decimal128Full{"-201", 30, 3},    // x
+               Decimal128Full{"-301", 30, 2},    // y
+               Decimal128Full{"-3211", 32, 3});  // expected
+
+  // -ve and max precision
+  AddAndVerify(Decimal128Full{"-09999999999999999999999999999999000000", 38, 5},  // x
+               Decimal128Full{"-100", 38, 7},                                     // y
+               Decimal128Full{"-99999999999999999999999999999990000010", 38, 6});
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
new file mode 100644
index 0000000000000..fdc751f7fe87c
--- /dev/null
+++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/precompiled/decimal_ops.h"
+#include "gandiva/precompiled/types.h"
+
+extern "C" {
+
+/// TODO: Passing around structs in IR can be fragile due to C ABI compatibility issues.
+/// This seems to work for now, but will need to revisit if we hit issues.
+struct DecimalSplit {
+  int64_t high_bits;
+  uint64_t low_bits;
+};
+
+FORCE_INLINE
+DecimalSplit add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low,
+                                             int32_t x_precision, int32_t x_scale,
+                                             int64_t y_high, uint64_t y_low,
+                                             int32_t y_precision, int32_t y_scale,
+                                             int32_t out_precision, int32_t out_scale) {
+  gandiva::Decimal128Full x(x_high, x_low, x_precision, x_scale);
+  gandiva::Decimal128Full y(y_high, y_low, y_precision, y_scale);
+
+  arrow::Decimal128 out = gandiva::decimalops::Add(x, y, out_precision, out_scale);
+  return DecimalSplit{out.high_bits(), out.low_bits()};
+}
+
+}  // extern "C"
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index 4cb352f2ad3c1..8fc5b8c446927 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -143,7 +143,8 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, arrow::MemoryPool* p
 // TODO : handle variable-len vectors
 Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
                                  arrow::MemoryPool* pool, ArrayDataPtr* array_data) {
-  ARROW_RETURN_IF(!arrow::is_primitive(type->id()),
+  const auto* fw_type = dynamic_cast<const arrow::FixedWidthType*>(type.get());
+  ARROW_RETURN_IF(fw_type == nullptr,
                   Status::Invalid("Unsupported output data type ", type));
 
   std::shared_ptr<arrow::Buffer> null_bitmap;
@@ -151,8 +152,7 @@ Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records,
   ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, bitmap_bytes, &null_bitmap));
 
   std::shared_ptr<arrow::Buffer> data;
-  const auto& fw_type = dynamic_cast<const arrow::FixedWidthType&>(*type);
-  int64_t data_len = arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width());
+  int64_t data_len = arrow::BitUtil::BytesForBits(num_records * fw_type->bit_width());
   ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, data_len, &data));
 
   // This is not strictly required but valgrind gets confused and detects this
diff --git a/cpp/src/gandiva/proto/Types.proto b/cpp/src/gandiva/proto/Types.proto
index ac19d0f1c1919..7474065f68b73 100644
--- a/cpp/src/gandiva/proto/Types.proto
+++ b/cpp/src/gandiva/proto/Types.proto
@@ -146,6 +146,13 @@ message BinaryNode {
   optional bytes value = 1;
 }
 
+message DecimalNode {
+  optional string value = 1;
+  optional int32 precision = 2;
+  optional int32 scale = 3;
+}
+
+
 message TreeNode {
   optional FieldNode fieldNode = 1;
   optional FunctionNode fnNode = 2;
@@ -164,6 +171,7 @@ message TreeNode {
   optional DoubleNode doubleNode = 16;
   optional StringNode stringNode = 17;
   optional BinaryNode binaryNode = 18;
+  optional DecimalNode decimalNode = 19;
 }
 
 message ExpressionRoot {
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index 9558fc0757f7b..b47e5fd5add59 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -27,11 +27,17 @@ ADD_GANDIVA_TEST(to_string_test)
 ADD_GANDIVA_TEST(hash_test)
 ADD_GANDIVA_TEST(in_expr_test)
 ADD_GANDIVA_TEST(null_validity_test)
+ADD_GANDIVA_TEST(decimal_test)
+ADD_GANDIVA_TEST(decimal_single_test)
 
 ADD_GANDIVA_TEST(projector_test_static
                  SOURCES projector_test.cc
                  USE_STATIC_LINKING)
 
-ADD_BENCHMARK(micro_benchmarks
+ADD_GANDIVA_TEST(decimal_single_test_static
+                 SOURCES decimal_single_test.cc
+                 USE_STATIC_LINKING)
+
+ADD_ARROW_BENCHMARK(micro_benchmarks
               PREFIX "gandiva"
               EXTRA_LINK_LIBS gandiva_static)
diff --git a/cpp/src/gandiva/tests/decimal_single_test.cc b/cpp/src/gandiva/tests/decimal_single_test.cc
new file mode 100644
index 0000000000000..728ccb7f79f4c
--- /dev/null
+++ b/cpp/src/gandiva/tests/decimal_single_test.cc
@@ -0,0 +1,224 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+
+#include "gandiva/decimal_full.h"
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/projector.h"
+#include "gandiva/tests/test_util.h"
+#include "gandiva/tree_expr_builder.h"
+
+using arrow::Decimal128;
+
+namespace gandiva {
+
+#define EXPECT_DECIMAL_SUM_EQUALS(x, y, expected, actual)                  \
+  EXPECT_EQ(expected, actual) << (x).ToString() << " + " << (y).ToString() \
+                              << " expected : " << (expected).ToString()   \
+                              << " actual : " << (actual).ToString();
+
+Decimal128Full decimal_literal(const char* value, int precision, int scale) {
+  std::string value_string = std::string(value);
+  return Decimal128Full(value_string, precision, scale);
+}
+
+class TestDecimalOps : public ::testing::Test {
+ public:
+  void SetUp() { pool_ = arrow::default_memory_pool(); }
+
+  ArrayPtr MakeDecimalVector(const Decimal128Full& in);
+  void AddAndVerify(const Decimal128Full& x, const Decimal128Full& y,
+                    const Decimal128Full& expected);
+
+ protected:
+  arrow::MemoryPool* pool_;
+};
+
+ArrayPtr TestDecimalOps::MakeDecimalVector(const Decimal128Full& in) {
+  std::vector<Decimal128> ret;
+
+  Decimal128 decimal_value = in.value();
+
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(in.precision(), in.scale());
+  return MakeArrowArrayDecimal(decimal_type, {decimal_value}, {true});
+}
+
+void TestDecimalOps::AddAndVerify(const Decimal128Full& x, const Decimal128Full& y,
+                                  const Decimal128Full& expected) {
+  auto x_type = std::make_shared<arrow::Decimal128Type>(x.precision(), x.scale());
+  auto y_type = std::make_shared<arrow::Decimal128Type>(y.precision(), y.scale());
+  auto field_x = field("x", x_type);
+  auto field_y = field("y", y_type);
+  auto schema = arrow::schema({field_x, field_y});
+
+  Decimal128TypePtr output_type;
+  auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd, {x_type, y_type},
+                                               &output_type);
+  EXPECT_OK(status);
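+
+  // Editor's note (illustrative): for kOpAdd with operands (30, 3) and
+  // (30, 2), the result scale is max(3, 2) = 3 and the result precision is
+  // 3 + max(30 - 3, 30 - 2) + 1 = 32, matching the (32, 3) expectations in
+  // TestAdd below.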
+
+  // output fields
+  auto res = field("res", output_type);
+
+  // build expression: x + y
+  auto expr = TreeExprBuilder::MakeExpression("add", {field_x, field_y}, res);
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_OK(status);
+
+  // Create a row-batch with some sample data
+  auto array_a = MakeDecimalVector(x);
+  auto array_b = MakeDecimalVector(y);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, 1 /*num_records*/, {array_a, array_b});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_OK(status);
+
+  // Validate results
+  auto out_array = dynamic_cast<arrow::Decimal128Array*>(outputs[0].get());
+  const Decimal128 out_value(out_array->GetValue(0));
+
+  auto dtype = dynamic_cast<arrow::Decimal128Type*>(out_array->type().get());
+  std::string value_string = out_value.ToString(0);
+  Decimal128Full actual{value_string, dtype->precision(), dtype->scale()};
+
+  EXPECT_DECIMAL_SUM_EQUALS(x, y, expected, actual);
+}
+
+TEST_F(TestDecimalOps, TestAdd) {
+  // fast-path
+  AddAndVerify(decimal_literal("201", 30, 3),   // x
+               decimal_literal("301", 30, 3),   // y
+               decimal_literal("502", 31, 3));  // expected
+
+  AddAndVerify(decimal_literal("201", 30, 3),    // x
+               decimal_literal("301", 30, 2),    // y
+               decimal_literal("3211", 32, 3));  // expected
+
+  AddAndVerify(decimal_literal("201", 30, 3),    // x
+               decimal_literal("301", 30, 4),    // y
+               decimal_literal("2311", 32, 4));  // expected
+
+  // max precision, but no overflow
+  AddAndVerify(decimal_literal("201", 38, 3),   // x
+               decimal_literal("301", 38, 3),   // y
+               decimal_literal("502", 38, 3));  // expected
+
+  AddAndVerify(decimal_literal("201", 38, 3),    // x
+               decimal_literal("301", 38, 2),    // y
+               decimal_literal("3211", 38, 3));  // expected
+
+  AddAndVerify(decimal_literal("201", 38, 3),    // x
+               decimal_literal("301", 38, 4),    // y
+               decimal_literal("2311", 38, 4));  // expected
+
+  AddAndVerify(decimal_literal("201", 38, 3),      // x
+               decimal_literal("301", 38, 7),      // y
+               decimal_literal("201030", 38, 6));  // expected
+
+  AddAndVerify(decimal_literal("1201", 38, 3),   // x
+               decimal_literal("1801", 38, 3),   // y
+               decimal_literal("3002", 38, 3));  // carry-over from fractional
+
+  // max precision
+  AddAndVerify(decimal_literal("09999999999999999999999999999999000000", 38, 5),  // x
+               decimal_literal("100", 38, 7),                                     // y
+               decimal_literal("99999999999999999999999999999990000010", 38, 6));
+
+  AddAndVerify(decimal_literal("-09999999999999999999999999999999000000", 38, 5),  // x
+               decimal_literal("100", 38, 7),                                      // y
+               decimal_literal("-99999999999999999999999999999989999990", 38, 6));
+
+  AddAndVerify(decimal_literal("09999999999999999999999999999999000000", 38, 5),  // x
+               decimal_literal("-100", 38, 7),                                    // y
+               decimal_literal("99999999999999999999999999999989999990", 38, 6));
+
+  AddAndVerify(decimal_literal("-09999999999999999999999999999999000000", 38, 5),  // x
+               decimal_literal("-100", 38, 7),                                     // y
+               decimal_literal("-99999999999999999999999999999990000010", 38, 6));
+
+  AddAndVerify(decimal_literal("09999999999999999999999999999999999999", 38, 6),  // x
+               decimal_literal("89999999999999999999999999999999999999", 38, 7),  // y
+               decimal_literal("18999999999999999999999999999999999999", 38, 6));
+
+  // Both -ve
+  AddAndVerify(decimal_literal("-201", 30, 3),    // x
+               decimal_literal("-301", 30, 2),    // y
+               decimal_literal("-3211", 32, 3));  // expected
+
+  AddAndVerify(decimal_literal("-201", 38, 3),    // x
+               decimal_literal("-301", 38, 4),    // y
+               decimal_literal("-2311", 38, 4));  // expected
+
+  // Mix of +ve and -ve
+  AddAndVerify(decimal_literal("-201", 30, 3),  // x
+               decimal_literal("301", 30, 2),   // y
decimal_literal("2809", 32, 3)); // expected + + AddAndVerify(decimal_literal("-201", 38, 3), // x + decimal_literal("301", 38, 4), // y + decimal_literal("-1709", 38, 4)); // expected + + AddAndVerify(decimal_literal("201", 38, 3), // x + decimal_literal("-301", 38, 7), // y + decimal_literal("200970", 38, 6)); // expected + + AddAndVerify(decimal_literal("-1901", 38, 4), // x + decimal_literal("1801", 38, 4), // y + decimal_literal("-100", 38, 4)); // expected + + AddAndVerify(decimal_literal("1801", 38, 4), // x + decimal_literal("-1901", 38, 4), // y + decimal_literal("-100", 38, 4)); // expected + + // rounding +ve + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000999", 38, 7), // y + decimal_literal("2001099", 38, 6)); + + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000995", 38, 7), // y + decimal_literal("2001099", 38, 6)); + + AddAndVerify(decimal_literal("1000999", 38, 6), // x + decimal_literal("10000992", 38, 7), // y + decimal_literal("2001098", 38, 6)); + + // rounding -ve + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000999", 38, 7), // y + decimal_literal("-2001099", 38, 6)); + + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000995", 38, 7), // y + decimal_literal("-2001099", 38, 6)); + + AddAndVerify(decimal_literal("-1000999", 38, 6), // x + decimal_literal("-10000992", 38, 7), // y + decimal_literal("-2001098", 38, 6)); +} +} // namespace gandiva diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc new file mode 100644 index 0000000000000..f048fd275a61b --- /dev/null +++ b/cpp/src/gandiva/tests/decimal_test.cc @@ -0,0 +1,237 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include "arrow/memory_pool.h"
+#include "arrow/status.h"
+#include "arrow/util/decimal.h"
+
+#include "gandiva/decimal_type_util.h"
+#include "gandiva/projector.h"
+#include "gandiva/tests/test_util.h"
+#include "gandiva/tree_expr_builder.h"
+
+using arrow::Decimal128;
+
+namespace gandiva {
+
+class TestDecimal : public ::testing::Test {
+ public:
+  void SetUp() { pool_ = arrow::default_memory_pool(); }
+
+  std::vector<Decimal128> MakeDecimalVector(std::vector<std::string> values,
+                                            int32_t scale);
+
+ protected:
+  arrow::MemoryPool* pool_;
+};
+
+std::vector<Decimal128> TestDecimal::MakeDecimalVector(std::vector<std::string> values,
+                                                       int32_t scale) {
+  std::vector<Decimal128> ret;
+  for (auto str : values) {
+    Decimal128 str_value;
+    int32_t str_precision;
+    int32_t str_scale;
+
+    auto status = Decimal128::FromString(str, &str_value, &str_precision, &str_scale);
+    DCHECK_OK(status);
+
+    Decimal128 scaled_value;
+    status = str_value.Rescale(str_scale, scale, &scaled_value);
+    ret.push_back(scaled_value);
+  }
+  return ret;
+}
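+
+// Editor's note (illustrative): MakeDecimalVector({"1"}, 18) parses "1" at
+// scale 0 and rescales it to scale 18, so the stored raw Decimal128 value is
+// 10^18.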
+
+TEST_F(TestDecimal, TestSimple) {
+  // schema for input fields
+  constexpr int32_t precision = 36;
+  constexpr int32_t scale = 18;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_a = field("a", decimal_type);
+  auto field_b = field("b", decimal_type);
+  auto field_c = field("c", decimal_type);
+  auto schema = arrow::schema({field_a, field_b, field_c});
+
+  Decimal128TypePtr add2_type;
+  auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd,
+                                               {decimal_type, decimal_type}, &add2_type);
+
+  Decimal128TypePtr output_type;
+  status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd,
+                                          {add2_type, decimal_type}, &output_type);
+
+  // output fields
+  auto res = field("res0", output_type);
+
+  // build expression: a + b + c
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto add2 = TreeExprBuilder::MakeFunction("add", {node_a, node_b}, add2_type);
+  auto add3 = TreeExprBuilder::MakeFunction("add", {add2, node_c}, output_type);
+  auto expr = TreeExprBuilder::MakeExpression(add3, res);
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_a =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale),
+                            {false, true, true, true});
+  auto array_b =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"2", "3", "4", "5"}, scale),
+                            {false, true, true, true});
+  auto array_c =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"3", "4", "5", "6"}, scale),
+                            {true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array_a, array_b, array_c});
+
+  auto expected =
+      MakeArrowArrayDecimal(output_type, MakeDecimalVector({"6", "9", "12", "15"}, scale),
+                            {false, true, true, true});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(expected, outputs[0]);
+}
+
+TEST_F(TestDecimal, TestLiteral) {
+  // schema for input fields
+  constexpr int32_t precision = 36;
+  constexpr int32_t scale = 18;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_a = field("a", decimal_type);
+  auto schema = arrow::schema({
+      field_a,
+  });
+
+  Decimal128TypePtr add2_type;
+  auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd,
+                                               {decimal_type, decimal_type}, &add2_type);
+
+  // output fields
+  auto res = field("res0", add2_type);
+
+  // build expression: a + 0.6
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  static std::string decimal_point_six = "6";
+  Decimal128Full literal(decimal_point_six, 2, 1);
+  auto node_b = TreeExprBuilder::MakeDecimalLiteral(literal);
+  auto add2 = TreeExprBuilder::MakeFunction("add", {node_a, node_b}, add2_type);
+  auto expr = TreeExprBuilder::MakeExpression(add2, res);
+
+  // Build a projector for the expression.
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_a =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale),
+                            {false, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
+
+  auto expected = MakeArrowArrayDecimal(
+      add2_type, MakeDecimalVector({"1.6", "2.6", "3.6", "4.6"}, scale),
+      {false, true, true, true});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(expected, outputs[0]);
+}
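+
+// Editor's note (illustrative): the literal above has precision 2 and scale 1,
+// so the string "6" denotes 0.6; the add result type for (36, 18) and (2, 1)
+// keeps scale 18, hence the expected values 1.6, 2.6, 3.6 and 4.6.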
+
+TEST_F(TestDecimal, TestIfElse) {
+  // schema for input fields
+  constexpr int32_t precision = 36;
+  constexpr int32_t scale = 18;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field_a = field("a", decimal_type);
+  auto field_b = field("b", decimal_type);
+  auto field_c = field("c", arrow::boolean());
+  auto schema = arrow::schema({field_a, field_b, field_c});
+
+  // output fields
+  auto field_result = field("res", decimal_type);
+
+  // build expression.
+  // if (c)
+  //   a
+  // else
+  //   b
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_b = TreeExprBuilder::MakeField(field_b);
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  auto if_node = TreeExprBuilder::MakeIf(node_c, node_a, node_b, decimal_type);
+
+  auto expr = TreeExprBuilder::MakeExpression(if_node, field_result);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  Status status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  DCHECK_OK(status);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_a =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"1", "2", "3", "4"}, scale),
+                            {false, true, true, true});
+  auto array_b =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"2", "3", "4", "5"}, scale),
+                            {true, true, true, true});
+
+  auto array_c = MakeArrowArrayBool({true, false, true, false}, {true, true, true, true});
+
+  // expected output
+  auto exp =
+      MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"0", "3", "3", "5"}, scale),
+                            {false, true, true, true});
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array_a, array_b, array_c});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  DCHECK_OK(status);
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/generate_data.h b/cpp/src/gandiva/tests/generate_data.h
index 01665b8ee17c5..398057510cb08 100644
--- a/cpp/src/gandiva/tests/generate_data.h
+++ b/cpp/src/gandiva/tests/generate_data.h
@@ -19,6 +19,8 @@
 #include
 #include
 
+#include "arrow/util/decimal.h"
+
 #ifndef GANDIVA_GENERATE_DATA_H
 #define GANDIVA_GENERATE_DATA_H
@@ -79,6 +81,24 @@ class Int64DataGenerator : public DataGenerator {
   Random random_;
 };
 
+class Decimal128DataGenerator : public DataGenerator<arrow::Decimal128> {
+ public:
+  explicit Decimal128DataGenerator(bool large) : large_(large) {}
+
+  arrow::Decimal128 GenerateData() {
+    uint64_t low = random_.next();
+    int64_t high = random_.next();
+    if (large_) {
+      high += (1ull << 62);
+    }
+    return arrow::Decimal128(high, low);
+  }
+
+ protected:
+  bool large_;
+  Random random_;
+};
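+
+// Editor's note (illustrative): with large_ = true, adding 2^62 to the high
+// word produces magnitudes around 2^126, i.e. values with very few leading
+// zeros, which steers the benchmarked additions onto the slower AddLarge path.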
 
 class FastUtf8DataGenerator : public DataGenerator<std::string> {
  public:
   explicit FastUtf8DataGenerator(int max_len) : max_len_(max_len), cur_char_('a') {}
diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc
index ce86bf0612402..e0794a233a2ce 100644
--- a/cpp/src/gandiva/tests/micro_benchmarks.cc
+++ b/cpp/src/gandiva/tests/micro_benchmarks.cc
@@ -19,6 +19,7 @@
 #include "arrow/memory_pool.h"
 #include "arrow/status.h"
 #include "benchmark/benchmark.h"
+#include "gandiva/decimal_type_util.h"
 #include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tests/timed_evaluate.h"
@@ -31,10 +32,6 @@
 using arrow::int32;
 using arrow::int64;
 using arrow::utf8;
 
-// TODO : the base numbers are from a mac. they need to be caliberated
-// for the hardware used by travis.
-float tolerance_ratio = 6.0;
-
 static void TimedTestAdd3(benchmark::State& state) {
   // schema for input fields
   auto field0 = field("f0", int64());
@@ -280,6 +277,119 @@ static void TimedTestInExpr(benchmark::State& state) {
   ASSERT_OK(status);
 }
 
+static void DoDecimalAdd3(benchmark::State& state, int32_t precision, int32_t scale,
+                          bool large = false) {
+  // schema for input fields
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field0 = field("f0", decimal_type);
+  auto field1 = field("f1", decimal_type);
+  auto field2 = field("f2", decimal_type);
+  auto schema = arrow::schema({field0, field1, field2});
+
+  Decimal128TypePtr add2_type;
+  auto status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd,
+                                               {decimal_type, decimal_type}, &add2_type);
+
+  Decimal128TypePtr output_type;
+  status = DecimalTypeUtil::GetResultType(DecimalTypeUtil::kOpAdd,
+                                          {add2_type, decimal_type}, &output_type);
+
+  // output field
+  auto field_sum = field("add", output_type);
+
+  // Build expression
+  auto part_sum = TreeExprBuilder::MakeFunction(
+      "add", {TreeExprBuilder::MakeField(field1), TreeExprBuilder::MakeField(field2)},
+      add2_type);
+  auto sum = TreeExprBuilder::MakeFunction(
+      "add", {TreeExprBuilder::MakeField(field0), part_sum}, output_type);
+
+  auto sum_expr = TreeExprBuilder::MakeExpression(sum, field_sum);
+
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {sum_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  Decimal128DataGenerator data_generator(large);
+  ProjectEvaluator evaluator(projector);
+
+  status = TimedEvaluate<arrow::Decimal128Type, arrow::Decimal128>(
+      schema, evaluator, data_generator, arrow::default_memory_pool(), 1 * MILLION,
+      16 * THOUSAND, state);
+  ASSERT_OK(status);
+}
+
+static void DoDecimalAdd2(benchmark::State& state, int32_t precision, int32_t scale,
+                          bool large = false) {
+  // schema for input fields
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+  auto field0 = field("f0", decimal_type);
+  auto field1 = field("f1", decimal_type);
+  auto schema = arrow::schema({field0, field1});
+
+  Decimal128TypePtr output_type;
+  auto status = DecimalTypeUtil::GetResultType(
+      DecimalTypeUtil::kOpAdd, {decimal_type, decimal_type}, &output_type);
+
+  // output field
+  auto field_sum = field("add", output_type);
+
+  // Build expression
+  auto sum = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
+
+  std::shared_ptr<Projector> projector;
+  status = Projector::Make(schema, {sum}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  Decimal128DataGenerator data_generator(large);
+  ProjectEvaluator evaluator(projector);
+
+  status = TimedEvaluate<arrow::Decimal128Type, arrow::Decimal128>(
+      schema, evaluator, data_generator, arrow::default_memory_pool(), 1 * MILLION,
+      16 * THOUSAND, state);
+  ASSERT_OK(status);
+}
+
+static void DecimalAdd2Fast(benchmark::State& state) {
+  // use lesser precision to test the fast-path
+  DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18);
+}
+
+static void DecimalAdd2LeadingZeroes(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 6);
+}
+
+static void DecimalAdd2LeadingZeroesWithDiv(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 18);
+}
+
+static void DecimalAdd2Large(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision, 18, true);
+}
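+
+// Editor's note (illustrative): precision kMaxPrecision - 6 keeps the result
+// precision below the 38-digit maximum, so Add() stays on the fast path. At
+// kMaxPrecision the large-integer path runs; with scale 18 the result scale is
+// adjusted down by one, so the final CheckAndReduceScale performs a division.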
+
+static void DecimalAdd3Fast(benchmark::State& state) {
+  // use lesser precision to test the fast-path
+  DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision - 6, 18);
+}
+
+static void DecimalAdd3LeadingZeroes(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 6);
+}
+
+static void DecimalAdd3LeadingZeroesWithDiv(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18);
+}
+
+static void DecimalAdd3Large(benchmark::State& state) {
+  // use max precision to test the large-integer-path
+  DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true);
+}
+
 BENCHMARK(TimedTestAdd3)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
@@ -289,5 +399,13 @@
 BENCHMARK(TimedTestFilterLike)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 BENCHMARK(TimedTestAllocs)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 BENCHMARK(TimedTestMultiOr)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 BENCHMARK(TimedTestInExpr)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd2Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd2LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd2LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd2Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd3Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd3LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd3LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(DecimalAdd3Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
 
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index 72b45b124b8dd..0e0e27a0c9aa4 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -21,6 +21,7 @@
 #include
 #include "arrow/test-util.h"
 #include "gandiva/arrow.h"
+#include "gandiva/configuration.h"
 
 #ifndef GANDIVA_TEST_UTIL_H
 #define GANDIVA_TEST_UTIL_H
@@ -46,6 +47,14 @@ static ArrayPtr MakeArrowArray(std::vector values) {
   return out;
 }
 
+template <typename TYPE, typename C_TYPE>
+static ArrayPtr MakeArrowArray(const std::shared_ptr<arrow::DataType>& type,
+                               std::vector<C_TYPE> values, std::vector<bool> validity) {
+  ArrayPtr out;
+  arrow::ArrayFromVector<TYPE, C_TYPE>(type, validity, values, &out);
+  return out;
+}
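+
+// Usage sketch (illustrative, editor's example): build a one-element decimal
+// array with an explicit type:
+//   auto ty = std::make_shared<arrow::Decimal128Type>(38, 6);
+//   auto arr = MakeArrowArrayDecimal(ty, {arrow::Decimal128(100)}, {true});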
+
 template
 static ArrayPtr MakeArrowTypeArray(const std::shared_ptr& type,
                                    const std::vector& values,
@@ -68,11 +77,16 @@
 #define MakeArrowArrayFloat64 MakeArrowArray
 #define MakeArrowArrayUtf8 MakeArrowArray
 #define MakeArrowArrayBinary MakeArrowArray
+#define MakeArrowArrayDecimal MakeArrowArray<arrow::Decimal128Type, arrow::Decimal128>
 
 #define EXPECT_ARROW_ARRAY_EQUALS(a, b)                                \
   EXPECT_TRUE((a)->Equals(b)) << "expected array: " << (a)->ToString() \
                               << " actual array: " << (b)->ToString();
 
+#define EXPECT_ARROW_TYPE_EQUALS(a, b)                                \
+  EXPECT_TRUE((a)->Equals(b)) << "expected type: " << (a)->ToString() \
+                              << " actual type: " << (b)->ToString();
+
 std::shared_ptr<Configuration> TestConfiguration() {
   auto builder = ConfigurationBuilder();
   builder.set_byte_code_file_path(GANDIVA_BYTE_COMPILE_FILE_PATH);
diff --git a/cpp/src/gandiva/tests/timed_evaluate.h b/cpp/src/gandiva/tests/timed_evaluate.h
index dab47c2f218be..9db7d88d2a226 100644
--- a/cpp/src/gandiva/tests/timed_evaluate.h
+++ b/cpp/src/gandiva/tests/timed_evaluate.h
@@ -100,7 +100,9 @@ Status TimedEvaluate(SchemaPtr schema, BaseEvaluator& evaluator,
   for (int col = 0; col < num_fields; col++) {
     std::vector<C_TYPE> data = GenerateData<C_TYPE>(batch_size, data_generator);
     std::vector<bool> validity(batch_size, true);
-    ArrayPtr col_data = MakeArrowArray<TYPE, C_TYPE>(data, validity);
+    ArrayPtr col_data =
+        MakeArrowArray<TYPE, C_TYPE>(schema->field(col)->type(), data, validity);
+
     columns.push_back(col_data);
   }
 
diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc
index 86a2824075497..23a49e2b7929a 100644
--- a/cpp/src/gandiva/tree_expr_builder.cc
+++ b/cpp/src/gandiva/tree_expr_builder.cc
@@ -19,6 +19,7 @@
 #include
 
+#include "gandiva/decimal_type_util.h"
 #include "gandiva/gandiva_aliases.h"
 #include "gandiva/node.h"
@@ -49,6 +50,11 @@ NodePtr TreeExprBuilder::MakeBinaryLiteral(const std::string& value) {
   return std::make_shared<LiteralNode>(arrow::binary(), LiteralHolder(value), false);
 }
 
+NodePtr TreeExprBuilder::MakeDecimalLiteral(const Decimal128Full& value) {
+  return std::make_shared<LiteralNode>(arrow::decimal(value.precision(), value.scale()),
+                                       LiteralHolder(value), false);
+}
+
 NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) {
   static const std::string empty;
@@ -92,6 +98,10 @@ NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) {
     return std::make_shared<LiteralNode>(data_type, LiteralHolder((int64_t)0), true);
   case arrow::Type::TIMESTAMP:
     return std::make_shared<LiteralNode>(data_type, LiteralHolder((int64_t)0), true);
+  case arrow::Type::DECIMAL: {
+    Decimal128Full literal(0, 0);
+    return std::make_shared<LiteralNode>(data_type, LiteralHolder(literal), true);
+  }
   default:
     return nullptr;
   }
diff --git a/cpp/src/gandiva/tree_expr_builder.h b/cpp/src/gandiva/tree_expr_builder.h
index cd261c8bf978d..ae5f7fb9df3fd 100644
--- a/cpp/src/gandiva/tree_expr_builder.h
+++ b/cpp/src/gandiva/tree_expr_builder.h
@@ -23,7 +23,9 @@
 #include
 #include
 
+#include "arrow/type.h"
 #include "gandiva/condition.h"
+#include "gandiva/decimal_full.h"
 #include "gandiva/expression.h"
 
 namespace gandiva {
@@ -45,6 +47,7 @@ class TreeExprBuilder {
   static NodePtr MakeLiteral(double value);
   static NodePtr MakeStringLiteral(const std::string& value);
   static NodePtr MakeBinaryLiteral(const std::string& value);
+  static NodePtr MakeDecimalLiteral(const Decimal128Full& value);
 
   /// \brief create a node on a null literal.
   /// returns null if data_type is null or if it's not a supported datatype.
diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp
index 08076aade4d9e..8d2d5da904bab 100644
--- a/cpp/valgrind.supp
+++ b/cpp/valgrind.supp
@@ -21,6 +21,12 @@
   Memcheck:Cond
   fun:*CastFunctor*BooleanType*
 }
+{
+  :Conditional jump or move depends on uninitialised value(s)
+  Memcheck:Cond
+  ...
+  fun:*llvm*PassManager*
+}
 {
   :Conditional jump or move depends on uninitialised value(s)
   Memcheck:Cond
diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml
index d365eb9193ac1..285ea861f9795 100644
--- a/java/gandiva/pom.xml
+++ b/java/gandiva/pom.xml
@@ -29,7 +29,7 @@
     2.5.0
     18.0
     true
-    ../../cpp/debug
+    ../../cpp/debug/debug
 
@@ -68,6 +68,11 @@
       2.10
       test
 
+    <dependency>
+      <groupId>net.java.dev.jna</groupId>
+      <artifactId>jna</artifactId>
+      <version>4.5.0</version>
+    </dependency>
 
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
index 96788b39e08ec..46deee95fa717 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java
@@ -17,8 +17,6 @@
 
 package org.apache.arrow.gandiva.evaluator;
 
-import org.apache.arrow.gandiva.exceptions.GandivaException;
-
 /**
  * Used to construct gandiva configuration objects.
  */
@@ -26,16 +24,6 @@ public class ConfigurationBuilder {
 
   private String byteCodeFilePath = "";
 
-  private static volatile long defaultConfiguration = 0L;
-
-  /**
-   * Ctor - ensure that gandiva is loaded.
-   * @throws GandivaException - if library cannot be loaded.
-   */
-  public ConfigurationBuilder() throws GandivaException {
-    JniWrapper.getInstance();
-  }
-
   public ConfigurationBuilder withByteCodeFilePath(final String byteCodeFilePath) {
     this.byteCodeFilePath = byteCodeFilePath;
     return this;
@@ -45,26 +33,6 @@ public String getByteCodeFilePath() {
     return byteCodeFilePath;
   }
 
-  /**
-   * Get the default configuration to invoke gandiva.
-   * @return default configuration
-   * @throws GandivaException if unable to get native builder instance.
-   */
-  static long getDefaultConfiguration() throws GandivaException {
-    if (defaultConfiguration == 0L) {
-      synchronized (ConfigurationBuilder.class) {
-        if (defaultConfiguration == 0L) {
-          String defaultByteCodeFilePath = JniWrapper.getInstance().getByteCodeFilePath();
-
-          defaultConfiguration = new ConfigurationBuilder()
-            .withByteCodeFilePath(defaultByteCodeFilePath)
-            .buildConfigInstance();
-        }
-      }
-    }
-    return defaultConfiguration;
-  }
-
   public native long buildConfigInstance();
 
   public native void releaseConfigInstance(long configId);
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
new file mode 100644
index 0000000000000..37dd0f61056b0
--- /dev/null
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.arrow.gandiva.evaluator; + +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; + +public class DecimalTypeUtil { + + public enum OperationType { + ADD, + SUBTRACT, + MULTIPLY, + DIVIDE, + MOD + } + + private static final int MIN_ADJUSTED_SCALE = 6; + /// The maximum precision representable by a 16-byte decimal + private static final int MAX_PRECISION = 38; + + public static Decimal getResultTypeForOperation(OperationType operation, Decimal operand1, Decimal + operand2) { + int s1 = operand1.getScale(); + int s2 = operand2.getScale(); + int p1 = operand1.getPrecision(); + int p2 = operand2.getPrecision(); + int resultScale = 0; + int resultPrecision = 0; + switch (operation) { + case ADD: + case SUBTRACT: + resultScale = Math.max(operand1.getScale(), operand2.getScale()); + resultPrecision = resultScale + Math.max(operand1.getPrecision() - operand1.getScale(), + operand2.getPrecision() - operand2.getScale()) + 1; + break; + case MULTIPLY: + resultScale = s1 + s2; + resultPrecision = p1 + p2 + 1; + break; + case DIVIDE: + resultScale = + Math.max(MIN_ADJUSTED_SCALE, operand1.getScale() + operand2.getPrecision() + 1); + resultPrecision = + operand1.getPrecision() - operand1.getScale() + operand2.getScale() + resultScale; + break; + case MOD: + resultScale = Math.max(operand1.getScale(), operand2.getScale()); + resultPrecision = Math.min(operand1.getPrecision() - operand1.getScale(), + operand2.getPrecision() - operand2.getScale()) + + resultScale; + break; + default: + throw new RuntimeException("Needs support"); + } + return adjustScaleIfNeeded(resultPrecision, resultScale); + } + + private static Decimal adjustScaleIfNeeded(int precision, int scale) { + if (precision > MAX_PRECISION) { + int minScale = Math.min(scale, MIN_ADJUSTED_SCALE); + int delta = precision - MAX_PRECISION; + precision = MAX_PRECISION; + scale = Math.max(scale - delta, minScale); + } + return new Decimal(precision, scale); + } + +} + diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java index 9c41c1942e9b3..b9986791850a7 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java @@ -70,7 +70,7 @@ public static ExpressionRegistry getInstance() throws GandivaException { synchronized (ExpressionRegistry.class) { if (INSTANCE == null) { // ensure library is setup. 
- JniWrapper.getInstance(); + JniLoader.getInstance(); Set typesFromGandiva = getSupportedTypesFromGandiva(); Set functionsFromGandiva = getSupportedFunctionsFromGandiva(); INSTANCE = new ExpressionRegistry(typesFromGandiva, functionsFromGandiva); @@ -173,10 +173,11 @@ private static ArrowType getArrowType(ExtGandivaType type) { BIT_WIDTH_64); case GandivaType.NONE_VALUE: return new ArrowType.Null(); + case GandivaType.DECIMAL_VALUE: + return new ArrowType.Decimal(0,0); case GandivaType.FIXED_SIZE_BINARY_VALUE: case GandivaType.MAP_VALUE: case GandivaType.INTERVAL_VALUE: - case GandivaType.DECIMAL_VALUE: case GandivaType.DICTIONARY_VALUE: case GandivaType.LIST_VALUE: case GandivaType.STRUCT_VALUE: diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java index 25904d3dc1d76..46508b1f97a34 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java @@ -43,11 +43,13 @@ public class Filter { private static final Logger logger = LoggerFactory.getLogger(Filter.class); + private final JniWrapper wrapper; private final long moduleId; private final Schema schema; private boolean closed; - private Filter(long moduleId, Schema schema) { + private Filter(JniWrapper wrapper, long moduleId, Schema schema) { + this.wrapper = wrapper; this.moduleId = moduleId; this.schema = schema; this.closed = false; @@ -63,7 +65,7 @@ private Filter(long moduleId, Schema schema) { * @return A native filter object that can be used to invoke on a RecordBatch */ public static Filter make(Schema schema, Condition condition) throws GandivaException { - return make(schema, condition, ConfigurationBuilder.getDefaultConfiguration()); + return make(schema, condition, JniLoader.getDefaultConfiguration()); } /** @@ -81,11 +83,11 @@ public static Filter make(Schema schema, Condition condition, long configuration // Invoke the JNI layer to create the LLVM module representing the filter. 
GandivaTypes.Condition conditionBuf = condition.toProtobuf(); GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper gandivaBridge = JniWrapper.getInstance(); - long moduleId = gandivaBridge.buildFilter(schemaBuf.toByteArray(), + JniWrapper wrapper = JniLoader.getInstance().getWrapper(); + long moduleId = wrapper.buildFilter(schemaBuf.toByteArray(), conditionBuf.toByteArray(), configurationId); logger.info("Created module for the projector with id {}", moduleId); - return new Filter(moduleId, schema); + return new Filter(wrapper, moduleId, schema); } /** @@ -144,7 +146,7 @@ private void evaluate(int numRows, List buffers, List buf bufSizes[idx++] = bufLayout.getSize(); } - int numRecords = JniWrapper.getInstance().evaluateFilter(this.moduleId, numRows, + int numRecords = wrapper.evaluateFilter(this.moduleId, numRows, bufAddrs, bufSizes, selectionVector.getType().getNumber(), selectionVector.getBuffer().memoryAddress(), selectionVector.getBuffer().capacity()); @@ -161,7 +163,7 @@ public void close() throws GandivaException { return; } - JniWrapper.getInstance().closeFilter(this.moduleId); + wrapper.closeFilter(this.moduleId); this.closed = true; } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java new file mode 100644 index 0000000000000..3491b283e5dd5 --- /dev/null +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.evaluator; + +import static java.util.UUID.randomUUID; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; + +import org.apache.arrow.gandiva.exceptions.GandivaException; + +import com.sun.jna.NativeLibrary; + +/** + * This class handles loading of the jni library, and acts as a bridge for the native functions. 
+ */ +class JniLoader { + private static final String LIBRARY_NAME = "gandiva_jni"; + private static final String IRHELPERS_BC = "irhelpers.bc"; + + private static volatile JniLoader INSTANCE; + private static volatile long defaultConfiguration = 0L; + + private final String byteCodeFilePath; + private final JniWrapper wrapper; + + private JniLoader(String byteCodeFilePath) { + this.byteCodeFilePath = byteCodeFilePath; + this.wrapper = new JniWrapper(); + } + + static JniLoader getInstance() throws GandivaException { + if (INSTANCE == null) { + synchronized (JniLoader.class) { + if (INSTANCE == null) { + INSTANCE = setupInstance(); + } + } + } + return INSTANCE; + } + + private static JniLoader setupInstance() throws GandivaException { + try { + String tempDir = System.getProperty("java.io.tmpdir"); + loadGandivaLibraryFromJar(tempDir); + File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); + return new JniLoader(byteCodeFile.getAbsolutePath()); + } catch (IOException ioException) { + throw new GandivaException("unable to create native instance", ioException); + } + } + + private static void loadGandivaLibraryFromJar(final String tmpDir) + throws IOException, GandivaException { + final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME); + final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad); + // This is required to load the library with RT_GLOBAL flags. Otherwise, the symbols in the + // libgandiva.so aren't visible to the JIT. + NativeLibrary.getInstance(libraryFile.getAbsolutePath()); + System.load(libraryFile.getAbsolutePath()); + } + + + private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad) + throws IOException, GandivaException { + final File temp = setupFile(tmpDir, libraryToLoad); + try (final InputStream is = JniLoader.class.getClassLoader() + .getResourceAsStream(libraryToLoad)) { + if (is == null) { + throw new GandivaException(libraryToLoad + " was not found inside JAR."); + } else { + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + } + return temp; + } + + private static File setupFile(String tmpDir, String libraryToLoad) + throws IOException, GandivaException { + // accommodate multiple processes running with gandiva jar. + // length should be ok since uuid is only 36 characters. + final String randomizeFileName = libraryToLoad + randomUUID(); + final File temp = new File(tmpDir, randomizeFileName); + if (temp.exists() && !temp.delete()) { + throw new GandivaException("File: " + temp.getAbsolutePath() + + " already exists and cannot be removed."); + } + if (!temp.createNewFile()) { + throw new GandivaException("File: " + temp.getAbsolutePath() + + " could not be created."); + } + temp.deleteOnExit(); + return temp; + } + + /** + * Returns the byte code file path extracted from jar. + */ + public String getByteCodeFilePath() { + return byteCodeFilePath; + } + + /** + * Returns the jni wrapper. + */ + JniWrapper getWrapper() throws GandivaException { + return wrapper; + } + + /** + * Get the default configuration to invoke gandiva. + * @return default configuration + * @throws GandivaException if unable to get native builder instance. 
+ */ + static long getDefaultConfiguration() throws GandivaException { + if (defaultConfiguration == 0L) { + synchronized (ConfigurationBuilder.class) { + if (defaultConfiguration == 0L) { + String defaultByteCodeFilePath = JniLoader.getInstance().getByteCodeFilePath(); + + defaultConfiguration = new ConfigurationBuilder() + .withByteCodeFilePath(defaultByteCodeFilePath) + .buildConfigInstance(); + } + } + } + return defaultConfiguration; + } +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java index eea42f6976ce4..f00b0fbb9151a 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java @@ -17,100 +17,15 @@ package org.apache.arrow.gandiva.evaluator; -import static java.util.UUID.randomUUID; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; - import org.apache.arrow.gandiva.exceptions.GandivaException; /** * This class is implemented in JNI. This provides the Java interface - * to invoke functions in JNI + * to invoke functions in JNI. + * This file is used to generated the .h files required for jni. Avoid all + * external dependencies in this file. */ -class JniWrapper { - private static final String LIBRARY_NAME = "gandiva_jni"; - private static final String IRHELPERS_BC = "irhelpers.bc"; - - private static volatile JniWrapper INSTANCE; - - private final String byteCodeFilePath; - - private JniWrapper(String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; - } - - static JniWrapper getInstance() throws GandivaException { - if (INSTANCE == null) { - synchronized (JniWrapper.class) { - if (INSTANCE == null) { - INSTANCE = setupInstance(); - } - } - } - return INSTANCE; - } - - private static JniWrapper setupInstance() throws GandivaException { - try { - String tempDir = System.getProperty("java.io.tmpdir"); - loadGandivaLibraryFromJar(tempDir); - File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); - return new JniWrapper(byteCodeFile.getAbsolutePath()); - } catch (IOException ioException) { - throw new GandivaException("unable to create native instance", ioException); - } - } - - private static void loadGandivaLibraryFromJar(final String tmpDir) - throws IOException, GandivaException { - final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME); - final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad); - System.load(libraryFile.getAbsolutePath()); - } - - - private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad) - throws IOException, GandivaException { - final File temp = setupFile(tmpDir, libraryToLoad); - try (final InputStream is = JniWrapper.class.getClassLoader() - .getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new GandivaException(libraryToLoad + " was not found inside JAR."); - } else { - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - } - } - return temp; - } - - private static File setupFile(String tmpDir, String libraryToLoad) - throws IOException, GandivaException { - // accommodate multiple processes running with gandiva jar. - // length should be ok since uuid is only 36 characters. 
- final String randomizeFileName = libraryToLoad + randomUUID(); - final File temp = new File(tmpDir, randomizeFileName); - if (temp.exists() && !temp.delete()) { - throw new GandivaException("File: " + temp.getAbsolutePath() + - " already exists and cannot be removed."); - } - if (!temp.createNewFile()) { - throw new GandivaException("File: " + temp.getAbsolutePath() + - " could not be created."); - } - temp.deleteOnExit(); - return temp; - } - - /** - * Returns the byte code file path extracted from jar. - */ - public String getByteCodeFilePath() { - return byteCodeFilePath; - } +public class JniWrapper { /** * Generates the projector module to evaluate the expressions with diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java index d7578936b3d83..af1a4ca539cc4 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java @@ -46,12 +46,14 @@ public class Projector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Projector.class); + private JniWrapper wrapper; private final long moduleId; private final Schema schema; private final int numExprs; private boolean closed; - private Projector(long moduleId, Schema schema, int numExprs) { + private Projector(JniWrapper wrapper, long moduleId, Schema schema, int numExprs) { + this.wrapper = wrapper; this.moduleId = moduleId; this.schema = schema; this.numExprs = numExprs; @@ -71,7 +73,7 @@ private Projector(long moduleId, Schema schema, int numExprs) { */ public static Projector make(Schema schema, List exprs) throws GandivaException { - return make(schema, exprs, ConfigurationBuilder.getDefaultConfiguration()); + return make(schema, exprs, JniLoader.getDefaultConfiguration()); } /** @@ -96,11 +98,11 @@ public static Projector make(Schema schema, List exprs, long // Invoke the JNI layer to create the LLVM module representing the expressions GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper gandivaBridge = JniWrapper.getInstance(); - long moduleId = gandivaBridge.buildProjector(schemaBuf.toByteArray(), builder.build() - .toByteArray(), configurationId); + JniWrapper wrapper = JniLoader.getInstance().getWrapper(); + long moduleId = wrapper.buildProjector(schemaBuf.toByteArray(), + builder.build().toByteArray(), configurationId); logger.info("Created module for the projector with id {}", moduleId); - return new Projector(moduleId, schema, exprs.size()); + return new Projector(wrapper, moduleId, schema, exprs.size()); } /** @@ -175,9 +177,7 @@ private void evaluate(int numRows, List buffers, List buf valueVector.setValueCount(numRows); } - JniWrapper.getInstance().evaluateProjector(this.moduleId, numRows, - bufAddrs, bufSizes, - outAddrs, outSizes); + wrapper.evaluateProjector(this.moduleId, numRows, bufAddrs, bufSizes, outAddrs, outSizes); } /** @@ -188,7 +188,7 @@ public void close() throws GandivaException { return; } - JniWrapper.getInstance().closeProjector(this.moduleId); + wrapper.closeProjector(this.moduleId); this.closed = true; } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java new file mode 100644 index 0000000000000..1b908b9962fb3 --- /dev/null +++ 
b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.expression; + +import java.nio.charset.Charset; + +import org.apache.arrow.gandiva.exceptions.GandivaException; +import org.apache.arrow.gandiva.ipc.GandivaTypes; + +import com.google.protobuf.ByteString; + + +/** + * Used to represent expression tree nodes representing decimal constants. + * Used in the expression (x + 5.0) + */ +class DecimalNode implements TreeNode { + private final String value; + private final int precision; + private final int scale; + + DecimalNode(String value, int precision, int scale) { + this.value = value; + this.precision = precision; + this.scale = scale; + } + + @Override + public GandivaTypes.TreeNode toProtobuf() throws GandivaException { + GandivaTypes.DecimalNode.Builder decimalNode = GandivaTypes.DecimalNode.newBuilder(); + decimalNode.setValue(value); + decimalNode.setPrecision(precision); + decimalNode.setScale(scale); + + GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); + builder.setDecimalNode(decimalNode.build()); + return builder.build(); + } +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java index f5568591c2002..a220c547e44a6 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java @@ -55,6 +55,10 @@ public static TreeNode makeBinaryLiteral(byte[] binaryConstant) { return new BinaryNode(binaryConstant); } + public static TreeNode makeDecimalLiteral(String decimalConstant, int precision, int scale) { + return new DecimalNode(decimalConstant, precision, scale); + } + /** * create a null literal. 
*/ diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java index aeb3d418a70ac..97c2883c58e5e 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java @@ -17,6 +17,8 @@ package org.apache.arrow.gandiva.evaluator; +import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -27,6 +29,7 @@ import org.apache.arrow.gandiva.expression.ExpressionTree; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -229,6 +232,18 @@ ArrowBuf intBuf(int[] ints) { return buffer; } + DecimalVector decimalVector(String[] values, int precision, int scale) { + DecimalVector vector = new DecimalVector("decimal" + Math.random(), allocator, precision, scale); + vector.allocateNew(); + for (int i = 0; i < values.length; i++) { + BigDecimal decimal = new BigDecimal(values[i]); + vector.setSafe(i, decimal); + } + + vector.setValueCount(values.length); + return vector; + } + ArrowBuf longBuf(long[] longs) { ArrowBuf buffer = allocator.buffer(longs.length * 8); for (int i = 0; i < longs.length; i++) { diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java new file mode 100644 index 0000000000000..4a4fb82951c16 --- /dev/null +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.gandiva.evaluator; + +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.junit.Assert; +import org.junit.Test; + +public class DecimalTypeUtilTest { + + @Test + public void testOutputTypesForAdd() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(30, 10); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(31, 10).equals(resultType)); + + operand1 = getDecimal(30, 6); + operand2 = getDecimal(30, 5); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(32, 6).equals(resultType)); + + operand1 = getDecimal(30, 10); + operand2 = getDecimal(38, 10); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 9).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(38, 38); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 9).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(38, 2); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + } + + @Test + public void testOutputTypesForMultiply() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(30, 10); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MULTIPLY, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + operand1 = getDecimal(38, 10); + operand2 = getDecimal(9, 2); + resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MULTIPLY, operand1, operand2); + Assert.assertTrue(getDecimal(38, 6).equals(resultType)); + + } + + @Test + public void testOutputTypesForMod() { + ArrowType.Decimal operand1 = getDecimal(30, 10); + ArrowType.Decimal operand2 = getDecimal(28 , 7); + ArrowType.Decimal resultType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.MOD, operand1, operand2); + Assert.assertTrue(getDecimal(30, 10).equals(resultType)); + } + + private ArrowType.Decimal getDecimal(int precision, int scale) { + return new ArrowType.Decimal(precision, scale); + } + +} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java new file mode 100644 index 0000000000000..a3a0b4818ac22 --- /dev/null +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.gandiva.evaluator; + + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.gandiva.exceptions.GandivaException; +import org.apache.arrow.gandiva.expression.ExpressionTree; +import org.apache.arrow.gandiva.expression.TreeBuilder; +import org.apache.arrow.gandiva.expression.TreeNode; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; + +import com.google.common.collect.Lists; + +public class ProjectorDecimalTest extends org.apache.arrow.gandiva.evaluator.BaseEvaluatorTest { + + @Test + public void test_add() throws GandivaException { + int precision = 38; + int scale = 8; + ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale); + Field a = Field.nullable("a", decimal); + Field b = Field.nullable("b", decimal); + List args = Lists.newArrayList(a, b); + + ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, decimal, decimal); + Field retType = Field.nullable("c", outputType); + ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); + + List exprs = Lists.newArrayList(root); + + Schema schema = new Schema(args); + Projector eval = Projector.make(schema, exprs); + + int numRows = 4; + byte[] validity = new byte[]{(byte) 255}; + String[] aValues = new String[]{"1.12345678","2.12345678","3.12345678","4.12345678"}; + String[] bValues = new String[]{"2.12345678","3.12345678","4.12345678","5.12345678"}; + + DecimalVector valuesa = decimalVector(aValues, precision, scale); + DecimalVector valuesb = decimalVector(bValues, precision, scale); + ArrowRecordBatch batch = + new ArrowRecordBatch( + numRows, + Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), + Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer(), + valuesb.getValidityBuffer(), valuesb.getDataBuffer())); + + DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(), + outputType.getScale()); + outVector.allocateNew(numRows); + + List output = new ArrayList(); + output.add(outVector); + eval.evaluate(batch, output); + + // should have scaled down. 
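+    // For ADD, the result-type rule exercised in DecimalTypeUtilTest above
+    // works out to: scale = max(s1, s2) and precision = max(p1 - s1, p2 - s2)
+    // + scale + 1, capped at 38 with the excess taken out of the scale
+    // (never below 6). Here that turns (38, 8) + (38, 8) into (38, 7), which
+    // is why the expected sums below carry 7 decimal digits.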
+ BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(3.2469136), + BigDecimal.valueOf(5.2469136), + BigDecimal.valueOf(7.2469136), + BigDecimal.valueOf(9.2469136)}; + + for (int i = 0; i < 4; i++) { + assertFalse(outVector.isNull(i)); + assertTrue("index : " + i + " failed compare", expOutput[i].compareTo(outVector.getObject(i) + ) == 0); + } + + // free buffers + releaseRecordBatch(batch); + releaseValueVectors(output); + eval.close(); + } + + @Test + public void test_add_literal() throws GandivaException { + int precision = 2; + int scale = 0; + ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale); + ArrowType.Decimal literalType = new ArrowType.Decimal(2, 1); + Field a = Field.nullable("a", decimal); + + ArrowType.Decimal outputType = DecimalTypeUtil.getResultTypeForOperation(DecimalTypeUtil + .OperationType.ADD, decimal, literalType); + Field retType = Field.nullable("c", outputType); + TreeNode field = TreeBuilder.makeField(a); + TreeNode literal = TreeBuilder.makeDecimalLiteral("6", 2, 1); + List args = Lists.newArrayList(field, literal); + TreeNode root = TreeBuilder.makeFunction("add", args, outputType); + ExpressionTree tree = TreeBuilder.makeExpression(root, retType); + + List exprs = Lists.newArrayList(tree); + + Schema schema = new Schema(Lists.newArrayList(a)); + Projector eval = Projector.make(schema, exprs); + + int numRows = 4; + String[] aValues = new String[]{"1", "2", "3", "4"}; + + DecimalVector valuesa = decimalVector(aValues, precision, scale); + ArrowRecordBatch batch = + new ArrowRecordBatch( + numRows, + Lists.newArrayList(new ArrowFieldNode(numRows, 0)), + Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); + + DecimalVector outVector = new DecimalVector("decimal_output", allocator, outputType.getPrecision(), + outputType.getScale()); + outVector.allocateNew(numRows); + + List output = new ArrayList(); + output.add(outVector); + eval.evaluate(batch, output); + + BigDecimal[] expOutput = new BigDecimal[]{BigDecimal.valueOf(1.6), BigDecimal.valueOf(2.6), + BigDecimal.valueOf(3.6), BigDecimal.valueOf(4.6)}; + + for (int i = 0; i < 4; i++) { + assertFalse(outVector.isNull(i)); + assertTrue(expOutput[i].compareTo(outVector.getObject(i)) == 0); + } + + // free buffers + releaseRecordBatch(batch); + releaseValueVectors(output); + eval.close(); + } +} diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 76e55d6ba27ef..715ff9dcfb384 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -19,6 +19,8 @@ # distutils: language = c++ # cython: embedsignature = True +import os + from libcpp cimport bool as c_bool, nullptr from libcpp.memory cimport shared_ptr, unique_ptr, make_shared from libcpp.string cimport string as c_string @@ -73,6 +75,14 @@ from pyarrow.includes.libgandiva cimport ( CFunctionSignature, GetRegisteredFunctionSignatures) +if os.name == 'posix': + # Expose self with RTLD_GLOBAL so that symbols from gandiva.so and child + # libs (such as libstdc++) can be reached during JIT code execution. + # Another workaround is to use + # sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_NOW) + # but it would affect all C extensions loaded in the process. 
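+    # On POSIX, dlopen-ing a library that is already loaded returns the same
+    # handle, so re-opening this module by its own path below only promotes
+    # its symbols to the global scope; it does not load a second copy.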
+ import ctypes + _dll = ctypes.CDLL(__file__, ctypes.RTLD_GLOBAL) cdef class Node: cdef: From ccec63847e7709317a18036931ef3e3fbeab1f05 Mon Sep 17 00:00:00 2001 From: "Korn, Uwe" Date: Tue, 8 Jan 2019 10:14:53 -0600 Subject: [PATCH 050/203] ARROW-4191: [C++] Use same CC and AR for jemalloc as for the main sources Author: Korn, Uwe Closes #3347 from xhochy/ARROW-4191 and squashes the following commits: 44df02a23 ARROW-4191: Use same CC and AR for jemalloc as for the main sources --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index d8b34862eeaab..5a8c28feab4e8 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -772,7 +772,7 @@ if (ARROW_JEMALLOC) ExternalProject_Add(jemalloc_ep URL ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/jemalloc/${JEMALLOC_VERSION}.tar.gz PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html - CONFIGURE_COMMAND ./autogen.sh "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=je_arrow_" "--with-private-namespace=je_arrow_private_" "--disable-tls" + CONFIGURE_COMMAND ./autogen.sh "AR=${CMAKE_AR}" "CC=${CMAKE_C_COMPILER}" "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix=je_arrow_" "--with-private-namespace=je_arrow_private_" "--disable-tls" ${EP_LOG_OPTIONS} BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS} From 326015cfc66e1f657cdd6811620137e9e277b43d Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 8 Jan 2019 10:17:54 -0600 Subject: [PATCH 051/203] ARROW-4186: [C++] BitmapWriter shouldn't clobber data when length == 0 Author: Antoine Pitrou Closes #3348 from pitrou/ARROW-4186-bitmap-writer-zero-length and squashes the following commits: 2299b0906 ARROW-4186: BitmapWriter shouldn't clobber data when length == 0 --- cpp/src/arrow/util/bit-util-test.cc | 79 ++++++++++++++++++----------- cpp/src/arrow/util/bit-util.h | 4 +- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index b12e2ecf9eef9..174e6d0f05235 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ b/cpp/src/arrow/util/bit-util-test.cc @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -167,33 +166,40 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) { } TEST(BitmapWriter, NormalOperation) { - { - uint8_t bitmap[] = {0, 0, 0, 0}; - auto writer = internal::BitmapWriter(bitmap, 0, 12); - WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); - // {0b00110110, 0b1010, 0, 0} - ASSERT_BYTES_EQ(bitmap, {0x36, 0x0a, 0, 0}); - } - { - uint8_t bitmap[] = {0xff, 0xff, 0xff, 0xff}; - auto writer = internal::BitmapWriter(bitmap, 0, 12); - WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); - // {0b00110110, 0b11111010, 0xff, 0xff} - ASSERT_BYTES_EQ(bitmap, {0x36, 0xfa, 0xff, 0xff}); - } - { - uint8_t bitmap[] = {0, 0, 0, 0}; - auto writer = internal::BitmapWriter(bitmap, 3, 12); - WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); - // {0b10110000, 0b01010001, 0, 0} - ASSERT_BYTES_EQ(bitmap, {0xb0, 0x51, 0, 0}); - } - { - uint8_t bitmap[] = {0, 0, 0, 0}; - auto writer = internal::BitmapWriter(bitmap, 20, 12); - WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); - // {0, 0, 0b01100000, 0b10100011} - ASSERT_BYTES_EQ(bitmap, {0, 0, 0x60, 0xa3}); + for (const auto fill_byte_int : {0x00, 0xff}) { + const uint8_t fill_byte = 
static_cast(fill_byte_int); + { + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + auto writer = internal::BitmapWriter(bitmap, 0, 12); + WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); + // {0b00110110, 0b....1010, ........, ........} + ASSERT_BYTES_EQ(bitmap, {0x36, static_cast(0x0a | (fill_byte & 0xf0)), + fill_byte, fill_byte}); + } + { + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + auto writer = internal::BitmapWriter(bitmap, 3, 12); + WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); + // {0b10110..., 0b.1010001, ........, ........} + ASSERT_BYTES_EQ(bitmap, {static_cast(0xb0 | (fill_byte & 0x07)), + static_cast(0x51 | (fill_byte & 0x80)), fill_byte, + fill_byte}); + } + { + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + auto writer = internal::BitmapWriter(bitmap, 20, 12); + WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}); + // {........, ........, 0b0110...., 0b10100011} + ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, + static_cast(0x60 | (fill_byte & 0x0f)), 0xa3}); + } + // 0-length writes + for (int64_t pos = 0; pos < 32; ++pos) { + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + auto writer = internal::BitmapWriter(bitmap, pos, 0); + WriteVectorToWriter(writer, {}); + ASSERT_BYTES_EQ(bitmap, {fill_byte, fill_byte, fill_byte, fill_byte}); + } } } @@ -266,6 +272,10 @@ TEST(FirstTimeBitmapWriter, NormalOperation) { } { uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + { + auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 0); + WriteVectorToWriter(writer, {}); + } { auto writer = internal::FirstTimeBitmapWriter(bitmap, 4, 6); WriteVectorToWriter(writer, {0, 1, 1, 0, 1, 1}); @@ -274,6 +284,10 @@ TEST(FirstTimeBitmapWriter, NormalOperation) { auto writer = internal::FirstTimeBitmapWriter(bitmap, 10, 3); WriteVectorToWriter(writer, {0, 0, 0}); } + { + auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 0); + WriteVectorToWriter(writer, {}); + } { auto writer = internal::FirstTimeBitmapWriter(bitmap, 13, 3); WriteVectorToWriter(writer, {1, 0, 1}); @@ -319,8 +333,8 @@ TYPED_TEST(TestGenerateBits, NormalOperation) { for (const int64_t start_offset : start_offsets) { for (const int64_t length : lengths) { for (const uint8_t fill_byte : fill_bytes) { - uint8_t bitmap[kSourceSize]; - memset(bitmap, fill_byte, kSourceSize); + uint8_t bitmap[kSourceSize + 1]; + memset(bitmap, fill_byte, kSourceSize + 1); // First call GenerateBits { int64_t ncalled = 0; @@ -344,7 +358,7 @@ TYPED_TEST(TestGenerateBits, NormalOperation) { result_reader.Next(); } } - // Check bits preceding and following generated contents weren't clobbered + // Check bits preceding generated contents weren't clobbered { internal::BitmapReader reader_before(bitmap, 0, start_offset); for (int64_t i = 0; i < start_offset; ++i) { @@ -352,6 +366,9 @@ TYPED_TEST(TestGenerateBits, NormalOperation) { << "mismatch at preceding bit #" << start_offset - i; } } + // Check the byte following generated contents wasn't clobbered + auto byte_after = bitmap[BitUtil::CeilDiv(start_offset + length, 8)]; + ASSERT_EQ(byte_after, fill_byte); } } } diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 93b6cb28d91b1..415684e449287 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -409,7 +409,7 @@ class BitmapWriter { void Finish() { // Store current byte if we didn't went past bitmap storage - if (bit_mask_ != 0x01 || position_ < 
length_) { + if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) { bitmap_[byte_offset_] = current_byte_; } } @@ -461,7 +461,7 @@ class FirstTimeBitmapWriter { void Finish() { // Store current byte if we didn't went past bitmap storage - if (bit_mask_ != 0x01 || position_ < length_) { + if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) { bitmap_[byte_offset_] = current_byte_; } } From ac45f3210a194049ef35f49847dbc4ff5e70d48f Mon Sep 17 00:00:00 2001 From: Neville Dipale Date: Tue, 8 Jan 2019 16:49:12 -0700 Subject: [PATCH 052/203] ARROW-3839: [Rust] Add ability to infer schema in CSV reader Resubmission of #3128 Author: Neville Dipale Closes #3349 from nevi-me/rust/infer-csv-schema and squashes the following commits: 0838199 ARROW-3839: Add ability to infer schema in CSV reader --- ci/rust-build-main.bat | 1 + ci/travis_script_rust.sh | 1 + rust/arrow/Cargo.toml | 2 + rust/arrow/examples/read_csv_infer_schema.rs | 66 ++++ rust/arrow/src/csv/mod.rs | 1 + rust/arrow/src/csv/reader.rs | 373 +++++++++++++++++- rust/arrow/src/datatypes.rs | 4 +- rust/arrow/src/error.rs | 37 ++ .../test/data/uk_cities_with_headers.csv | 38 ++ rust/arrow/test/data/various_types.csv | 6 + 10 files changed, 524 insertions(+), 5 deletions(-) create mode 100644 rust/arrow/examples/read_csv_infer_schema.rs create mode 100644 rust/arrow/test/data/uk_cities_with_headers.csv create mode 100644 rust/arrow/test/data/various_types.csv diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat index ac5c9e7589245..b36a97acf51ac 100644 --- a/ci/rust-build-main.bat +++ b/ci/rust-build-main.bat @@ -40,5 +40,6 @@ cd arrow cargo run --example builders --target %TARGET% --release || exit /B cargo run --example dynamic_types --target %TARGET% --release || exit /B cargo run --example read_csv --target %TARGET% --release || exit /B +cargo run --example read_csv_infer_schema --target %TARGET% --release || exit /B popd diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh index 8e3c8c3906b24..c25d64ec42cb6 100755 --- a/ci/travis_script_rust.sh +++ b/ci/travis_script_rust.sh @@ -39,5 +39,6 @@ cd arrow cargo run --example builders cargo run --example dynamic_types cargo run --example read_csv +cargo run --example read_csv_infer_schema popd diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 77e8d53fa55b5..38e7e5e0ec06e 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -43,6 +43,8 @@ serde_json = "1.0.13" rand = "0.5" csv = "1.0.0" num = "0.2" +regex = "1.1" +lazy_static = "1.2" [dev-dependencies] criterion = "0.2" diff --git a/rust/arrow/examples/read_csv_infer_schema.rs b/rust/arrow/examples/read_csv_infer_schema.rs new file mode 100644 index 0000000000000..9dd2d2aaf2cc2 --- /dev/null +++ b/rust/arrow/examples/read_csv_infer_schema.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate arrow; + +use arrow::array::{BinaryArray, Float64Array}; +use arrow::csv; +use std::fs::File; + +fn main() { + let file = File::open("test/data/uk_cities_with_headers.csv").unwrap(); + let builder = csv::ReaderBuilder::new() + .has_headers(true) + .infer_schema(Some(100)); + let mut csv = builder.build(file).unwrap(); + let batch = csv.next().unwrap().unwrap(); + + println!( + "Loaded {} rows containing {} columns", + batch.num_rows(), + batch.num_columns() + ); + + println!("Inferred schema: {:?}", batch.schema()); + + let city = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let lat = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + let lng = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + + for i in 0..batch.num_rows() { + let city_name: String = String::from_utf8(city.value(i).to_vec()).unwrap(); + + println!( + "City: {}, Latitude: {}, Longitude: {}", + city_name, + lat.value(i), + lng.value(i) + ); + } +} diff --git a/rust/arrow/src/csv/mod.rs b/rust/arrow/src/csv/mod.rs index 9f2bd1db69db2..6521b196d1e12 100644 --- a/rust/arrow/src/csv/mod.rs +++ b/rust/arrow/src/csv/mod.rs @@ -18,3 +18,4 @@ pub mod reader; pub use self::reader::Reader; +pub use self::reader::ReaderBuilder; diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index 57c7dde1b250d..49e0302aa0672 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -40,8 +40,11 @@ //! let batch = csv.next().unwrap().unwrap(); //! ``` +use lazy_static::lazy_static; +use regex::{Regex, RegexBuilder}; +use std::collections::HashSet; use std::fs::File; -use std::io::BufReader; +use std::io::{BufReader, Seek, SeekFrom}; use std::sync::Arc; use csv as csv_crate; @@ -54,6 +57,130 @@ use crate::record_batch::RecordBatch; use self::csv_crate::{StringRecord, StringRecordsIntoIter}; +lazy_static! { + static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap(); + static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d*.)$").unwrap(); + static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$") + .case_insensitive(true) + .build() + .unwrap(); +} + +/// Infer the data type of a record +fn infer_field_schema(string: &str) -> DataType { + // when quoting is enabled in the reader, these quotes aren't escaped, we default to Utf8 for them + if string.starts_with("\"") { + return DataType::Utf8; + } + // match regex in a particular order + if BOOLEAN_RE.is_match(string) { + return DataType::Boolean; + } else if DECIMAL_RE.is_match(string) { + return DataType::Float64; + } else if INTEGER_RE.is_match(string) { + return DataType::Int64; + } else { + return DataType::Utf8; + } +} + +/// Infer the schema of a CSV file by reading through the first n records of the file, +/// with `max_read_records` controlling the maximum number of records to read. +/// +/// If `max_read_records` is not set, the whole file is read to infer its schema. 
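+///
+/// Per-column observations are merged as follows: a column seen as both
+/// integer and float widens to Float64, any other mix of types falls back
+/// to Utf8, and an empty field marks the column as nullable.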
+fn infer_file_schema( + mut file: File, + delimiter: u8, + max_read_records: Option, + has_headers: bool, +) -> Result { + let mut csv_reader = csv::ReaderBuilder::new() + .delimiter(delimiter) + .from_reader(BufReader::new(file.try_clone()?)); + + // get or create header names + // when has_headers is false, creates default column names with column_ prefix + let headers: Vec = if has_headers { + let headers = &csv_reader.headers()?.clone(); + headers.iter().map(|s| s.to_string()).collect() + } else { + let first_record_count = &csv_reader.headers()?.len(); + (0..*first_record_count) + .map(|i| format!("column_{}", i + 1)) + .into_iter() + .collect() + }; + + // save the csv reader position after reading headers + let position = csv_reader.position().clone(); + + let header_length = headers.len(); + // keep track of inferred field types + let mut column_types: Vec> = vec![HashSet::new(); header_length]; + // keep track of columns with nulls + let mut nulls: Vec = vec![false; header_length]; + + // return csv reader position to after headers + csv_reader.seek(position)?; + + let mut fields = vec![]; + + for result in csv_reader + .into_records() + .take(max_read_records.unwrap_or(std::usize::MAX)) + { + let record = result?; + + for i in 0..header_length { + let string: Option<&str> = record.get(i); + match string { + Some(s) => { + if s == "" { + nulls[i] = true; + } else { + column_types[i].insert(infer_field_schema(s)); + } + } + _ => {} + } + } + } + + // build schema from inference results + for i in 0..header_length { + let possibilities = &column_types[i]; + let has_nulls = nulls[i]; + let field_name = &headers[i]; + + // determine data type based on possible types + // if there are incompatible types, use DataType::Utf8 + match possibilities.len() { + 1 => { + for dtype in possibilities.iter() { + fields.push(Field::new(&field_name, dtype.clone(), has_nulls)); + } + } + 2 => { + if possibilities.contains(&DataType::Int64) + && possibilities.contains(&DataType::Float64) + { + // we have an integer and double, fall down to double + fields.push(Field::new(&field_name, DataType::Float64, has_nulls)); + } else { + // default to Utf8 for conflicting datatypes (e.g bool and int) + fields.push(Field::new(&field_name, DataType::Utf8, has_nulls)); + } + } + _ => fields.push(Field::new(&field_name, DataType::Utf8, has_nulls)), + } + } + + // return the file seek back to the start + file.seek(SeekFrom::Start(0))?; + + Ok(Schema::new(fields)) +} + /// CSV file reader pub struct Reader { /// Explicit schema for the CSV file @@ -68,6 +195,8 @@ pub struct Reader { impl Reader { /// Create a new CsvReader + /// + /// To customise the Reader, such as to enable schema inference, use `ReaderBuilder` pub fn new( file: File, schema: Arc, @@ -78,10 +207,9 @@ impl Reader { let csv_reader = csv::ReaderBuilder::new() .has_headers(has_headers) .from_reader(BufReader::new(file)); - let record_iter = csv_reader.into_records(); Reader { - schema: schema.clone(), + schema, projection, record_iter, batch_size, @@ -194,6 +322,141 @@ impl Reader { } } +/// CSV file reader builder +pub struct ReaderBuilder { + /// Optional schema for the CSV file + /// + /// If the schema is not supplied, the reader will try to infer the schema + /// based on the CSV structure. + schema: Option>, + /// Whether the file has headers or not + /// + /// If schema inference is run on a file with no headers, default column names + /// are created. + has_headers: bool, + /// An optional column delimiter. 
Defaults to `b','`
+    delimiter: Option<u8>,
+    /// Optional maximum number of records to read during schema inference
+    ///
+    /// If a number is not provided, all the records are read.
+    max_records: Option<usize>,
+    /// Batch size (number of records to load each time)
+    ///
+    /// The default batch size when using the `ReaderBuilder` is 1024 records
+    batch_size: usize,
+    /// Optional projection for which columns to load (zero-based column indices)
+    projection: Option<Vec<usize>>,
+}
+
+impl Default for ReaderBuilder {
+    fn default() -> ReaderBuilder {
+        ReaderBuilder {
+            schema: None,
+            has_headers: false,
+            delimiter: None,
+            max_records: None,
+            batch_size: 1024,
+            projection: None,
+        }
+    }
+}
+
+impl ReaderBuilder {
+    /// Create a new builder for configuring CSV parsing options.
+    ///
+    /// To convert a builder into a reader, call `build`
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// extern crate arrow;
+    ///
+    /// use arrow::csv;
+    /// use std::fs::File;
+    ///
+    /// fn example() -> csv::Reader {
+    ///     let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
+    ///
+    ///     // create a builder, inferring the schema with the first 100 records
+    ///     let builder = csv::ReaderBuilder::new().infer_schema(Some(100));
+    ///
+    ///     let reader = builder.build(file).unwrap();
+    ///
+    ///     reader
+    /// }
+    /// ```
+    pub fn new() -> ReaderBuilder {
+        ReaderBuilder::default()
+    }
+
+    /// Set the CSV file's schema
+    pub fn with_schema(mut self, schema: Arc<Schema>) -> Self {
+        self.schema = Some(schema);
+        self
+    }
+
+    /// Set whether the CSV file has headers
+    pub fn has_headers(mut self, has_headers: bool) -> Self {
+        self.has_headers = has_headers;
+        self
+    }
+
+    /// Set the CSV file's column delimiter as a byte character
+    pub fn with_delimiter(mut self, delimiter: u8) -> Self {
+        self.delimiter = Some(delimiter);
+        self
+    }
+
+    /// Set the CSV reader to infer the schema of the file
+    pub fn infer_schema(mut self, max_records: Option<usize>) -> Self {
+        // remove any schema that is set
+        self.schema = None;
+        self.max_records = max_records;
+        self
+    }
+
+    /// Set the batch size (number of records to load at one time)
+    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+        self.batch_size = batch_size;
+        self
+    }
+
+    /// Set the reader's column projection
+    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
+        self.projection = Some(projection);
+        self
+    }
+
+    /// Create a new `Reader` from the `ReaderBuilder`
+    pub fn build(self, file: File) -> Result<Reader> {
+        // check if schema should be inferred
+        let schema = match self.schema {
+            Some(schema) => schema,
+            None => {
+                let inferred_schema = infer_file_schema(
+                    file.try_clone().unwrap(),
+                    self.delimiter.unwrap_or(b','),
+                    self.max_records,
+                    self.has_headers,
+                )?;
+
+                Arc::new(inferred_schema)
+            }
+        };
+        let csv_reader = csv::ReaderBuilder::new()
+            .delimiter(self.delimiter.unwrap_or(b','))
+            .has_headers(self.has_headers)
+            .from_reader(BufReader::new(file));
+        let record_iter = csv_reader.into_records();
+        Ok(Reader {
+            schema,
+            projection: self.projection.clone(),
+            record_iter,
+            batch_size: self.batch_size,
+        })
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -236,6 +499,75 @@ mod tests {
         assert_eq!("Aberdeen, Aberdeen City, UK", city_name);
     }

+    #[test]
+    fn test_csv_with_schema_inference() {
+        let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
+
+        let builder = ReaderBuilder::new().has_headers(true).infer_schema(None);
+
+        let mut csv = builder.build(file).unwrap();
+        let batch = csv.next().unwrap().unwrap();
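+        // The builder's default batch size (1024) exceeds the 37 data rows
+        // in the file, so the whole file arrives in this first batch.
+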
assert_eq!(37, batch.num_rows()); + assert_eq!(3, batch.num_columns()); + + // access data from a primitive array + let lat = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(57.653484, lat.value(0)); + + // access data from a string array (ListArray) + let city = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + let city_name: String = String::from_utf8(city.value(13).to_vec()).unwrap(); + + assert_eq!("Aberdeen, Aberdeen City, UK", city_name); + } + + #[test] + fn test_csv_with_schema_inference_no_headers() { + let file = File::open("test/data/uk_cities.csv").unwrap(); + + let builder = ReaderBuilder::new().infer_schema(None); + + let mut csv = builder.build(file).unwrap(); + let batch = csv.next().unwrap().unwrap(); + + // csv field names should be 'column_{number}' + let schema = batch.schema(); + assert_eq!("column_1", schema.field(0).name()); + assert_eq!("column_2", schema.field(1).name()); + assert_eq!("column_3", schema.field(2).name()); + + assert_eq!(37, batch.num_rows()); + assert_eq!(3, batch.num_columns()); + + // access data from a primitive array + let lat = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(57.653484, lat.value(0)); + + // access data from a string array (ListArray) + let city = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + let city_name: String = String::from_utf8(city.value(13).to_vec()).unwrap(); + + assert_eq!("Aberdeen, Aberdeen City, UK", city_name); + } + #[test] fn test_csv_with_projection() { let schema = Schema::new(vec![ @@ -272,4 +604,39 @@ mod tests { assert_eq!(false, batch.column(1).is_null(4)); } + #[test] + fn test_nulls_with_inference() { + let file = File::open("test/data/various_types.csv").unwrap(); + + let builder = ReaderBuilder::new() + .infer_schema(None) + .has_headers(true) + .with_delimiter(b'|') + .with_batch_size(512) + .with_projection(vec![0, 1, 2, 3]); + + let mut csv = builder.build(file).unwrap(); + let batch = csv.next().unwrap().unwrap(); + + assert_eq!(5, batch.num_rows()); + assert_eq!(4, batch.num_columns()); + + let schema = batch.schema(); + + assert_eq!(&DataType::Int64, schema.field(0).data_type()); + assert_eq!(&DataType::Float64, schema.field(1).data_type()); + assert_eq!(&DataType::Float64, schema.field(2).data_type()); + assert_eq!(&DataType::Boolean, schema.field(3).data_type()); + + assert_eq!(false, schema.field(0).is_nullable()); + assert_eq!(true, schema.field(1).is_nullable()); + assert_eq!(true, schema.field(2).is_nullable()); + assert_eq!(false, schema.field(3).is_nullable()); + + assert_eq!(false, batch.column(1).is_null(0)); + assert_eq!(false, batch.column(1).is_null(1)); + assert_eq!(true, batch.column(1).is_null(2)); + assert_eq!(false, batch.column(1).is_null(3)); + assert_eq!(false, batch.column(1).is_null(4)); + } } diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 0627b4523a1ce..05db6ce7d40b9 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -42,7 +42,7 @@ use crate::error::{ArrowError, Result}; /// Nested types can themselves be nested within other arrays. /// For more information on these types please see /// [here](https://arrow.apache.org/docs/memory_layout.html). -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] pub enum DataType { Boolean, Int8, @@ -64,7 +64,7 @@ pub enum DataType { /// Contains the meta-data for a single relative type. 
/// /// The `Schema` object is an ordered collection of `Field` objects. -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] pub struct Field { name: String, data_type: DataType, diff --git a/rust/arrow/src/error.rs b/rust/arrow/src/error.rs index 559b2d7205994..b75111fd4e883 100644 --- a/rust/arrow/src/error.rs +++ b/rust/arrow/src/error.rs @@ -15,12 +15,49 @@ // specific language governing permissions and limitations // under the License. +use std::error::Error; + +use csv as csv_crate; + #[derive(Debug, Clone, PartialEq)] pub enum ArrowError { MemoryError(String), ParseError(String), ComputeError(String), DivideByZero, + CsvError(String), + IoError(String), +} + +impl From<::std::io::Error> for ArrowError { + fn from(error: ::std::io::Error) -> Self { + ArrowError::IoError(error.description().to_string()) + } +} + +impl From for ArrowError { + fn from(error: csv_crate::Error) -> Self { + match error.kind() { + csv_crate::ErrorKind::Io(error) => { + ArrowError::CsvError(error.description().to_string()) + } + csv_crate::ErrorKind::Utf8 {pos: _, err} => { + ArrowError::CsvError(format!("Encountered UTF-8 error while reading CSV file: {:?}", err.description())) + } + csv_crate::ErrorKind::UnequalLengths {pos: _, expected_len, len} => { + ArrowError::CsvError( + format!( + "Encountered unequal lengths between records on CSV file. Expected {} records, found {} records", + len, + expected_len + ) + ) + } + _ => { + ArrowError::CsvError("Error reading CSV file".to_string()) + } + } + } } pub type Result = ::std::result::Result; diff --git a/rust/arrow/test/data/uk_cities_with_headers.csv b/rust/arrow/test/data/uk_cities_with_headers.csv new file mode 100644 index 0000000000000..92f5a17bdda38 --- /dev/null +++ b/rust/arrow/test/data/uk_cities_with_headers.csv @@ -0,0 +1,38 @@ +city,lat,lng +"Elgin, Scotland, the UK",57.653484,-3.335724 +"Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404 +"Solihull, Birmingham, UK",52.412811,-1.778197 +"Cardiff, Cardiff county, UK",51.481583,-3.179090 +"Eastbourne, East Sussex, UK",50.768036,0.290472 +"Oxford, Oxfordshire, UK",51.752022,-1.257677 +"London, UK",51.509865,-0.118092 +"Swindon, Swindon, UK",51.568535,-1.772232 +"Gravesend, Kent, UK",51.441883,0.370759 +"Northampton, Northamptonshire, UK",52.240479,-0.902656 +"Rugby, Warwickshire, UK",52.370876,-1.265032 +"Sutton Coldfield, West Midlands, UK",52.570385,-1.824042 +"Harlow, Essex, UK",51.772938,0.102310 +"Aberdeen, Aberdeen City, UK",57.149651,-2.099075 +"Swansea, Swansea, UK",51.621441,-3.943646 +"Chesterfield, Derbyshire, UK",53.235046,-1.421629 +"Londonderry, Derry, UK",55.006763,-7.318268 +"Salisbury, Wiltshire, UK",51.068787,-1.794472 +"Weymouth, Dorset, UK",50.614429,-2.457621 +"Wolverhampton, West Midlands, UK",52.591370,-2.110748 +"Preston, Lancashire, UK",53.765762,-2.692337 +"Bournemouth, UK",50.720806,-1.904755 +"Doncaster, South Yorkshire, UK",53.522820,-1.128462 +"Ayr, South Ayrshire, UK",55.458565,-4.629179 +"Hastings, East Sussex, UK",50.854259,0.573453 +"Bedford, UK",52.136436,-0.460739 +"Basildon, Essex, UK",51.572376,0.470009 +"Chippenham, Wiltshire, UK",51.458057,-2.116074 +"Belfast, UK",54.607868,-5.926437 +"Uckfield, East Sussex, UK",50.967941,0.085831 +"Worthing, West Sussex, UK",50.825024,-0.383835 +"Leeds, West Yorkshire, UK",53.801277,-1.548567 +"Kendal, Cumbria, UK",54.328506,-2.743870 +"Plymouth, UK",50.376289,-4.143841 +"Haverhill, Suffolk, UK",52.080875,0.444517 +"Frankton, 
Warwickshire, UK",52.328415,-1.377561 +"Inverness, the UK",57.477772,-4.224721 \ No newline at end of file diff --git a/rust/arrow/test/data/various_types.csv b/rust/arrow/test/data/various_types.csv new file mode 100644 index 0000000000000..322d9c347aaa6 --- /dev/null +++ b/rust/arrow/test/data/various_types.csv @@ -0,0 +1,6 @@ +c_int|c_float|c_string|c_bool +1|1.1|"1.11"|true +2|2.2|"2.22"|true +3||"3.33"|true +4|4.4||false +5|6.6|""|false \ No newline at end of file From bcca04aabd804263c555945463f5cf4a2ab6216f Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 8 Jan 2019 16:56:31 -0700 Subject: [PATCH 053/203] ARROW-4172: [Rust] more consistent naming in array builders This is to make the namings in `builder.rs` more consistent: 1. Changes `PrimitiveArrayBuilder` to `PrimitiveBuilder`, similarly for `ListArrayBuilder`, `BinaryArrayBuilder` and `StructArrayBuilder`. The `Array` seems redundant. 2. Currently we use both `push` and `append`, which is a bit confusing. This unifies them by using `append`. Author: Chao Sun Closes #3345 from sunchao/ARROW-4172 and squashes the following commits: 3472d12 ARROW-4172: more consistent naming in array builders --- rust/arrow/examples/builders.rs | 12 +- rust/arrow/src/array.rs | 4 +- rust/arrow/src/array_ops.rs | 22 +- rust/arrow/src/builder.rs | 368 ++++++++++++++++---------------- rust/arrow/src/csv/reader.rs | 10 +- rust/arrow/src/tensor.rs | 12 +- 6 files changed, 214 insertions(+), 214 deletions(-) diff --git a/rust/arrow/examples/builders.rs b/rust/arrow/examples/builders.rs index 92f45ce67d981..f9ba2974ef7c8 100644 --- a/rust/arrow/examples/builders.rs +++ b/rust/arrow/examples/builders.rs @@ -29,14 +29,14 @@ fn main() { // Create a new builder with a capacity of 100 let mut primitive_array_builder = Int32Builder::new(100); - // Push an individual primitive value - primitive_array_builder.push(55).unwrap(); + // Append an individual primitive value + primitive_array_builder.append_value(55).unwrap(); - // Push a null value - primitive_array_builder.push_null().unwrap(); + // Append a null value + primitive_array_builder.append_null().unwrap(); - // Push a slice of primitive values - primitive_array_builder.push_slice(&[39, 89, 12]).unwrap(); + // Append a slice of primitive values + primitive_array_builder.append_slice(&[39, 89, 12]).unwrap(); // Build the `PrimitiveArray` let _primitive_array = primitive_array_builder.finish(); diff --git a/rust/arrow/src/array.rs b/rust/arrow/src/array.rs index f8272eb007db6..78910d55cd687 100644 --- a/rust/arrow/src/array.rs +++ b/rust/arrow/src/array.rs @@ -201,8 +201,8 @@ impl PrimitiveArray { } // Returns a new primitive array builder - pub fn builder(capacity: usize) -> PrimitiveArrayBuilder { - PrimitiveArrayBuilder::::new(capacity) + pub fn builder(capacity: usize) -> PrimitiveBuilder { + PrimitiveBuilder::::new(capacity) } } diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/array_ops.rs index 69637094942cf..f41740a85e0ea 100644 --- a/rust/arrow/src/array_ops.rs +++ b/rust/arrow/src/array_ops.rs @@ -22,7 +22,7 @@ use std::ops::{Add, Div, Mul, Sub}; use num::Zero; use crate::array::{Array, BooleanArray, PrimitiveArray}; -use crate::builder::PrimitiveArrayBuilder; +use crate::builder::PrimitiveBuilder; use crate::datatypes; use crate::datatypes::ArrowNumericType; use crate::error::{ArrowError, Result}; @@ -102,13 +102,13 @@ where "Cannot perform math operation on arrays of different length".to_string(), )); } - let mut b = PrimitiveArrayBuilder::::new(left.len()); + let mut b = 
PrimitiveBuilder::::new(left.len()); for i in 0..left.len() { let index = i; if left.is_null(i) || right.is_null(i) { - b.push_null()?; + b.append_null()?; } else { - b.push(op(left.value(index), right.value(index))?)?; + b.append_value(op(left.value(index), right.value(index))?)?; } } Ok(b.finish()) @@ -276,7 +276,7 @@ where } else { Some(right.value(index)) }; - b.push(op(l, r))?; + b.append_value(op(l, r))?; } Ok(b.finish()) } @@ -291,9 +291,9 @@ pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { let mut b = BooleanArray::builder(left.len()); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { - b.push_null()?; + b.append_null()?; } else { - b.push(left.value(i) && right.value(i))?; + b.append_value(left.value(i) && right.value(i))?; } } Ok(b.finish()) @@ -309,9 +309,9 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { let mut b = BooleanArray::builder(left.len()); for i in 0..left.len() { if left.is_null(i) || right.is_null(i) { - b.push_null()?; + b.append_null()?; } else { - b.push(left.value(i) || right.value(i))?; + b.append_value(left.value(i) || right.value(i))?; } } Ok(b.finish()) @@ -322,9 +322,9 @@ pub fn not(left: &BooleanArray) -> Result { let mut b = BooleanArray::builder(left.len()); for i in 0..left.len() { if left.is_null(i) { - b.push_null()?; + b.append_null()?; } else { - b.push(!left.value(i))?; + b.append_value(!left.value(i))?; } } Ok(b.finish()) diff --git a/rust/arrow/src/builder.rs b/rust/arrow/src/builder.rs index a0bb43c7dee53..2a4b702a2738a 100644 --- a/rust/arrow/src/builder.rs +++ b/rust/arrow/src/builder.rs @@ -59,8 +59,8 @@ pub trait BufferBuilderTrait { fn capacity(&self) -> usize; fn advance(&mut self, i: usize) -> Result<()>; fn reserve(&mut self, n: usize) -> Result<()>; - fn push(&mut self, v: T::Native) -> Result<()>; - fn push_slice(&mut self, slice: &[T::Native]) -> Result<()>; + fn append(&mut self, v: T::Native) -> Result<()>; + fn append_slice(&mut self, slice: &[T::Native]) -> Result<()>; fn finish(&mut self) -> Buffer; } @@ -102,14 +102,14 @@ impl BufferBuilderTrait for BufferBuilder { Ok(()) } - /// Pushes a value into the builder, growing the internal buffer as needed. - default fn push(&mut self, v: T::Native) -> Result<()> { + /// Appends a value into the builder, growing the internal buffer as needed. + default fn append(&mut self, v: T::Native) -> Result<()> { self.reserve(1)?; self.write_bytes(v.to_byte_slice(), 1) } - /// Pushes a slice of type `T`, growing the internal buffer as needed. - default fn push_slice(&mut self, slice: &[T::Native]) -> Result<()> { + /// Appends a slice of type `T`, growing the internal buffer as needed. + default fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> { let array_slots = slice.len(); self.reserve(array_slots)?; self.write_bytes(slice.to_byte_slice(), array_slots) @@ -163,11 +163,11 @@ impl BufferBuilderTrait for BufferBuilder { Ok(()) } - /// Pushes a value into the builder, growing the internal buffer as needed. - fn push(&mut self, v: bool) -> Result<()> { + /// Appends a value into the builder, growing the internal buffer as needed. + fn append(&mut self, v: bool) -> Result<()> { self.reserve(1)?; if v { - // For performance the `len` of the buffer is not updated on each push but + // For performance the `len` of the buffer is not updated on each append but // is updated in the `freeze` method instead. 
unsafe { bit_util::set_bit_raw(self.buffer.raw_data() as *mut u8, self.len); @@ -177,11 +177,11 @@ impl BufferBuilderTrait for BufferBuilder { Ok(()) } - /// Pushes a slice of type `T`, growing the internal buffer as needed. - fn push_slice(&mut self, slice: &[bool]) -> Result<()> { + /// Appends a slice of type `T`, growing the internal buffer as needed. + fn append_slice(&mut self, slice: &[bool]) -> Result<()> { let array_slots = slice.len(); for i in 0..array_slots { - self.push(slice[i])?; + self.append(slice[i])?; } Ok(()) } @@ -201,7 +201,7 @@ impl BufferBuilderTrait for BufferBuilder { /// Reset this builder and returns an immutable `Buffer`. fn finish(&mut self) -> Buffer { - // `push` does not update the buffer's `len` so do it before `freeze` is called. + // `append` does not update the buffer's `len` so do it before `freeze` is called. let new_buffer_len = bit_util::ceil(self.len, 8); debug_assert!(new_buffer_len >= self.buffer.len()); let mut buf = ::std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); @@ -238,24 +238,24 @@ pub trait ArrayBuilder: Any { } /// Array builder for fixed-width primitive types -pub struct PrimitiveArrayBuilder { +pub struct PrimitiveBuilder { values_builder: BufferBuilder, bitmap_builder: BooleanBufferBuilder, } -pub type BooleanBuilder = PrimitiveArrayBuilder; -pub type Int8Builder = PrimitiveArrayBuilder; -pub type Int16Builder = PrimitiveArrayBuilder; -pub type Int32Builder = PrimitiveArrayBuilder; -pub type Int64Builder = PrimitiveArrayBuilder; -pub type UInt8Builder = PrimitiveArrayBuilder; -pub type UInt16Builder = PrimitiveArrayBuilder; -pub type UInt32Builder = PrimitiveArrayBuilder; -pub type UInt64Builder = PrimitiveArrayBuilder; -pub type Float32Builder = PrimitiveArrayBuilder; -pub type Float64Builder = PrimitiveArrayBuilder; - -impl ArrayBuilder for PrimitiveArrayBuilder { +pub type BooleanBuilder = PrimitiveBuilder; +pub type Int8Builder = PrimitiveBuilder; +pub type Int16Builder = PrimitiveBuilder; +pub type Int32Builder = PrimitiveBuilder; +pub type Int64Builder = PrimitiveBuilder; +pub type UInt8Builder = PrimitiveBuilder; +pub type UInt16Builder = PrimitiveBuilder; +pub type UInt32Builder = PrimitiveBuilder; +pub type UInt64Builder = PrimitiveBuilder; +pub type Float32Builder = PrimitiveBuilder; +pub type Float64Builder = PrimitiveBuilder; + +impl ArrayBuilder for PrimitiveBuilder { /// Returns the builder as an non-mutable `Any` reference. 
fn as_any(&self) -> &Any { self @@ -282,7 +282,7 @@ impl ArrayBuilder for PrimitiveArrayBuilder { } } -impl PrimitiveArrayBuilder { +impl PrimitiveBuilder { /// Creates a new primitive array builder pub fn new(capacity: usize) -> Self { Self { @@ -296,33 +296,33 @@ impl PrimitiveArrayBuilder { self.values_builder.capacity() } - /// Pushes a value of type `T` into the builder - pub fn push(&mut self, v: T::Native) -> Result<()> { - self.bitmap_builder.push(true)?; - self.values_builder.push(v)?; + /// Appends a value of type `T` into the builder + pub fn append_value(&mut self, v: T::Native) -> Result<()> { + self.bitmap_builder.append(true)?; + self.values_builder.append(v)?; Ok(()) } - /// Pushes a null slot into the builder - pub fn push_null(&mut self) -> Result<()> { - self.bitmap_builder.push(false)?; + /// Appends a null slot into the builder + pub fn append_null(&mut self) -> Result<()> { + self.bitmap_builder.append(false)?; self.values_builder.advance(1)?; Ok(()) } - /// Pushes an `Option` into the builder - pub fn push_option(&mut self, v: Option) -> Result<()> { + /// Appends an `Option` into the builder + pub fn append_option(&mut self, v: Option) -> Result<()> { match v { - None => self.push_null()?, - Some(v) => self.push(v)?, + None => self.append_null()?, + Some(v) => self.append_value(v)?, }; Ok(()) } - /// Pushes a slice of type `T` into the builder - pub fn push_slice(&mut self, v: &[T::Native]) -> Result<()> { - self.bitmap_builder.push_slice(&vec![true; v.len()][..])?; - self.values_builder.push_slice(v)?; + /// Appends a slice of type `T` into the builder + pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> { + self.bitmap_builder.append_slice(&vec![true; v.len()][..])?; + self.values_builder.append_slice(v)?; Ok(()) } @@ -345,18 +345,18 @@ impl PrimitiveArrayBuilder { } /// Array builder for `ListArray` -pub struct ListArrayBuilder { +pub struct ListBuilder { offsets_builder: Int32BufferBuilder, bitmap_builder: BooleanBufferBuilder, values_builder: T, len: usize, } -impl ListArrayBuilder { +impl ListBuilder { /// Creates a new `ListArrayBuilder` from a given values array builder pub fn new(values_builder: T) -> Self { let mut offsets_builder = Int32BufferBuilder::new(values_builder.len() + 1); - offsets_builder.push(0).unwrap(); + offsets_builder.append(0).unwrap(); Self { offsets_builder, bitmap_builder: BooleanBufferBuilder::new(values_builder.len()), @@ -366,7 +366,7 @@ impl ListArrayBuilder { } } -impl ArrayBuilder for ListArrayBuilder +impl ArrayBuilder for ListBuilder where T: 'static, { @@ -396,13 +396,13 @@ where } } -impl ListArrayBuilder +impl ListBuilder where T: 'static, { /// Returns the child array builder as a mutable reference. /// - /// This mutable reference can be used to push values into the child array builder, + /// This mutable reference can be used to append values into the child array builder, /// but you must call `append` to delimit each distinct list value. 
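+    ///
+    /// For example, appending two values to the child builder and then
+    /// calling `append(true)` records a single non-null list value containing
+    /// both, while `append(false)` records a null list slot without consuming
+    /// any child values.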
pub fn values(&mut self) -> &mut T { &mut self.values_builder @@ -411,8 +411,8 @@ where /// Finish the current variable-length list array slot pub fn append(&mut self, is_valid: bool) -> Result<()> { self.offsets_builder - .push(self.values_builder.len() as i32)?; - self.bitmap_builder.push(is_valid)?; + .append(self.values_builder.len() as i32)?; + self.bitmap_builder.append(is_valid)?; self.len += 1; Ok(()) } @@ -431,7 +431,7 @@ where let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.bitmap_builder.finish(); - self.offsets_builder.push(0).unwrap(); + self.offsets_builder.append(0).unwrap(); let data = ArrayData::builder(DataType::List(Box::new(values_data.data_type().clone()))) .len(len) .null_count(len - bit_util::count_set_bits(null_bit_buffer.data())) @@ -445,11 +445,11 @@ where } /// Array builder for `BinaryArray` -pub struct BinaryArrayBuilder { - builder: ListArrayBuilder, +pub struct BinaryBuilder { + builder: ListBuilder, } -impl ArrayBuilder for BinaryArrayBuilder { +impl ArrayBuilder for BinaryBuilder { /// Returns the builder as an non-mutable `Any` reference. fn as_any(&self) -> &Any { self @@ -476,30 +476,30 @@ impl ArrayBuilder for BinaryArrayBuilder { } } -impl BinaryArrayBuilder { - /// Creates a new `BinaryArrayBuilder`, `capacity` is the number of bytes in the values array +impl BinaryBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values array pub fn new(capacity: usize) -> Self { let values_builder = UInt8Builder::new(capacity); Self { - builder: ListArrayBuilder::new(values_builder), + builder: ListBuilder::new(values_builder), } } - /// Pushes a single byte value into the builder's values array. + /// Appends a single byte value into the builder's values array. /// - /// Note, when pushing individual byte values you must call `append` to delimit each + /// Note, when appending individual byte values you must call `append` to delimit each /// distinct list value. - pub fn push(&mut self, value: u8) -> Result<()> { - self.builder.values().push(value)?; + pub fn append_value(&mut self, value: u8) -> Result<()> { + self.builder.values().append_value(value)?; Ok(()) } - /// Pushes a `&String` or `&str` into the builder. + /// Appends a `&String` or `&str` into the builder. /// - /// Automatically calls the `append` method to delimit the string pushed in as a distinct - /// array element. - pub fn push_string(&mut self, value: &str) -> Result<()> { - self.builder.values().push_slice(value.as_bytes())?; + /// Automatically calls the `append` method to delimit the string appended in as a + /// distinct array element. + pub fn append_string(&mut self, value: &str) -> Result<()> { + self.builder.values().append_slice(value.as_bytes())?; self.builder.append(true)?; Ok(()) } @@ -524,7 +524,7 @@ impl BinaryArrayBuilder { /// /// Note that callers should make sure that methods of all the child field builders are /// properly called to maintain the consistency of the data structure. -pub struct StructArrayBuilder { +pub struct StructBuilder { fields: Vec, field_anys: Vec>, field_builders: Vec>, @@ -532,7 +532,7 @@ pub struct StructArrayBuilder { len: usize, } -impl ArrayBuilder for StructArrayBuilder { +impl ArrayBuilder for StructBuilder { /// Returns the number of array slots in the builder. 
///
/// Note that this always return the first child field builder's length, and it is
@@ -571,7 +571,7 @@ impl ArrayBuilder for StructArrayBuilder {
}
}
-impl StructArrayBuilder {
+impl StructBuilder {
pub fn new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self {
let mut field_anys = Vec::with_capacity(builders.len());
let mut field_builders = Vec::with_capacity(builders.len());
@@ -619,7 +619,7 @@ impl StructArrayBuilder {
DataType::UInt64 => Box::new(UInt64Builder::new(capacity)),
DataType::Float32 => Box::new(Float32Builder::new(capacity)),
DataType::Float64 => Box::new(Float64Builder::new(capacity)),
- DataType::Utf8 => Box::new(BinaryArrayBuilder::new(capacity)),
+ DataType::Utf8 => Box::new(BinaryBuilder::new(capacity)),
DataType::Struct(fields) => {
let schema = Schema::new(fields.clone());
Box::new(Self::from_schema(schema, capacity))
@@ -643,7 +643,7 @@ impl StructArrayBuilder {
/// Appends an element (either null or non-null) to the struct. The actual elements
/// should be appended for each child sub-array in a consistent way.
pub fn append(&mut self, is_valid: bool) -> Result<()> {
- self.bitmap_builder.push(is_valid)?;
+ self.bitmap_builder.append(is_valid)?;
self.len += 1;
Ok(())
}
@@ -675,7 +675,7 @@ impl StructArrayBuilder {
}
}
-impl Drop for StructArrayBuilder {
+impl Drop for StructBuilder {
fn drop(&mut self) {
// To avoid double drop on the field array builders.
let builders = ::std::mem::replace(&mut self.field_builders, Vec::new());
@@ -702,7 +702,7 @@ mod tests {
#[test]
fn test_builder_i32_alloc_zero_bytes() {
let mut b = Int32BufferBuilder::new(0);
- b.push(123).unwrap();
+ b.append(123).unwrap();
let a = b.finish();
assert_eq!(4, a.len());
}
@@ -711,7 +711,7 @@
fn test_builder_i32() {
let mut b = Int32BufferBuilder::new(5);
for i in 0..5 {
- b.push(i).unwrap();
+ b.append(i).unwrap();
}
assert_eq!(16, b.capacity());
let a = b.finish();
@@ -723,7 +723,7 @@
let mut b = Int32BufferBuilder::new(2);
assert_eq!(16, b.capacity());
for i in 0..20 {
- b.push(i).unwrap();
+ b.append(i).unwrap();
}
assert_eq!(32, b.capacity());
let a = b.finish();
@@ -735,7 +735,7 @@
let mut b = Int32BufferBuilder::new(5);
assert_eq!(16, b.capacity());
for i in 0..10 {
- b.push(i).unwrap();
+ b.append(i).unwrap();
}
let mut a = b.finish();
assert_eq!(40, a.len());
@@ -744,7 +744,7 @@
// Try build another buffer after cleaning up.
for i in 0..20 { - b.push(i).unwrap() + b.append(i).unwrap() } assert_eq!(32, b.capacity()); a = b.finish(); @@ -769,15 +769,15 @@ mod tests { } #[test] - fn test_push_slice() { + fn test_append_slice() { let mut b = UInt8BufferBuilder::new(0); - b.push_slice("Hello, ".as_bytes()).unwrap(); - b.push_slice("World!".as_bytes()).unwrap(); + b.append_slice("Hello, ".as_bytes()).unwrap(); + b.append_slice("World!".as_bytes()).unwrap(); let buffer = b.finish(); assert_eq!(13, buffer.len()); let mut b = Int32BufferBuilder::new(0); - b.push_slice(&[32, 54]).unwrap(); + b.append_slice(&[32, 54]).unwrap(); let buffer = b.finish(); assert_eq!(8, buffer.len()); } @@ -785,17 +785,17 @@ mod tests { #[test] fn test_write_bytes() { let mut b = BooleanBufferBuilder::new(4); - b.push(false).unwrap(); - b.push(true).unwrap(); - b.push(false).unwrap(); - b.push(true).unwrap(); + b.append(false).unwrap(); + b.append(true).unwrap(); + b.append(false).unwrap(); + b.append(true).unwrap(); assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); assert_eq!(1, buffer.len()); let mut b = BooleanBufferBuilder::new(4); - b.push_slice(&[false, true, false, true]).unwrap(); + b.append_slice(&[false, true, false, true]).unwrap(); assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); @@ -829,9 +829,9 @@ mod tests { for i in 0..10 { if i == 3 || i == 6 || i == 9 { - builder.push(true).unwrap(); + builder.append(true).unwrap(); } else { - builder.push(false).unwrap(); + builder.append(false).unwrap(); } } let buf2 = builder.finish(); @@ -844,7 +844,7 @@ mod tests { fn test_primitive_array_builder_i32() { let mut builder = Int32Array::builder(5); for i in 0..5 { - builder.push(i).unwrap(); + builder.append_value(i).unwrap(); } let arr = builder.finish(); assert_eq!(5, arr.len()); @@ -864,9 +864,9 @@ mod tests { let mut builder = BooleanArray::builder(10); for i in 0..10 { if i == 3 || i == 6 || i == 9 { - builder.push(true).unwrap(); + builder.append_value(true).unwrap(); } else { - builder.push(false).unwrap(); + builder.append_value(false).unwrap(); } } @@ -883,15 +883,15 @@ mod tests { } #[test] - fn test_primitive_array_builder_push_option() { + fn test_primitive_array_builder_append_option() { let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]); let mut builder = Int32Array::builder(5); - builder.push_option(Some(0)).unwrap(); - builder.push_option(None).unwrap(); - builder.push_option(Some(2)).unwrap(); - builder.push_option(None).unwrap(); - builder.push_option(Some(4)).unwrap(); + builder.append_option(Some(0)).unwrap(); + builder.append_option(None).unwrap(); + builder.append_option(Some(2)).unwrap(); + builder.append_option(None).unwrap(); + builder.append_option(Some(4)).unwrap(); let arr2 = builder.finish(); assert_eq!(arr1.len(), arr2.len()); @@ -907,15 +907,15 @@ mod tests { } #[test] - fn test_primitive_array_builder_push_null() { + fn test_primitive_array_builder_append_null() { let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); let mut builder = Int32Array::builder(5); - builder.push(0).unwrap(); - builder.push(2).unwrap(); - builder.push_null().unwrap(); - builder.push_null().unwrap(); - builder.push(4).unwrap(); + builder.append_value(0).unwrap(); + builder.append_value(2).unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_value(4).unwrap(); let arr2 = builder.finish(); assert_eq!(arr1.len(), arr2.len()); @@ -931,14 +931,14 @@ mod tests { } #[test] - fn 
test_primitive_array_builder_push_slice() { + fn test_primitive_array_builder_append_slice() { let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); let mut builder = Int32Array::builder(5); - builder.push_slice(&[0, 2]).unwrap(); - builder.push_null().unwrap(); - builder.push_null().unwrap(); - builder.push(4).unwrap(); + builder.append_slice(&[0, 2]).unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_value(4).unwrap(); let arr2 = builder.finish(); assert_eq!(arr1.len(), arr2.len()); @@ -956,12 +956,12 @@ mod tests { #[test] fn test_primitive_array_builder_finish() { let mut builder = Int32Builder::new(5); - builder.push_slice(&[2, 4, 6, 8]).unwrap(); + builder.append_slice(&[2, 4, 6, 8]).unwrap(); let mut arr = builder.finish(); assert_eq!(4, arr.len()); assert_eq!(0, builder.len()); - builder.push_slice(&[1, 3, 5, 7, 9]).unwrap(); + builder.append_slice(&[1, 3, 5, 7, 9]).unwrap(); arr = builder.finish(); assert_eq!(5, arr.len()); assert_eq!(0, builder.len()); @@ -970,19 +970,19 @@ mod tests { #[test] fn test_list_array_builder() { let values_builder = Int32Builder::new(10); - let mut builder = ListArrayBuilder::new(values_builder); + let mut builder = ListBuilder::new(values_builder); // [[0, 1, 2], [3, 4, 5], [6, 7]] - builder.values().push(0).unwrap(); - builder.values().push(1).unwrap(); - builder.values().push(2).unwrap(); + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); builder.append(true).unwrap(); - builder.values().push(3).unwrap(); - builder.values().push(4).unwrap(); - builder.values().push(5).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_value(4).unwrap(); + builder.values().append_value(5).unwrap(); builder.append(true).unwrap(); - builder.values().push(6).unwrap(); - builder.values().push(7).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); builder.append(true).unwrap(); let list_array = builder.finish(); @@ -1009,20 +1009,20 @@ mod tests { #[test] fn test_list_array_builder_nulls() { let values_builder = Int32Builder::new(10); - let mut builder = ListArrayBuilder::new(values_builder); + let mut builder = ListBuilder::new(values_builder); // [[0, 1, 2], null, [3, null, 5], [6, 7]] - builder.values().push(0).unwrap(); - builder.values().push(1).unwrap(); - builder.values().push(2).unwrap(); + builder.values().append_value(0).unwrap(); + builder.values().append_value(1).unwrap(); + builder.values().append_value(2).unwrap(); builder.append(true).unwrap(); builder.append(false).unwrap(); - builder.values().push(3).unwrap(); - builder.values().push_null().unwrap(); - builder.values().push(5).unwrap(); + builder.values().append_value(3).unwrap(); + builder.values().append_null().unwrap(); + builder.values().append_value(5).unwrap(); builder.append(true).unwrap(); - builder.values().push(6).unwrap(); - builder.values().push(7).unwrap(); + builder.values().append_value(6).unwrap(); + builder.values().append_value(7).unwrap(); builder.append(true).unwrap(); let list_array = builder.finish(); @@ -1036,18 +1036,18 @@ mod tests { #[test] fn test_list_array_builder_finish() { let values_builder = Int32Array::builder(5); - let mut builder = ListArrayBuilder::new(values_builder); + let mut builder = ListBuilder::new(values_builder); - builder.values().push_slice(&[1, 2, 3]).unwrap(); + builder.values().append_slice(&[1, 2, 3]).unwrap(); 
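// append_slice stages all three values in one call; the append(true) below then
// closes them out as a single list slot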
builder.append(true).unwrap(); - builder.values().push_slice(&[4, 5, 6]).unwrap(); + builder.values().append_slice(&[4, 5, 6]).unwrap(); builder.append(true).unwrap(); let mut arr = builder.finish(); assert_eq!(2, arr.len()); assert_eq!(0, builder.len()); - builder.values().push_slice(&[7, 8, 9]).unwrap(); + builder.values().append_slice(&[7, 8, 9]).unwrap(); builder.append(true).unwrap(); arr = builder.finish(); assert_eq!(1, arr.len()); @@ -1057,31 +1057,31 @@ mod tests { #[test] fn test_list_list_array_builder() { let primitive_builder = Int32Builder::new(10); - let values_builder = ListArrayBuilder::new(primitive_builder); - let mut builder = ListArrayBuilder::new(values_builder); + let values_builder = ListBuilder::new(primitive_builder); + let mut builder = ListBuilder::new(values_builder); // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] - builder.values().values().push(1).unwrap(); - builder.values().values().push(2).unwrap(); + builder.values().values().append_value(1).unwrap(); + builder.values().values().append_value(2).unwrap(); builder.values().append(true).unwrap(); - builder.values().values().push(3).unwrap(); - builder.values().values().push(4).unwrap(); + builder.values().values().append_value(3).unwrap(); + builder.values().values().append_value(4).unwrap(); builder.values().append(true).unwrap(); builder.append(true).unwrap(); - builder.values().values().push(5).unwrap(); - builder.values().values().push(6).unwrap(); - builder.values().values().push(7).unwrap(); + builder.values().values().append_value(5).unwrap(); + builder.values().values().append_value(6).unwrap(); + builder.values().values().append_value(7).unwrap(); builder.values().append(true).unwrap(); builder.values().append(false).unwrap(); - builder.values().values().push(8).unwrap(); + builder.values().values().append_value(8).unwrap(); builder.values().append(true).unwrap(); builder.append(true).unwrap(); builder.append(false).unwrap(); - builder.values().values().push(9).unwrap(); - builder.values().values().push(10).unwrap(); + builder.values().values().append_value(9).unwrap(); + builder.values().values().append_value(10).unwrap(); builder.values().append(true).unwrap(); builder.append(true).unwrap(); @@ -1111,20 +1111,20 @@ mod tests { #[test] fn test_binary_array_builder() { - let mut builder = BinaryArrayBuilder::new(20); + let mut builder = BinaryBuilder::new(20); - builder.push(b'h').unwrap(); - builder.push(b'e').unwrap(); - builder.push(b'l').unwrap(); - builder.push(b'l').unwrap(); - builder.push(b'o').unwrap(); + builder.append_value(b'h').unwrap(); + builder.append_value(b'e').unwrap(); + builder.append_value(b'l').unwrap(); + builder.append_value(b'l').unwrap(); + builder.append_value(b'o').unwrap(); builder.append(true).unwrap(); builder.append(true).unwrap(); - builder.push(b'w').unwrap(); - builder.push(b'o').unwrap(); - builder.push(b'r').unwrap(); - builder.push(b'l').unwrap(); - builder.push(b'd').unwrap(); + builder.append_value(b'w').unwrap(); + builder.append_value(b'o').unwrap(); + builder.append_value(b'r').unwrap(); + builder.append_value(b'l').unwrap(); + builder.append_value(b'd').unwrap(); builder.append(true).unwrap(); let array = builder.finish(); @@ -1145,29 +1145,29 @@ mod tests { #[test] fn test_binary_array_builder_finish() { - let mut builder = BinaryArrayBuilder::new(10); + let mut builder = BinaryBuilder::new(10); - builder.push_string("hello").unwrap(); - builder.push_string("world").unwrap(); + builder.append_string("hello").unwrap(); + 
builder.append_string("world").unwrap();
let mut arr = builder.finish();
assert_eq!(2, arr.len());
assert_eq!(0, builder.len());
- builder.push_string("arrow").unwrap();
+ builder.append_string("arrow").unwrap();
arr = builder.finish();
assert_eq!(1, arr.len());
assert_eq!(0, builder.len());
}
#[test]
- fn test_binary_array_builder_push_string() {
- let mut builder = BinaryArrayBuilder::new(20);
+ fn test_binary_array_builder_append_string() {
+ let mut builder = BinaryBuilder::new(20);
let var = "hello".to_owned();
- builder.push_string(&var).unwrap();
+ builder.append_string(&var).unwrap();
builder.append(true).unwrap();
- builder.push_string("world").unwrap();
+ builder.append_string("world").unwrap();
let array = builder.finish();
@@ -1187,7 +1187,7 @@ mod tests {
#[test]
fn test_struct_array_builder() {
- let string_builder = BinaryArrayBuilder::new(4);
+ let string_builder = BinaryBuilder::new(4);
let int_builder = Int32Builder::new(4);
let mut fields = Vec::new();
let mut field_builders = Vec::new();
fields.push(Field::new("f1", DataType::Utf8, false));
field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
fields.push(Field::new("f2", DataType::Int32, false));
field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
- let mut builder = StructArrayBuilder::new(fields, field_builders);
+ let mut builder = StructBuilder::new(fields, field_builders);
assert_eq!(2, builder.num_fields());
let string_builder = builder
- .field_builder::<BinaryArrayBuilder>(0)
+ .field_builder::<BinaryBuilder>(0)
.expect("builder at field 0 should be binary builder");
- string_builder.push_string("joe").unwrap();
+ string_builder.append_string("joe").unwrap();
string_builder.append_null().unwrap();
string_builder.append_null().unwrap();
- string_builder.push_string("mark").unwrap();
+ string_builder.append_string("mark").unwrap();
let int_builder = builder
.field_builder::<Int32Builder>(1)
.expect("builder at field 1 should be int builder");
- int_builder.push(1).unwrap();
- int_builder.push(2).unwrap();
- int_builder.push_null().unwrap();
- int_builder.push(4).unwrap();
+ int_builder.append_value(1).unwrap();
+ int_builder.append_value(2).unwrap();
+ int_builder.append_null().unwrap();
+ int_builder.append_value(4).unwrap();
builder.append(true).unwrap();
builder.append(true).unwrap();
@@ -1282,16 +1282,16 @@ mod tests {
fields.push(Field::new("f2", DataType::Boolean, false));
field_builders.push(Box::new(bool_builder) as Box<ArrayBuilder>);
- let mut builder = StructArrayBuilder::new(fields, field_builders);
+ let mut builder = StructBuilder::new(fields, field_builders);
builder
.field_builder::<Int32Builder>(0)
.unwrap()
- .push_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
.unwrap();
builder
.field_builder::<BooleanBuilder>(1)
.unwrap()
- .push_slice(&[
+ .append_slice(&[
false, true, false, true, false, true, false, true, false, true,
])
.unwrap();
@@ -1303,12 +1303,12 @@
builder
.field_builder::<Int32Builder>(0)
.unwrap()
- .push_slice(&[1, 3, 5, 7, 9])
+ .append_slice(&[1, 3, 5, 7, 9])
.unwrap();
builder
.field_builder::<BooleanBuilder>(1)
.unwrap()
- .push_slice(&[false, true, false, true, false])
+ .append_slice(&[false, true, false, true, false])
.unwrap();
let arr = builder.finish();
@@ -1327,11 +1327,11 @@ mod tests {
let struct_type = DataType::Struct(sub_fields);
fields.push(Field::new("f3", struct_type, false));
- let mut builder = StructArrayBuilder::from_schema(Schema::new(fields), 5);
+ let mut builder = StructBuilder::from_schema(Schema::new(fields), 5);
assert_eq!(3, builder.num_fields());
assert!(builder.field_builder::<Float32Builder>(0).is_some());
- assert!(builder.field_builder::<BinaryArrayBuilder>(1).is_some());
- assert!(builder.field_builder::<StructArrayBuilder>(2).is_some());
+ assert!(builder.field_builder::<BinaryBuilder>(1).is_some());
+ assert!(builder.field_builder::<StructBuilder>(2).is_some());
}
#[test]
@@ -1342,7 +1342,7 @@ mod tests {
let list_type = DataType::List(Box::new(DataType::Int64));
fields.push(Field::new("f2", list_type, false));
- let _ = StructArrayBuilder::from_schema(Schema::new(fields), 5);
+ let _ = StructBuilder::from_schema(Schema::new(fields), 5);
}
#[test]
@@ -1354,8 +1354,8 @@ mod tests {
fields.push(Field::new("f1", DataType::Int32, false));
field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
- let mut builder = StructArrayBuilder::new(fields, field_builders);
- assert!(builder.field_builder::<BinaryArrayBuilder>(0).is_none());
+ let mut builder = StructBuilder::new(fields, field_builders);
+ assert!(builder.field_builder::<BinaryBuilder>(0).is_none());
}
}
diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs
index 49e0302aa0672..718e8d526c46b 100644
--- a/rust/arrow/src/csv/reader.rs
+++ b/rust/arrow/src/csv/reader.rs
@@ -266,10 +266,10 @@ impl<R: Read> Reader<R> {
&DataType::Float32 => self.build_primitive_array::<Float32Type>(rows, i),
&DataType::Float64 => self.build_primitive_array::<Float64Type>(rows, i),
&DataType::Utf8 => {
- let mut builder = BinaryArrayBuilder::new(rows.len());
+ let mut builder = BinaryBuilder::new(rows.len());
for row_index in 0..rows.len() {
match rows[row_index].get(*i) {
- Some(s) => builder.push_string(s).unwrap(),
+ Some(s) => builder.append_string(s).unwrap(),
_ => builder.append(false).unwrap(),
}
}
@@ -294,7 +294,7 @@ impl<R: Read> Reader<R> {
rows: &[StringRecord],
col_idx: &usize,
) -> Result<ArrayRef> {
- let mut builder = PrimitiveArrayBuilder::<T>::new(rows.len());
+ let mut builder = PrimitiveBuilder::<T>::new(rows.len());
let is_boolean_type =
*self.schema.field(*col_idx).data_type() == DataType::Boolean;
for row_index in 0..rows.len() {
match rows[row_index].get(*col_idx) {
@@ -305,7 +305,7 @@ impl<R: Read> Reader<R> {
s.parse::<T::Native>()
};
match t {
- Ok(v) => builder.push(v)?,
+ Ok(v) => builder.append_value(v)?,
Err(_) => {
// TODO: we should surface the underlying error here.
return Err(ArrowError::ParseError(format!( @@ -315,7 +315,7 @@ impl Reader { } } } - _ => builder.push_null()?, + _ => builder.append_null()?, } } Ok(Arc::new(builder.finish())) diff --git a/rust/arrow/src/tensor.rs b/rust/arrow/src/tensor.rs index 7272a2cf14631..1703c83738570 100644 --- a/rust/arrow/src/tensor.rs +++ b/rust/arrow/src/tensor.rs @@ -279,7 +279,7 @@ mod tests { fn test_tensor() { let mut builder = Int32BufferBuilder::new(16); for i in 0..16 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); let tensor = Int32Tensor::new(buf, Some(vec![2, 8]), None, None); @@ -294,7 +294,7 @@ mod tests { fn test_new_row_major() { let mut builder = Int32BufferBuilder::new(16); for i in 0..16 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); let tensor = Int32Tensor::new_row_major(buf, Some(vec![2, 8]), None); @@ -312,7 +312,7 @@ mod tests { fn test_new_column_major() { let mut builder = Int32BufferBuilder::new(16); for i in 0..16 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); let tensor = Int32Tensor::new_column_major(buf, Some(vec![2, 8]), None); @@ -330,7 +330,7 @@ mod tests { fn test_with_names() { let mut builder = Int64BufferBuilder::new(8); for i in 0..8 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); let names = vec!["Dim 1", "Dim 2"]; @@ -351,7 +351,7 @@ mod tests { fn test_inconsistent_strides() { let mut builder = Int32BufferBuilder::new(16); for i in 0..16 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); Int32Tensor::new(buf, Some(vec![2, 8]), Some(vec![2, 8, 1]), None); @@ -362,7 +362,7 @@ mod tests { fn test_inconsistent_names() { let mut builder = Int32BufferBuilder::new(16); for i in 0..16 { - builder.push(i).unwrap(); + builder.append(i).unwrap(); } let buf = builder.finish(); Int32Tensor::new( From a3aed3b60bd61c55d7402c4484e480f1998b99f1 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Wed, 9 Jan 2019 09:17:46 +0900 Subject: [PATCH 054/203] ARROW-4184: [Ruby] Add Arrow::RecordBatch#to_table Author: Kouhei Sutou Closes #3339 from kou/ruby-record-batch-to-table and squashes the following commits: a6fab35f Require gobject-introspection gem 3.3.1 or later 4a1f3564 Add Arrow::RecordBatch#to_table --- ruby/red-arrow/lib/arrow/record-batch.rb | 9 +++++++++ ruby/red-arrow/red-arrow.gemspec | 2 +- ruby/red-arrow/test/test-record-batch.rb | 23 ++++++++++++++--------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb index f5f8ea2e77721..6d9c35b9dc849 100644 --- a/ruby/red-arrow/lib/arrow/record-batch.rb +++ b/ruby/red-arrow/lib/arrow/record-batch.rb @@ -29,6 +29,15 @@ def columns @columns ||= columns_raw end + # Converts the record batch to {Arrow::Table}. 
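+ #
+ # @example Convert a record batch to a table
+ #   table = record_batch.to_table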
+ # + # @return [Arrow::Table] + # + # @since 0.12.0 + def to_table + Table.new(schema, [self]) + end + def respond_to_missing?(name, include_private) return true if find_column(name) super diff --git a/ruby/red-arrow/red-arrow.gemspec b/ruby/red-arrow/red-arrow.gemspec index 8e79c75dcaff2..2d417f08b0087 100644 --- a/ruby/red-arrow/red-arrow.gemspec +++ b/ruby/red-arrow/red-arrow.gemspec @@ -45,7 +45,7 @@ Gem::Specification.new do |spec| spec.test_files += Dir.glob("test/**/*") spec.extensions = ["dependency-check/Rakefile"] - spec.add_runtime_dependency("gobject-introspection", ">= 3.1.1") + spec.add_runtime_dependency("gobject-introspection", ">= 3.3.1") spec.add_runtime_dependency("pkg-config") spec.add_runtime_dependency("native-package-installer") diff --git a/ruby/red-arrow/test/test-record-batch.rb b/ruby/red-arrow/test/test-record-batch.rb index 994b16de99813..4dac085bff86e 100644 --- a/ruby/red-arrow/test/test-record-batch.rb +++ b/ruby/red-arrow/test/test-record-batch.rb @@ -16,16 +16,16 @@ # under the License. class RecordBatchTest < Test::Unit::TestCase - sub_test_case(".each") do - setup do - fields = [ - Arrow::Field.new("count", :uint32), - ] - @schema = Arrow::Schema.new(fields) - @counts = Arrow::UInt32Array.new([1, 2, 4, 8]) - @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts]) - end + setup do + fields = [ + Arrow::Field.new("count", :uint32), + ] + @schema = Arrow::Schema.new(fields) + @counts = Arrow::UInt32Array.new([1, 2, 4, 8]) + @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts]) + end + sub_test_case(".each") do test("default") do records = [] @record_batch.each do |record| @@ -54,4 +54,9 @@ class RecordBatchTest < Test::Unit::TestCase records.collect {|record, i| [record.index, i]}) end end + + test("#to_table") do + assert_equal(Arrow::Table.new(@schema, [@counts]), + @record_batch.to_table) + end end From 420c949fd4e593fb0303954092b3d8a46a7aa864 Mon Sep 17 00:00:00 2001 From: Yosuke Shiro Date: Wed, 9 Jan 2019 09:28:03 +0900 Subject: [PATCH 055/203] ARROW-4175: [GLib] Add support for decimal compare operators Author: Yosuke Shiro Author: Kouhei Sutou Closes #3346 from shiro615/glib-add-support-for-decimal-compare-operators and squashes the following commits: 28871fd6 Fix documents e81d4146 Unify test case comparisons 0791c4f1 Use rubyish method name 54f46039 Add a test for equal 943c2364 Rename 'more than' to 'greater than' 181e0544 Add support for decimal compare operators --- c_glib/arrow-glib/decimal128.cpp | 98 +++++++++++++++++++++++++++++++- c_glib/arrow-glib/decimal128.h | 15 +++++ c_glib/test/test-decimal128.rb | 97 +++++++++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) diff --git a/c_glib/arrow-glib/decimal128.cpp b/c_glib/arrow-glib/decimal128.cpp index d87a5019c1203..a49dba580ee79 100644 --- a/c_glib/arrow-glib/decimal128.cpp +++ b/c_glib/arrow-glib/decimal128.cpp @@ -141,7 +141,8 @@ garrow_decimal128_new_integer(const gint64 data) * @decimal: A #GArrowDecimal128. * @other_decimal: A #GArrowDecimal128 to be compared. * - * Returns: %TRUE if both of them is the same value, %FALSE otherwise. + * Returns: %TRUE if the decimal is equal to the other decimal, %FALSE + * otherwise. * * Since: 0.12.0 */ @@ -154,6 +155,101 @@ garrow_decimal128_equal(GArrowDecimal128 *decimal, return *arrow_decimal == *arrow_other_decimal; } +/** + * garrow_decimal128_not_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. 
+ * + * Returns: %TRUE if the decimal isn't equal to the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal != *arrow_other_decimal; +} + +/** + * garrow_decimal128_less_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal < *arrow_other_decimal; +} + +/** + * garrow_decimal128_less_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is less than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal <= *arrow_other_decimal; +} + +/** + * garrow_decimal128_greater_than: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal, + * %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal > *arrow_other_decimal; +} + +/** + * garrow_decimal128_greater_than_or_equal: + * @decimal: A #GArrowDecimal128. + * @other_decimal: A #GArrowDecimal128 to be compared. + * + * Returns: %TRUE if the decimal is greater than the other decimal + * or equal to the other decimal, %FALSE otherwise. + * + * Since: 0.12.0 + */ +gboolean +garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal) +{ + const auto arrow_decimal = garrow_decimal128_get_raw(decimal); + const auto arrow_other_decimal = garrow_decimal128_get_raw(other_decimal); + return *arrow_decimal >= *arrow_other_decimal; +} + /** * garrow_decimal128_to_string_scale: * @decimal: A #GArrowDecimal128. 
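(With the full comparison set above, bindings that map GObject methods onto native operators can compare decimals directly with `==`, `!=`, `<`, `<=`, `>` and `>=`; the Ruby tests below exercise exactly this, which is why they require gobject-introspection 3.3.1 or later.)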
diff --git a/c_glib/arrow-glib/decimal128.h b/c_glib/arrow-glib/decimal128.h index e8fa59980cd94..e7601a457601b 100644 --- a/c_glib/arrow-glib/decimal128.h +++ b/c_glib/arrow-glib/decimal128.h @@ -41,6 +41,21 @@ GArrowDecimal128 *garrow_decimal128_new_integer(const gint64 data); GARROW_AVAILABLE_IN_0_12 gboolean garrow_decimal128_equal(GArrowDecimal128 *decimal, GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_not_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_less_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_0_12 +gboolean garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, + GArrowDecimal128 *other_decimal); gchar *garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale); gchar *garrow_decimal128_to_string(GArrowDecimal128 *decimal); diff --git a/c_glib/test/test-decimal128.rb b/c_glib/test/test-decimal128.rb index 99f1912babfae..de9453cbe69cd 100644 --- a/c_glib/test/test-decimal128.rb +++ b/c_glib/test/test-decimal128.rb @@ -106,4 +106,101 @@ def test_divide_zero decimal1.divide(decimal2) end end + + def test_equal + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(10) + other_decimal2 = Arrow::Decimal128.new(11) + assert_equal([ + true, + false, + ], + [ + decimal == other_decimal1, + decimal == other_decimal2, + ]) + end + + def test_not_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(10) + other_decimal2 = Arrow::Decimal128.new(11) + assert_equal([ + false, + true, + ], + [ + decimal != other_decimal1, + decimal != other_decimal2, + ]) + end + + def test_less_than + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + true, + false, + false + ], + [ + decimal < other_decimal1, + decimal < other_decimal2, + decimal < decimal, + ]) + end + + def test_less_than_or_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + true, + false, + true + ], + [ + decimal <= other_decimal1, + decimal <= other_decimal2, + decimal <= decimal + ]) + end + + def test_greater_than + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + false, + true, + false + ], + [ + decimal > other_decimal1, + decimal > other_decimal2, + decimal > decimal + ]) + end + + def test_greater_than_or_equal + require_gi_bindings(3, 3, 1) + decimal = Arrow::Decimal128.new(10) + other_decimal1 = Arrow::Decimal128.new(11) + other_decimal2 = Arrow::Decimal128.new(9) + assert_equal([ + false, + true, + true + ], + [ + decimal >= other_decimal1, + decimal >= other_decimal2, + decimal >= decimal + ]) + end end From bfe6865ba8087a46bd7665679e48af3a77987cef Mon Sep 17 00:00:00 2001 From: Pindikura Ravindra Date: Wed, 9 Jan 2019 09:11:01 +0530 Subject: [PATCH 056/203] ARROW-4147: [Java] reduce heap usage for varwidth vectors 
(#3298) * ARROW-4147: reduce heap usage for varwidth vectors - some code reorg to avoid duplication - changed the default initial alloc from 4096 to 3970 * ARROW-4147: [Java] Address review comments * ARROW-4147: remove check on width to be <= 16: * ARROW-4147: allow initial valueCount to be 0. * ARROW-4147: Fix incorrect comment on initial alloc --- .../arrow/vector/BaseFixedWidthVector.java | 127 +--- .../apache/arrow/vector/BaseValueVector.java | 99 ++- .../arrow/vector/BaseVariableWidthVector.java | 165 +++-- .../org/apache/arrow/vector/BitVector.java | 5 +- .../vector/TestBufferOwnershipTransfer.java | 9 +- .../org/apache/arrow/vector/TestCopyFrom.java | 569 ++++++++++-------- .../apache/arrow/vector/TestValueVector.java | 435 +++++++------ .../arrow/vector/TestVectorReAlloc.java | 23 +- .../complex/writer/TestComplexWriter.java | 15 +- 9 files changed, 799 insertions(+), 648 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index f69a9d1754ac7..f3c2837cfa7e8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.List; -import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -43,8 +42,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector implements FixedWidthVector, FieldVector, VectorDefinitionSetter { private final int typeWidth; - protected int valueAllocationSizeInBytes; - protected int validityAllocationSizeInBytes; + protected int initialValueAllocation; protected final Field field; private int allocationMonitor; @@ -61,14 +59,7 @@ public BaseFixedWidthVector(final String name, final BufferAllocator allocator, allocationMonitor = 0; validityBuffer = allocator.getEmpty(); valueBuffer = allocator.getEmpty(); - if (typeWidth > 0) { - valueAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * typeWidth; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - } else { - /* specialized handling for BitVector */ - valueAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - validityAllocationSizeInBytes = valueAllocationSizeInBytes; - } + initialValueAllocation = INITIAL_VALUE_ALLOCATION; } @@ -159,12 +150,8 @@ public ArrowBuf getOffsetBuffer() { */ @Override public void setInitialCapacity(int valueCount) { - final long size = (long) valueCount * typeWidth; - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); + computeAndCheckBufferSize(valueCount); + initialValueAllocation = valueCount; } /** @@ -267,18 +254,13 @@ public void allocateNew() { */ @Override public boolean allocateNewSafe() { - long curAllocationSizeValue = valueAllocationSizeInBytes; - long curAllocationSizeValidity = validityAllocationSizeInBytes; - - if (align(curAllocationSizeValue) + curAllocationSizeValidity > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + computeAndCheckBufferSize(initialValueAllocation); /* we 
are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(curAllocationSizeValue, curAllocationSizeValidity); + allocateBytes(initialValueAllocation); } catch (Exception e) { clear(); return false; @@ -295,22 +277,13 @@ public boolean allocateNewSafe() { * @throws org.apache.arrow.memory.OutOfMemoryException on error */ public void allocateNew(int valueCount) { - long valueBufferSize = valueCount * typeWidth; - long validityBufferSize = getValidityBufferSizeFromCount(valueCount); - if (typeWidth == 0) { - /* specialized handling for BitVector */ - valueBufferSize = validityBufferSize; - } - - if (align(valueBufferSize) + validityBufferSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } + computeAndCheckBufferSize(valueCount); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(valueBufferSize, validityBufferSize); + allocateBytes(valueCount); } catch (Exception e) { clear(); throw e; @@ -318,10 +291,16 @@ public void allocateNew(int valueCount) { } /* - * align to a 8-byte value. + * Compute the buffer size required for 'valueCount', and check if it's within bounds. */ - private long align(long size) { - return ((size + 7) / 8) * 8; + private long computeAndCheckBufferSize(int valueCount) { + final long size = computeCombinedBufferSize(valueCount, typeWidth); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector capacity " + + valueCount + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + return size; } /** @@ -333,25 +312,11 @@ private long align(long size) { * within the bounds of max allocation allowed and any other error * conditions. 
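*
* The value and validity buffers are sliced out of a single combined allocation
* (see allocFixedDataAndValidityBufs in BaseValueVector), so the total allocation
* stays a power of two: with the default of 3970 int values that is 15880 data
* bytes plus 504 validity bytes, i.e. 16384 (2^14), as the comment in
* BaseValueVector works out.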
*/ - private void allocateBytes(final long valueBufferSize, final long validityBufferSize) { - int valueBufferSlice = (int)align(valueBufferSize); - int validityBufferSlice = (int)validityBufferSize; - - /* allocate combined buffer */ - ArrowBuf buffer = allocator.buffer(valueBufferSlice + validityBufferSlice); - - valueAllocationSizeInBytes = valueBufferSlice; - valueBuffer = buffer.slice(0, valueBufferSlice); - valueBuffer.retain(); - valueBuffer.readerIndex(0); - - validityAllocationSizeInBytes = validityBufferSlice; - validityBuffer = buffer.slice(valueBufferSlice, validityBufferSlice); - validityBuffer.retain(); - validityBuffer.readerIndex(0); + private void allocateBytes(int valueCount) { + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount, typeWidth); + valueBuffer = buffers.getDataBuf(); + validityBuffer = buffers.getValidityBuf(); zeroVector(); - - buffer.release(); } /** @@ -363,7 +328,6 @@ private void allocateBytes(final long valueBufferSize, final long validityBuffer private void allocateValidityBuffer(final int validityBufferSize) { validityBuffer = allocator.buffer(validityBufferSize); validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = validityBufferSize; } /** @@ -439,50 +403,28 @@ public ArrowBuf[] getBuffers(boolean clear) { */ @Override public void reAlloc() { - int valueBaseSize = Integer.max(valueBuffer.capacity(), valueAllocationSizeInBytes); - long newValueBufferSlice = align(valueBaseSize * 2L); - long newValidityBufferSlice; - if (typeWidth > 0) { - long targetValueBufferSize = align(BaseAllocator.nextPowerOfTwo(newValueBufferSlice)); - long targetValueCount = targetValueBufferSize / typeWidth; - targetValueBufferSize -= getValidityBufferSizeFromCount((int) targetValueCount); - if (newValueBufferSlice < targetValueBufferSize) { - newValueBufferSlice = targetValueBufferSize; + int targetValueCount = getValueCapacity() * 2; + if (targetValueCount == 0) { + if (initialValueAllocation > 0) { + targetValueCount = initialValueAllocation * 2; + } else { + targetValueCount = INITIAL_VALUE_ALLOCATION * 2; } - - newValidityBufferSlice = getValidityBufferSizeFromCount((int)(newValueBufferSlice / typeWidth)); - } else { - newValidityBufferSlice = newValueBufferSlice; - } - - long newAllocationSize = newValueBufferSlice + newValidityBufferSlice; - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); } + computeAndCheckBufferSize(targetValueCount); - final ArrowBuf newBuffer = allocator.buffer((int) newAllocationSize); - final ArrowBuf newValueBuffer = newBuffer.slice(0, (int)newValueBufferSlice); + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetValueCount, typeWidth); + final ArrowBuf newValueBuffer = buffers.getDataBuf(); newValueBuffer.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); - newValueBuffer.setZero(valueBuffer.capacity(), (int)newValueBufferSlice - valueBuffer.capacity()); - newValueBuffer.retain(); - newValueBuffer.readerIndex(0); + newValueBuffer.setZero(valueBuffer.capacity(), newValueBuffer.capacity() - valueBuffer.capacity()); valueBuffer.release(); valueBuffer = newValueBuffer; - valueAllocationSizeInBytes = (int)newValueBufferSlice; - final ArrowBuf newValidityBuffer = newBuffer.slice((int)newValueBufferSlice, - (int)newValidityBufferSlice); + final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); - 
newValidityBuffer.setZero(validityBuffer.capacity(), (int)newValidityBufferSlice - validityBuffer.capacity());
- newValidityBuffer.retain();
- newValidityBuffer.readerIndex(0);
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
validityBuffer.release();
validityBuffer = newValidityBuffer;
- validityAllocationSizeInBytes = (int)newValidityBufferSlice;
-
- newBuffer.release();
}
@Override
@@ -535,9 +477,6 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers
valueBuffer = dataBuffer.retain(allocator);
valueCount = fieldNode.getLength();
-
- valueAllocationSizeInBytes = valueBuffer.capacity();
- validityAllocationSizeInBytes = validityBuffer.capacity();
}
/**
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
index 4cbf4be19dfeb..4e014bbd2aefe 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -20,6 +20,7 @@
import java.util.Collections;
import java.util.Iterator;
+import org.apache.arrow.memory.BaseAllocator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.util.TransferPair;
@@ -33,7 +34,14 @@ public abstract class BaseValueVector implements ValueVector {
public static final String MAX_ALLOCATION_SIZE_PROPERTY = "arrow.vector.max_allocation_bytes";
public static final int MAX_ALLOCATION_SIZE = Integer.getInteger(MAX_ALLOCATION_SIZE_PROPERTY, Integer.MAX_VALUE);
- public static final int INITIAL_VALUE_ALLOCATION = 4096;
+ /*
+ * For all fixed width vectors, the value and validity buffers are sliced from a single buffer.
+ * Similarly, for variable width vectors, the offsets and validity buffers are sliced from a
+ * single buffer. To ensure the single buffer is power-of-2 size, the initial value allocation
+ * should be less than power-of-2. For IntVectors, this comes to 3970*4 (15880) for the data
+ * buffer and 504 bytes for the validity buffer, totalling to 16384 (2^14).
+ */
+ public static final int INITIAL_VALUE_ALLOCATION = 3970;
protected final BufferAllocator allocator;
protected final String name;
@@ -98,5 +106,94 @@ protected ArrowBuf releaseBuffer(ArrowBuf buffer) {
protected static int getValidityBufferSizeFromCount(final int valueCount) {
return (int) Math.ceil(valueCount / 8.0);
}
+
+ /* round up to the next multiple of 8 */
+ private static long roundUp8(long size) {
+ return ((size + 7) / 8) * 8;
+ }
+
+ protected long computeCombinedBufferSize(int valueCount, int typeWidth) {
+ Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0");
+ Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0");
+
+ // compute size of validity buffer.
+ long bufferSize = roundUp8(getValidityBufferSizeFromCount(valueCount));
+
+ // add the size of the value buffer.
+ if (typeWidth == 0) {
+ // for boolean type, value-buffer and validity-buffer are of same size.
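+ // (a boolean value occupies one bit, exactly like its validity bit, so
+ // doubling the validity-buffer size yields the combined size)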
+ bufferSize *= 2; + } else { + bufferSize += roundUp8(valueCount * typeWidth); + } + return BaseAllocator.nextPowerOfTwo(bufferSize); + } + + class DataAndValidityBuffers { + private ArrowBuf dataBuf; + private ArrowBuf validityBuf; + + DataAndValidityBuffers(ArrowBuf dataBuf, ArrowBuf validityBuf) { + this.dataBuf = dataBuf; + this.validityBuf = validityBuf; + } + + public ArrowBuf getDataBuf() { + return dataBuf; + } + + public ArrowBuf getValidityBuf() { + return validityBuf; + } + + } + + protected DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWidth) { + long bufferSize = computeCombinedBufferSize(valueCount, typeWidth); + assert bufferSize < MAX_ALLOCATION_SIZE; + + int validityBufferSize; + int dataBufferSize; + if (typeWidth == 0) { + validityBufferSize = dataBufferSize = (int) (bufferSize / 2); + } else { + // Due to roundup to power-of-2 allocation, the bufferSize could be greater than the + // requested size. Utilize the allocated buffer fully.; + int actualCount = (int) ((bufferSize * 8.0) / (8 * typeWidth + 1)); + do { + validityBufferSize = (int) roundUp8(getValidityBufferSizeFromCount(actualCount)); + dataBufferSize = (int) roundUp8(actualCount * typeWidth); + if (validityBufferSize + dataBufferSize <= bufferSize) { + break; + } + --actualCount; + } while (true); + } + + + /* allocate combined buffer */ + ArrowBuf combinedBuffer = allocator.buffer((int) bufferSize); + + /* slice into requested lengths */ + ArrowBuf dataBuf = null; + ArrowBuf validityBuf = null; + int bufferOffset = 0; + for (int numBuffers = 0; numBuffers < 2; ++numBuffers) { + int len = (numBuffers == 0 ? dataBufferSize : validityBufferSize); + ArrowBuf buf = combinedBuffer.slice(bufferOffset, len); + buf.retain(); + buf.readerIndex(0); + buf.writerIndex(0); + + bufferOffset += len; + if (numBuffers == 0) { + dataBuf = buf; + } else { + validityBuf = buf; + } + } + combinedBuffer.release(); + return new DataAndValidityBuffers(dataBuf, validityBuf); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 390dfe955b6ce..ac148a25c7c29 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -38,10 +38,8 @@ public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthVector, FieldVector, VectorDefinitionSetter { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; - - private int valueAllocationSizeInBytes; - private int validityAllocationSizeInBytes; - private int offsetAllocationSizeInBytes; + private int initialValueAllocation; + private int initialValueAllocationSizeInBytes; /* protected members */ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ @@ -57,9 +55,9 @@ public abstract class BaseVariableWidthVector extends BaseValueVector public BaseVariableWidthVector(final String name, final BufferAllocator allocator, FieldType fieldType) { super(name, allocator); - valueAllocationSizeInBytes = INITIAL_BYTE_COUNT; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - offsetAllocationSizeInBytes = (INITIAL_VALUE_ALLOCATION) * OFFSET_WIDTH; + initialValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; + // -1 because we require one 
extra slot for the offset array. + initialValueAllocation = INITIAL_VALUE_ALLOCATION - 1; field = new Field(name, fieldType, null); valueCount = 0; lastSet = -1; @@ -155,15 +153,10 @@ public long getDataBufferAddress() { @Override public void setInitialCapacity(int valueCount) { final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. - */ - offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + checkDataBufferSize(size); + computeAndCheckOffsetsBufferSize(valueCount); + initialValueAllocationSizeInBytes = (int) size; + initialValueAllocation = valueCount; } /** @@ -175,17 +168,10 @@ public void setInitialCapacity(int valueCount) { @Override public void setInitialCapacity(int valueCount, double density) { long size = Math.max((long)(valueCount * density), 1L); - - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - valueAllocationSizeInBytes = (int) size; - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(valueCount); - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. - */ - offsetAllocationSizeInBytes = (valueCount + 1) * OFFSET_WIDTH; + checkDataBufferSize(size); + computeAndCheckOffsetsBufferSize(valueCount); + initialValueAllocationSizeInBytes = (int) size; + initialValueAllocation = valueCount; } /** @@ -376,20 +362,14 @@ public void allocateNew() { */ @Override public boolean allocateNewSafe() { - long curAllocationSizeValue = valueAllocationSizeInBytes; - long curAllocationSizeValidity = validityAllocationSizeInBytes; - long curAllocationSizeOffset = offsetAllocationSizeInBytes; - - if (curAllocationSizeValue > MAX_ALLOCATION_SIZE || - curAllocationSizeOffset > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + checkDataBufferSize(initialValueAllocationSizeInBytes); + computeAndCheckOffsetsBufferSize(initialValueAllocation); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(curAllocationSizeValue, curAllocationSizeValidity, curAllocationSizeOffset); + allocateBytes(initialValueAllocationSizeInBytes, initialValueAllocation); } catch (Exception e) { clear(); return false; @@ -409,35 +389,59 @@ public boolean allocateNewSafe() { @Override public void allocateNew(int totalBytes, int valueCount) { assert totalBytes >= 0; - final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - if (totalBytes > MAX_ALLOCATION_SIZE || - offsetBufferSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory exceeds limit"); - } + checkDataBufferSize(totalBytes); + computeAndCheckOffsetsBufferSize(valueCount); /* we are doing a new allocation -- release the current buffers */ clear(); try { - allocateBytes(totalBytes, validityBufferSize, offsetBufferSize); + allocateBytes(totalBytes, valueCount); } catch (Exception e) { clear(); throw e; } } + /* Check if the data buffer size is within bounds. 
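(Offsets and validity are sized and checked separately in computeAndCheckOffsetsBufferSize.)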
*/ + private void checkDataBufferSize(long size) { + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector " + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + } + + /* + * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's + * within bounds. + */ + private long computeAndCheckOffsetsBufferSize(int valueCount) { + /* to track the end offset of last data element in vector, we need + * an additional slot in offset buffer. + */ + final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Memory required for vector capacity " + + valueCount + + " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + } + return size; + } + /* allocate the inner buffers */ - private void allocateBytes(final long valueBufferSize, final long validityBufferSize, - final long offsetBufferSize) { + private void allocateBytes(final int valueBufferSize, final int valueCount) { /* allocate data buffer */ - int curSize = (int) valueBufferSize; + int curSize = valueBufferSize; valueBuffer = allocator.buffer(curSize); valueBuffer.readerIndex(0); - valueAllocationSizeInBytes = curSize; - allocateValidityBuffer(validityBufferSize); - allocateOffsetBuffer(offsetBufferSize); + + /* allocate offset buffer and validity buffer */ + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); + offsetBuffer = buffers.getDataBuf(); + validityBuffer = buffers.getValidityBuf(); + initOffsetBuffer(); + initValidityBuffer(); } /* allocate offset buffer */ @@ -445,7 +449,6 @@ private void allocateOffsetBuffer(final long size) { final int curSize = (int) size; offsetBuffer = allocator.buffer(curSize); offsetBuffer.readerIndex(0); - offsetAllocationSizeInBytes = curSize; initOffsetBuffer(); } @@ -454,7 +457,6 @@ private void allocateValidityBuffer(final long size) { final int curSize = (int) size; validityBuffer = allocator.buffer(curSize); validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; initValidityBuffer(); } @@ -476,7 +478,7 @@ public void reAlloc() { * @throws OutOfMemoryException if the internal memory allocation fails */ public void reallocDataBuffer() { - long baseSize = valueAllocationSizeInBytes; + long baseSize = initialValueAllocationSizeInBytes; final int currentBufferCapacity = valueBuffer.capacity(); if (baseSize < (long) currentBufferCapacity) { @@ -487,15 +489,12 @@ public void reallocDataBuffer() { newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); assert newAllocationSize >= 1; - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } + checkDataBufferSize(newAllocationSize); final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); valueBuffer.release(); valueBuffer = newBuf; - valueAllocationSizeInBytes = (int) newAllocationSize; } /** @@ -522,40 +521,28 @@ public void reallocDataBuffer() { * @throws OutOfMemoryException if the internal memory allocation fails */ public void reallocValidityAndOffsetBuffers() { - offsetBuffer = reallocBufferHelper(offsetBuffer, true); - validityBuffer = reallocBufferHelper(validityBuffer, false); - } - - /* helper method to realloc a particular buffer. 
returns the allocated buffer */ - private ArrowBuf reallocBufferHelper(ArrowBuf buffer, final boolean offsetBuffer) { - final int currentBufferCapacity = buffer.capacity(); - long baseSize = (offsetBuffer ? offsetAllocationSizeInBytes - : validityAllocationSizeInBytes); - - if (baseSize < (long) currentBufferCapacity) { - baseSize = (long) currentBufferCapacity; - } - - long newAllocationSize = baseSize * 2L; - newAllocationSize = BaseAllocator.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); + int targetOffsetCount = (offsetBuffer.capacity() / OFFSET_WIDTH) * 2; + if (targetOffsetCount == 0) { + if (initialValueAllocation > 0) { + targetOffsetCount = 2 * (initialValueAllocation + 1); + } else { + targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); + } } + computeAndCheckOffsetsBufferSize(targetOffsetCount); - final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); - newBuf.setBytes(0, buffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - buffer.release(1); - buffer = newBuf; - if (offsetBuffer) { - offsetAllocationSizeInBytes = (int) newAllocationSize; - } else { - validityAllocationSizeInBytes = (int) newAllocationSize; - } + DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); + final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); + newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); + newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); + offsetBuffer.release(); + offsetBuffer = newOffsetBuffer; - return buffer; + final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); + newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); + newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); + validityBuffer.release(); + validityBuffer = newValidityBuffer; } /** @@ -919,7 +906,7 @@ public long getStartEnd(int index) { @Override public void setIndexDefined(int index) { while (index >= getValidityBufferValueCapacity()) { - validityBuffer = reallocBufferHelper(validityBuffer, false); + reallocValidityAndOffsetBuffers(); } BitVectorHelper.setValidityBitToOne(validityBuffer, index); } @@ -1072,7 +1059,7 @@ public void setSafe(int index, ByteBuffer value, int start, int length) { */ public void setNull(int index) { while (index >= getValidityBufferValueCapacity()) { - validityBuffer = reallocBufferHelper(validityBuffer, false); + reallocValidityAndOffsetBuffers(); } BitVectorHelper.setValidityBit(validityBuffer, index, 0); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index 7aac28cbf1fc4..c6c964233419d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -91,11 +91,10 @@ public MinorType getMinorType() { @Override public void setInitialCapacity(int valueCount) { final int size = getValidityBufferSizeFromCount(valueCount); - if (size > MAX_ALLOCATION_SIZE) { + if (size * 2 > MAX_ALLOCATION_SIZE) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); } - valueAllocationSizeInBytes = size; - validityAllocationSizeInBytes = size; + 
initialValueAllocation = valueCount; } /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java index 9165343bfdc2b..a407166c4f6d0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java @@ -51,7 +51,7 @@ public void testTransferFixedWidth() { } @Test - public void testTransferVariableidth() { + public void testTransferVariableWidth() { BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000); BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000); @@ -62,15 +62,12 @@ public void testTransferVariableidth() { v1.setValueCount(4001); VarCharVector v2 = new VarCharVector("v2", childAllocator2); + long memoryBeforeTransfer = childAllocator1.getAllocatedMemory(); v1.makeTransferPair(v2).transfer(); assertEquals(0, childAllocator1.getAllocatedMemory()); - int expectedValueVector = 4096 * 8; - int expectedOffsetVector = 4096 * 4; - int expectedBitVector = 512; - int expected = expectedBitVector + expectedOffsetVector + expectedValueVector; - assertEquals(expected, childAllocator2.getAllocatedMemory()); + assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory()); } private static class Pointer { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index f7d3ddb397315..b10db95b6cf48 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -69,14 +69,16 @@ public void terminate() throws Exception { @Test /* NullableVarChar */ public void testCopyFromWithNulls() { - try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + try (final VarCharVector vector = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); + final VarCharVector vector2 = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(vector.getValueCapacity() >= 1); + assertEquals(0, vector.getValueCount()); + int initialCapacity = vector.getValueCapacity(); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { continue; } @@ -85,43 +87,53 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + int capacity = vector.getValueCapacity(); + assertEquals(initialCapacity, capacity); - vector.setValueCount(4095); + vector.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector.getObject(i).toString()); } } + vector2.setInitialCapacity(initialCapacity); 
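Note: setInitialCapacity() here only records a capacity hint — nothing is
allocated until the allocateNew() on the next line runs. A minimal sketch of
the hint-then-allocate idiom this test relies on (allocator setup and vector
names are hypothetical, imports omitted):

    // Size dst to match src so copyFromSafe() should not need to realloc.
    try (BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
         VarCharVector src = new VarCharVector("src", alloc);
         VarCharVector dst = new VarCharVector("dst", alloc)) {
      src.allocateNew();                               // default-sized buffers
      dst.setInitialCapacity(src.getValueCapacity());  // hint only, no allocation yet
      dst.allocateNew();                               // buffers sized from the hint
    }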
vector2.allocateNew(); capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(initialCapacity, capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } /* NO reAlloc() should have happened in copyFrom */ capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(initialCapacity, capacity); - vector2.setValueCount(4095); + vector2.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } } @@ -129,14 +141,16 @@ public void testCopyFromWithNulls() { @Test /* NullableVarChar */ public void testCopyFromWithNulls1() { - try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + try (final VarCharVector vector = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); + final VarCharVector vector2 = + newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(vector.getValueCapacity() >= 1); + assertEquals(0, vector.getValueCount()); + int initialCapacity = vector.getValueCapacity(); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { continue; } @@ -145,47 +159,57 @@ public void testCopyFromWithNulls1() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + int capacity = vector.getValueCapacity(); + assertEquals(initialCapacity, capacity); - vector.setValueCount(4095); + vector.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector.getObject(i).toString()); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024 * 10, 1024); + vector2.allocateNew((initialCapacity / 4) * 10, initialCapacity / 4); capacity = vector2.getValueCapacity(); - assertEquals(1024, capacity); + assertTrue(capacity >= initialCapacity / 4); + assertTrue(capacity < initialCapacity / 2); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + 
vector2.getObject(i).toString()); } } /* 2 reAllocs should have happened in copyFromSafe() */ capacity = vector2.getValueCapacity(); - assertEquals(4096, capacity); + assertTrue(capacity >= initialCapacity); - vector2.setValueCount(4095); + vector2.setValueCount(initialCapacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < initialCapacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals( + "unexpected value at index: " + i, + Integer.toString(i), + vector2.getObject(i).toString()); } } } @@ -194,28 +218,29 @@ public void testCopyFromWithNulls1() { @Test /* IntVector */ public void testCopyFromWithNulls2() { try (final IntVector vector1 = new IntVector(EMPTY_SCHEMA_PATH, allocator); - final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, 1000 + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -226,23 +251,24 @@ public void testCopyFromWithNulls2() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i)); @@ -254,60 +280,60 @@ public void testCopyFromWithNulls2() { @Test /* BigIntVector */ public void testCopyFromWithNulls3() { try (final BigIntVector vector1 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, 
allocator)) { + final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, 10000000000L + (long)i); + vector1.setSafe(i, 10000000000L + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 10000000000L + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 10000000000L + (long)i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i)); } } } @@ -316,8 +342,9 @@ public void testCopyFromWithNulls3() { @Test /* BitVector */ public void testCopyFromWithNulls4() { try (final BitVector vector1 = new BitVector(EMPTY_SCHEMA_PATH, allocator); - final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { + final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { + vector1.setInitialCapacity(4096); vector1.allocateNew(); assertEquals(4096, vector1.getValueCapacity()); assertEquals(0, vector1.getValueCount()); @@ -394,60 +421,60 @@ public void testCopyFromWithNulls4() { @Test /* Float4Vector */ public void testCopyFromWithNulls5() { try (final Float4Vector vector1 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator); - final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { + final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { 
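Note: the rewrite repeated throughout this file — hard-coded capacities such
as 4096 and 8192 replaced by assertions relative to getValueCapacity() —
exists because an allocation request may be rounded up, so the only
authoritative capacity is the one the vector reports after allocateNew().
The idiom, sketched under that assumption (allocator setup hypothetical,
imports omitted):

    // Request a capacity, then read back what was actually granted.
    try (BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
         IntVector v = new IntVector("v", alloc)) {
      v.allocateNew(1000);                 // ask for room for 1000 values
      int granted = v.getValueCapacity();  // may be rounded up, e.g. to 1024
      v.setSafe(granted, 42);              // first write past capacity...
      assert v.getValueCapacity() >= 2 * granted;  // ...grows the buffers
    }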
vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, 100.25f + (float)i); + vector1.setSafe(i, 100.25f + (float) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 100.25f + (float)i, vector1.get(i), 0); + assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 100.25f + i * 1.0f, vector2.get(i), 0); + assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0); } } } @@ -456,60 +483,62 @@ public void testCopyFromWithNulls5() { @Test /* Float8Vector */ public void testCopyFromWithNulls6() { try (final Float8Vector vector1 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { + final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, 123456.7865 + (double) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - 
assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 123456.7865 + (double) i, vector1.get(i), 0); + assertEquals( + "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - 123456.7865 + (double) i, vector2.get(i), 0); + assertEquals( + "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0); } } } @@ -518,30 +547,31 @@ public void testCopyFromWithNulls6() { @Test /* IntervalDayVector */ public void testCopyFromWithNulls7() { try (final IntervalDayVector vector1 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int days = 10; final int milliseconds = 10000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, days + i, milliseconds + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -554,23 +584,24 @@ public void testCopyFromWithNulls7() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, 
vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final Period p = vector2.getObject(i); @@ -584,15 +615,16 @@ public void testCopyFromWithNulls7() { @Test /* IntervalYearVector */ public void testCopyFromWithNulls8() { try (final IntervalYearVector vector1 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { + final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int interval = 30; /* 2 years 6 months */ - final Period[] periods = new Period[4096]; - for (int i = 0; i < 4096; i++) { + final Period[] periods = new Period[4096]; + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } @@ -600,18 +632,19 @@ public void testCopyFromWithNulls8() { final Period p = new Period(); final int years = (interval + i) / org.apache.arrow.vector.util.DateUtility.yearsToMonths; final int months = (interval + i) % org.apache.arrow.vector.util.DateUtility.yearsToMonths; - periods[i] = p.plusYears(years).plusMonths(months);; + periods[i] = p.plusYears(years).plusMonths(months); + ; } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -624,23 +657,24 @@ public void testCopyFromWithNulls8() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, 
vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final Period p = vector2.getObject(i); @@ -653,61 +687,61 @@ public void testCopyFromWithNulls8() { @Test /* SmallIntVector */ public void testCopyFromWithNulls9() { try (final SmallIntVector vector1 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator); - final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { + final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final short val = 1000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (short)i); + vector1.setSafe(i, val + (short) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (short)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at 
index: " + i, - val + (short)i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i)); } } } @@ -716,61 +750,61 @@ public void testCopyFromWithNulls9() { @Test /* TimeMicroVector */ public void testCopyFromWithNulls10() { try (final TimeMicroVector vector1 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final long val = 100485765432L; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (long)i); + vector1.setSafe(i, val + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long) i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); } } } @@ -779,61 +813,61 @@ public void testCopyFromWithNulls10() { @Test /* TimeMilliVector */ public void testCopyFromWithNulls11() { try (final TimeMilliVector vector1 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, 
vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final int val = 1000; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } vector1.setSafe(i, val + i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + i, vector2.get(i)); } } } @@ -842,14 +876,15 @@ public void testCopyFromWithNulls11() { @Test /* TinyIntVector */ public void testCopyFromWithNulls12() { try (final TinyIntVector vector1 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator); - final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { + final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); byte val = -128; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } @@ -857,16 +892,16 @@ public void testCopyFromWithNulls12() { val++; } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, 
vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); val = -128; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -878,24 +913,24 @@ public void testCopyFromWithNulls12() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ val = -128; - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { assertEquals("unexpected value at index: " + i, val, vector2.get(i)); @@ -908,32 +943,33 @@ public void testCopyFromWithNulls12() { @Test /* DecimalVector */ public void testCopyFromWithNulls13() { try (final DecimalVector vector1 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16); - final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { + final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final double baseValue = 104567897654.876543654; final BigDecimal[] decimals = new BigDecimal[4096]; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - BigDecimal decimal = new BigDecimal(baseValue + (double)i); + BigDecimal decimal = new BigDecimal(baseValue + (double) i); vector1.setSafe(i, decimal); decimals[i] = decimal; } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { @@ -945,23 +981,24 @@ public void testCopyFromWithNulls13() { /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; 
i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { final BigDecimal decimal = vector2.getObject(i); @@ -974,61 +1011,61 @@ public void testCopyFromWithNulls13() { @Test /* TimeStampVector */ public void testCopyFromWithNulls14() { try (final TimeStampVector vector1 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { + final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); + assertTrue(vector1.getValueCapacity() >= vector1.initialValueAllocation); assertEquals(0, vector1.getValueCount()); + int initialCapacity = vector1.getValueCapacity(); final long val = 20145678912L; - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { continue; } - vector1.setSafe(i, val + (long)i); + vector1.setSafe(i, val + (long) i); } - vector1.setValueCount(4096); + vector1.setValueCount(initialCapacity); /* No realloc should have happened in setSafe or * setValueCount */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); + assertEquals(initialCapacity, vector1.getValueCapacity()); + assertEquals(initialCapacity, vector1.getValueCount()); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long)i, vector1.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); } } /* set lesser initial capacity than actually needed * to trigger reallocs in copyFromSafe() */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); + vector2.allocateNew(initialCapacity / 4); + assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); + assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity; i++) { vector2.copyFromSafe(i, i, vector1); } /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); + assertTrue(vector2.getValueCapacity() >= initialCapacity); + vector2.setValueCount(initialCapacity * 2); /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); + assertEquals(initialCapacity * 2, vector2.getValueCount()); + assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); /* check vector data after copy and realloc */ - for (int i = 0; i < 8192; i++) { - if (((i & 
1) == 0) || (i >= 4096)) { + for (int i = 0; i < initialCapacity * 2; i++) { + if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, - val + (long) i, vector2.get(i)); + assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 4772a86356b95..30fe23cae4afd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; @@ -68,8 +69,8 @@ public void init() { private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset); private static final int MAX_VALUE_COUNT = - Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 4; - private static final int MAX_VALUE_COUNT_8BYTE = MAX_VALUE_COUNT / 2; + (int)(Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); + private static final int MAX_VALUE_COUNT_8BYTE = (int)(MAX_VALUE_COUNT / 2); @After public void terminate() throws Exception { @@ -108,7 +109,7 @@ public void testFixedType1() { vector.allocateNew(1024); initialCapacity = vector.getValueCapacity(); - assertEquals(1024, initialCapacity); + assertTrue(initialCapacity >= 1024); // Put and set a few values vector.setSafe(0, 100); @@ -124,7 +125,7 @@ public void testFixedType1() { assertEquals(104, vector.get(1023)); try { - vector.set(1024, 10000); + vector.set(initialCapacity, 10000); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -133,7 +134,7 @@ public void testFixedType1() { } try { - vector.get(1024); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -142,10 +143,10 @@ public void testFixedType1() { } /* this should trigger a realloc() */ - vector.setSafe(1024, 10000); + vector.setSafe(initialCapacity, 10000); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* check vector data after realloc */ assertEquals(100, vector.get(0)); @@ -153,16 +154,17 @@ public void testFixedType1() { assertEquals(102, vector.get(100)); assertEquals(103, vector.get(1022)); assertEquals(104, vector.get(1023)); - assertEquals(10000, vector.get(1024)); + assertEquals(10000, vector.get(initialCapacity)); /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { // TODO: test vector.get(i) is 0 after unsafe get added assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i)); } @@ -180,7 +182,7 @@ public void testFixedType2() { 
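Note: MAX_VALUE_COUNT above drops from max_allocation_bytes / 4 to / 7; with
the validity and data buffers now drawn from one combined allocation, the
per-value footprint that the bounds check sees is larger than the raw 4-byte
element width, which appears to be why the test budget was made more
conservative. The hunk below exercises the fail-fast side of that check — a
capacity hint whose implied size cannot fit is rejected before any buffer
exists — roughly:

    // setInitialCapacity() validates the implied allocation size up front;
    // an impossible hint fails with OversizedAllocationException immediately,
    // not later at allocateNew() time.
    intVector.setInitialCapacity(MAX_VALUE_COUNT);        // accepted
    // intVector.setInitialCapacity(MAX_VALUE_COUNT * 2); // would throw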
intVector.setInitialCapacity(MAX_VALUE_COUNT); try { - intVector.setInitialCapacity(MAX_VALUE_COUNT + 1); + intVector.setInitialCapacity(MAX_VALUE_COUNT * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -195,17 +197,18 @@ public void testFixedType2() { /* allocate 64 bytes (16 * 4) */ intVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, intVector.getValueCapacity()); + assertTrue(intVector.getValueCapacity() >= initialCapacity); + initialCapacity = intVector.getValueCapacity(); /* populate the vector */ int j = 1; - for (int i = 0; i < 16; i += 2) { + for (int i = 0; i < initialCapacity; i += 2) { intVector.set(i, j); j++; } try { - intVector.set(16, 9); + intVector.set(initialCapacity, j); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -215,13 +218,13 @@ public void testFixedType2() { /* check vector contents */ j = 1; - for (int i = 0; i < 16; i += 2) { + for (int i = 0; i < initialCapacity; i += 2) { assertEquals("unexpected value at index: " + i, j, intVector.get(i)); j++; } try { - intVector.get(16); + intVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -230,26 +233,27 @@ public void testFixedType2() { } /* this should trigger a realloc() */ - intVector.setSafe(16, 9); + intVector.setSafe(initialCapacity, j); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, intVector.getValueCapacity()); + assertTrue(intVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ j = 1; - for (int i = 0; i <= 16; i += 2) { + for (int i = 0; i <= initialCapacity; i += 2) { assertEquals("unexpected value at index: " + i, j, intVector.get(i)); j++; } /* reset the vector */ + int capacityBeforeRealloc = intVector.getValueCapacity(); intVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, intVector.getValueCapacity()); + assertEquals(capacityBeforeRealloc, intVector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeRealloc; i++) { assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i)); } } @@ -266,7 +270,7 @@ public void testFixedType3() { floatVector.setInitialCapacity(MAX_VALUE_COUNT); try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT + 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -281,7 +285,8 @@ public void testFixedType3() { /* allocate 64 bytes (16 * 4) */ floatVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); floatVector.zeroVector(); @@ -296,7 +301,7 @@ public void testFixedType3() { floatVector.set(14, 8.5f); try { - floatVector.set(16, 9.5f); + floatVector.set(initialCapacity, 9.5f); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -315,7 +320,7 @@ public void testFixedType3() { assertEquals(8.5f, floatVector.get(14), 0); try { - floatVector.get(16); + floatVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -324,10 +329,10 @@ public void testFixedType3() { } /* this should trigger a 
realloc() */ - floatVector.setSafe(16, 9.5f); + floatVector.setSafe(initialCapacity, 9.5f); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ assertEquals(1.5f, floatVector.get(0), 0); @@ -338,16 +343,17 @@ public void testFixedType3() { assertEquals(6.6f, floatVector.get(10), 0); assertEquals(7.8f, floatVector.get(12), 0); assertEquals(8.5f, floatVector.get(14), 0); - assertEquals(9.5f, floatVector.get(16), 0); + assertEquals(9.5f, floatVector.get(initialCapacity), 0); /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); floatVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); } } @@ -364,7 +370,7 @@ public void testFixedType4() { floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE); try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE + 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE * 2); } catch (OversizedAllocationException oe) { error = true; } finally { @@ -379,7 +385,8 @@ public void testFixedType4() { /* allocate 128 bytes (16 * 8) */ floatVector.allocateNew(); /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); /* populate the vector */ floatVector.set(0, 1.55); @@ -392,7 +399,7 @@ public void testFixedType4() { floatVector.set(14, 8.56); try { - floatVector.set(16, 9.53); + floatVector.set(initialCapacity, 9.53); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -411,7 +418,7 @@ public void testFixedType4() { assertEquals(8.56, floatVector.get(14), 0); try { - floatVector.get(16); + floatVector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -420,10 +427,10 @@ public void testFixedType4() { } /* this should trigger a realloc() */ - floatVector.setSafe(16, 9.53); + floatVector.setSafe(initialCapacity, 9.53); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); /* vector data should still be intact after realloc */ assertEquals(1.55, floatVector.get(0), 0); @@ -434,16 +441,17 @@ public void testFixedType4() { assertEquals(6.67, floatVector.get(10), 0); assertEquals(7.87, floatVector.get(12), 0); assertEquals(8.56, floatVector.get(14), 0); - assertEquals(9.53, floatVector.get(16), 0); + assertEquals(9.53, floatVector.get(initialCapacity), 0); /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); floatVector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, floatVector.getValueCapacity()); + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < 
capacityBeforeReset; i++) { assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); } } @@ -463,36 +471,37 @@ public void testNullableFixedType1() { assertEquals(0, vector.getValueCapacity()); vector.allocateNew(); - assertEquals(initialCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); // Put and set a few values vector.set(0, 100); vector.set(1, 101); vector.set(100, 102); - vector.set(1022, 103); - vector.set(1023, 104); + vector.set(initialCapacity - 2, 103); + vector.set(initialCapacity - 1, 104); /* check vector contents */ assertEquals(100, vector.get(0)); assertEquals(101, vector.get(1)); assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); + assertEquals(103, vector.get(initialCapacity - 2)); + assertEquals(104, vector.get(initialCapacity - 1)); int val = 0; /* check unset bits/null values */ - for (int i = 2, j = 101; i <= 99 || j <= 1021; i++, j++) { + for (int i = 2, j = 101; i <= 99 || j <= initialCapacity - 3; i++, j++) { if (i <= 99) { assertTrue(vector.isNull(i)); } - if (j <= 1021) { + if (j <= initialCapacity - 3) { assertTrue(vector.isNull(j)); } } try { - vector.set(1024, 10000); + vector.set(initialCapacity, 10000); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -501,7 +510,7 @@ public void testNullableFixedType1() { } try { - vector.get(1024); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -510,39 +519,40 @@ public void testNullableFixedType1() { } /* should trigger a realloc of the underlying bitvector and valuevector */ - vector.setSafe(1024, 10000); + vector.setSafe(initialCapacity, 10000); /* check new capacity */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); /* vector contents should still be intact after realloc */ assertEquals(100, vector.get(0)); assertEquals(101, vector.get(1)); assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); - assertEquals(10000, vector.get(1024)); + assertEquals(103, vector.get(initialCapacity - 2)); + assertEquals(104, vector.get(initialCapacity - 1)); + assertEquals(10000, vector.get(initialCapacity)); val = 0; /* check unset bits/null values */ - for (int i = 2, j = 101; i < 99 || j < 1021; i++, j++) { + for (int i = 2, j = 101; i < 99 || j < initialCapacity - 3; i++, j++) { if (i <= 99) { assertTrue(vector.isNull(i)); } - if (j <= 1021) { + if (j <= initialCapacity - 3) { assertTrue(vector.isNull(j)); } } /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -560,7 +570,8 @@ public void testNullableFixedType2() { assertEquals(0, vector.getValueCapacity()); vector.allocateNew(); - assertEquals(initialCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); /* populate the vector */ vector.set(0, 100.5f); @@ -573,7 +584,7 
@@ public void testNullableFixedType2() { vector.set(14, 89.5f); try { - vector.set(16, 90.5f); + vector.set(initialCapacity, 90.5f); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -600,7 +611,7 @@ public void testNullableFixedType2() { assertTrue(vector.isNull(15)); try { - vector.get(16); + vector.get(initialCapacity); } catch (IndexOutOfBoundsException ie) { error = true; } finally { @@ -609,10 +620,10 @@ public void testNullableFixedType2() { } /* this should trigger a realloc() */ - vector.setSafe(16, 90.5f); + vector.setSafe(initialCapacity, 90.5f); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* vector data should still be intact after realloc */ assertEquals(100.5f, vector.get(0), 0); @@ -633,13 +644,14 @@ public void testNullableFixedType2() { assertTrue(vector.isNull(15)); /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -656,8 +668,9 @@ public void testNullableFixedType3() { assertEquals(0, vector.getValueCapacity()); /* allocate space for 4KB data (1024 * 4) */ vector.allocateNew(initialCapacity); - /* underlying buffer should be able to store 16 values */ - assertEquals(initialCapacity, vector.getValueCapacity()); + /* underlying buffer should be able to store 1024 values */ + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); vector.set(0, 1); vector.set(1, 2); @@ -687,7 +700,7 @@ public void testNullableFixedType3() { ArrowBuf validityVectorBuf = buffers.get(0); /* bitvector tracks 1024 integers --> 1024 bits --> 128 bytes */ - assertEquals(128, validityVectorBuf.readableBytes()); + assertTrue(validityVectorBuf.readableBytes() >= 128); assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined for (int i = 1; i < 12; i++) { assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100 @@ -699,15 +712,15 @@ public void testNullableFixedType3() { assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined /* this should trigger a realloc() */ - vector.setSafe(1024, 6); + vector.setSafe(initialCapacity, 6); /* underlying buffer should now be able to store double the number of values */ - assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); /* vector data should still be intact after realloc */ j = 1; for (int i = 0; i < (initialCapacity * 2); i++) { - if ((i > 1024) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { + if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } else { assertFalse("null data not expected at index: " + i, vector.isNull(i)); @@ -717,19 +730,20 @@ public void testNullableFixedType3() { } /* reset the vector */ + int capacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - 
assertEquals(initialCapacity * 2, vector.getValueCapacity()); + assertEquals(capacityBeforeReset, vector.getValueCapacity()); /* vector data should have been zeroed out */ - for (int i = 0; i < (initialCapacity * 2); i++) { + for (int i = 0; i < capacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } - vector.allocateNew(4096); + vector.allocateNew(initialCapacity * 4); // vector has been erased - for (int i = 0; i < 4096; i++) { + for (int i = 0; i < initialCapacity * 4; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -764,7 +778,7 @@ public void testNullableFixedType4() { } vector.setSafe(valueCapacity, 20000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == valueCapacity) { @@ -795,14 +809,15 @@ public void testNullableFixedType4() { } } - vector.setSafe((valueCapacity * 2) + 1000, 400000000); - assertEquals(valueCapacity * 4, vector.getValueCapacity()); + int valueCapacityBeforeRealloc = vector.getValueCapacity(); + vector.setSafe(valueCapacityBeforeRealloc + 1000, 400000000); + assertTrue(vector.getValueCapacity() >= valueCapacity * 4); for (int i = 0; i < vector.getValueCapacity(); i++) { - if (i == (valueCapacity * 2 + 1000)) { + if (i == (valueCapacityBeforeRealloc + 1000)) { assertFalse("unexpected null value at index: " + i, vector.isNull(i)); assertEquals("unexpected value at index: " + i, 400000000, vector.get(i)); - } else if (i < valueCapacity * 2 && (i % 2) == 0) { + } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) { assertFalse("unexpected null value at index: " + i, vector.isNull(i)); assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i)); } else { @@ -811,13 +826,14 @@ public void testNullableFixedType4() { } /* reset the vector */ + int valueCapacityBeforeReset = vector.getValueCapacity(); vector.reset(); /* capacity shouldn't change after reset */ - assertEquals(valueCapacity * 4, vector.getValueCapacity()); + assertEquals(valueCapacityBeforeReset, vector.getValueCapacity()); /* vector data should be zeroed out */ - for (int i = 0; i < (valueCapacity * 4); i++) { + for (int i = 0; i < valueCapacityBeforeReset; i++) { assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); } } @@ -936,52 +952,56 @@ public void testNullableVarType2() { @Test /* Float8Vector */ public void testReallocAfterVectorTransfer1() { try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - final int initialDefaultCapacity = 4096; + int initialCapacity = 4096; boolean error = false; /* use the default capacity; 4096*8 => 32KB */ + vector.setInitialCapacity(initialCapacity); vector.allocateNew(); - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); double baseValue = 100.375; - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { vector.setSafe(i, baseValue + (double)i); } /* the above setSafe calls should not have triggered a realloc as * we are within the capacity. 
check the vector contents */ - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity, baseValue + (double)initialDefaultCapacity); - assertEquals(initialDefaultCapacity * 2, vector.getValueCapacity()); + vector.setSafe(initialCapacity, baseValue + (double)initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); + int capacityAfterRealloc1 = vector.getValueCapacity(); - for (int i = initialDefaultCapacity + 1; i < (initialDefaultCapacity * 2); i++) { + for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 2); i++) { + for (int i = 0; i < capacityAfterRealloc1; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity * 2, baseValue + (double)(initialDefaultCapacity * 2)); - assertEquals(initialDefaultCapacity * 4, vector.getValueCapacity()); + vector.setSafe(capacityAfterRealloc1, baseValue + (double)(capacityAfterRealloc1)); + assertTrue(vector.getValueCapacity() >= initialCapacity * 4); + int capacityAfterRealloc2 = vector.getValueCapacity(); - for (int i = (initialDefaultCapacity * 2) + 1; i < (initialDefaultCapacity * 4); i++) { + for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < capacityAfterRealloc2; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } @@ -997,10 +1017,10 @@ public void testReallocAfterVectorTransfer1() { /* now let's realloc the toVector */ toVector.reAlloc(); - assertEquals(initialDefaultCapacity * 8, toVector.getValueCapacity()); + assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - for (int i = 0; i < (initialDefaultCapacity * 8); i++) { - if (i < (initialDefaultCapacity * 4)) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { + if (i < capacityAfterRealloc2) { assertEquals(baseValue + (double)i, toVector.get(i), 0); } else { assertTrue(toVector.isNull(i)); @@ -1014,51 +1034,53 @@ public void testReallocAfterVectorTransfer1() { @Test /* Float8Vector */ public void testReallocAfterVectorTransfer2() { try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - final int initialDefaultCapacity = 4096; + int initialCapacity = 4096; boolean error = false; - vector.allocateNew(initialDefaultCapacity); - - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + vector.allocateNew(initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity); + initialCapacity = vector.getValueCapacity(); double baseValue = 100.375; - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { vector.setSafe(i, baseValue + (double)i); } /* the above setSafe calls should not have triggered a realloc as * we are within the capacity. 
check the vector contents */ - assertEquals(initialDefaultCapacity, vector.getValueCapacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); - for (int i = 0; i < initialDefaultCapacity; i++) { + for (int i = 0; i < initialCapacity; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity, baseValue + (double)initialDefaultCapacity); - assertEquals(initialDefaultCapacity * 2, vector.getValueCapacity()); + vector.setSafe(initialCapacity, baseValue + (double)initialCapacity); + assertTrue(vector.getValueCapacity() >= initialCapacity * 2); + int capacityAfterRealloc1 = vector.getValueCapacity(); - for (int i = initialDefaultCapacity + 1; i < (initialDefaultCapacity * 2); i++) { + for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 2); i++) { + for (int i = 0; i < capacityAfterRealloc1; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } /* this should trigger a realloc */ - vector.setSafe(initialDefaultCapacity * 2, baseValue + (double)(initialDefaultCapacity * 2)); - assertEquals(initialDefaultCapacity * 4, vector.getValueCapacity()); + vector.setSafe(capacityAfterRealloc1, baseValue + (double)(capacityAfterRealloc1)); + assertTrue(vector.getValueCapacity() >= initialCapacity * 4); + int capacityAfterRealloc2 = vector.getValueCapacity(); - for (int i = (initialDefaultCapacity * 2) + 1; i < (initialDefaultCapacity * 4); i++) { + for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { vector.setSafe(i, baseValue + (double)i); } - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < capacityAfterRealloc2; i++) { double value = vector.get(i); assertEquals(baseValue + (double)i, value, 0); } @@ -1073,7 +1095,7 @@ public void testReallocAfterVectorTransfer2() { Float8Vector toVector = (Float8Vector)transferPair.getTo(); /* check toVector contents before realloc */ - for (int i = 0; i < (initialDefaultCapacity * 4); i++) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); double value = toVector.get(i); assertEquals("unexpected value at index: " + i, baseValue + (double)i, value, 0); @@ -1081,10 +1103,10 @@ public void testReallocAfterVectorTransfer2() { /* now let's realloc the toVector and check contents again */ toVector.reAlloc(); - assertEquals(initialDefaultCapacity * 8, toVector.getValueCapacity()); + assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - for (int i = 0; i < (initialDefaultCapacity * 8); i++) { - if (i < (initialDefaultCapacity * 4)) { + for (int i = 0; i < toVector.getValueCapacity(); i++) { + if (i < capacityAfterRealloc2) { assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); double value = toVector.get(i); assertEquals("unexpected value at index: " + i, baseValue + (double)i, value, 0); @@ -1103,7 +1125,7 @@ public void testReallocAfterVectorTransfer3() { /* 4096 values with 10 byte per record */ vector.allocateNew(4096 * 10, 4096); int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); + assertTrue(valueCapacity >= 4096); /* populate the vector */ for (int i = 0; i < valueCapacity; i++) { @@ -1125,7 +1147,10 @@ public void testReallocAfterVectorTransfer3() { /* trigger first realloc */ vector.setSafe(valueCapacity, STR2, 0, 
STR2.length); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { + vector.reallocDataBuffer(); + } /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1148,7 +1173,10 @@ public void testReallocAfterVectorTransfer3() { /* trigger second realloc */ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { + vector.reallocDataBuffer(); + } /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1197,7 +1225,7 @@ public void testReallocAfterVectorTransfer4() { /* 4096 values */ vector.allocateNew(4096); int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); + assertTrue(valueCapacity >= 4096); /* populate the vector */ int baseValue = 1000; @@ -1218,7 +1246,7 @@ public void testReallocAfterVectorTransfer4() { /* trigger first realloc */ vector.setSafe(valueCapacity, 10000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1239,7 +1267,7 @@ public void testReallocAfterVectorTransfer4() { /* trigger second realloc */ vector.setSafe(valueCapacity, 10000000); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= valueCapacity * 2); /* populate the remaining vector */ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { @@ -1288,7 +1316,8 @@ public void testReAllocFixedWidthVector() { try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) { vector.allocateNew(1024); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 1024); + int initialCapacity = vector.getValueCapacity(); // Put values in indexes that fall within the initial allocation vector.setSafe(0, 100.1f); @@ -1299,7 +1328,7 @@ public void testReAllocFixedWidthVector() { vector.setSafe(2000, 105.5f); // Check valueCapacity is more than initial allocation - assertEquals(1024 * 2, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); assertEquals(100.1f, vector.get(0), 0); assertEquals(102.3f, vector.get(100), 0); @@ -1316,24 +1345,24 @@ public void testReAllocFixedWidthVector() { @Test public void testReAllocVariableWidthVector() { try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertTrue(initialCapacity >= 4095); /* Put values in indexes that fall within the initial allocation */ vector.setSafe(0, STR1, 0, STR1.length); vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); /* the above set calls should NOT have triggered a realloc */ - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertEquals(initialCapacity, vector.getValueCapacity()); /* Now try to put values in space that falls beyond the initial allocation */ vector.setSafe(initialCapacity + 
200, STR3, 0, STR3.length); /* Check valueCapacity is more than initial allocation */ - assertEquals(((initialCapacity + 1) * 2) - 1, vector.getValueCapacity()); + assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); assertArrayEquals(STR1, vector.get(0)); assertArrayEquals(STR2, vector.get(initialCapacity - 1)); @@ -1348,20 +1377,20 @@ public void testReAllocVariableWidthVector() { @Test public void testFillEmptiesNotOverfill() { try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertTrue(initialCapacity >= 4095); vector.setSafe(4094, "hello".getBytes(), 0, 5); /* the above set method should NOT have trigerred a realloc */ - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + assertEquals(initialCapacity, vector.getValueCapacity()); - vector.setValueCount(4095); - assertEquals(4096 * vector.OFFSET_WIDTH, vector.getFieldBuffers().get(1).capacity()); - initialCapacity = vector.getValueCapacity(); - assertEquals(4095, initialCapacity); + int bufSizeBefore = vector.getFieldBuffers().get(1).capacity(); + vector.setValueCount(initialCapacity); + assertEquals(bufSizeBefore, vector.getFieldBuffers().get(1).capacity()); + assertEquals(initialCapacity, vector.getValueCapacity()); } } @@ -1371,11 +1400,12 @@ public void testCopyFromWithNulls() { final VarCharVector vector2 = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(capacity >= 4095); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { continue; } @@ -1384,12 +1414,11 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector.getValueCapacity()); - vector.setValueCount(4095); + vector.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { @@ -1397,11 +1426,12 @@ public void testCopyFromWithNulls() { } } + vector2.setInitialCapacity(4095); vector2.allocateNew(); - capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + int capacity2 = vector2.getValueCapacity(); + assertEquals(capacity2, capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); @@ -1411,12 +1441,11 @@ public void testCopyFromWithNulls() { } /* NO reAlloc() should have happened in copyFrom */ - capacity = vector2.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector2.getValueCapacity()); - vector2.setValueCount(4095); + vector2.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { @@ -1432,11 +1461,12 @@ public void testCopyFromWithNulls1() { final VarCharVector vector2 = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { + vector.setInitialCapacity(4095); vector.allocateNew(); int capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertTrue(capacity >= 
4095); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { continue; } @@ -1445,12 +1475,11 @@ public void testCopyFromWithNulls1() { } /* NO reAlloc() should have happened in setSafe() */ - capacity = vector.getValueCapacity(); - assertEquals(4095, capacity); + assertEquals(capacity, vector.getValueCapacity()); - vector.setValueCount(4095); + vector.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { @@ -1463,10 +1492,11 @@ public void testCopyFromWithNulls1() { */ vector2.allocateNew(1024 * 10, 1024); - capacity = vector2.getValueCapacity(); - assertEquals(1024, capacity); + int capacity2 = vector2.getValueCapacity(); + assertTrue(capacity2 >= 1024); + assertTrue(capacity2 <= capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { vector2.copyFromSafe(i, i, vector); if (i % 3 == 0) { assertNull(vector2.getObject(i)); @@ -1476,12 +1506,11 @@ public void testCopyFromWithNulls1() { } /* 2 reAllocs should have happened in copyFromSafe() */ - capacity = vector2.getValueCapacity(); - assertEquals(4096, capacity); + assertEquals(capacity, vector2.getValueCapacity()); - vector2.setValueCount(4095); + vector2.setValueCount(capacity); - for (int i = 0; i < 4095; i++) { + for (int i = 0; i < capacity; i++) { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { @@ -1876,30 +1905,88 @@ public void testSetInitialCapacity() { try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { /* use the default 8 data bytes on average per element */ - vector.setInitialCapacity(4096); + int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION - 1; + vector.setInitialCapacity(defaultCapacity); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(4096 * 8, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo(defaultCapacity * 8), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 1); + vector.setInitialCapacity(defaultCapacity, 1); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(4096, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo(defaultCapacity), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 0.1); + vector.setInitialCapacity(defaultCapacity, 0.1); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(512, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo((int)(defaultCapacity * 0.1)), vector.getDataBuffer().capacity()); - vector.setInitialCapacity(4096, 0.01); + vector.setInitialCapacity(defaultCapacity, 0.01); vector.allocateNew(); - assertEquals(4096, vector.getValueCapacity()); - assertEquals(64, vector.getDataBuffer().capacity()); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(BaseAllocator.nextPowerOfTwo((int)(defaultCapacity * 0.01)), vector.getDataBuffer().capacity()); vector.setInitialCapacity(5, 0.01); vector.allocateNew(); - assertEquals(7, vector.getValueCapacity()); + assertEquals(5, vector.getValueCapacity()); assertEquals(2, vector.getDataBuffer().capacity()); } } + + @Test + public void testDefaultAllocNewAll() { + int defaultCapacity = 
BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION; + int expectedSize; + long beforeSize; + try (BufferAllocator childAllocator = allocator.newChildAllocator("defaultAllocs", 0, Long.MAX_VALUE); + final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, childAllocator); + final BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, childAllocator); + final BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, childAllocator); + final DecimalVector decimalVector = new DecimalVector(EMPTY_SCHEMA_PATH, childAllocator, 38, 6); + final VarCharVector varCharVector = new VarCharVector(EMPTY_SCHEMA_PATH, childAllocator)) { + + // verify that the wastage is within bounds for IntVector. + beforeSize = childAllocator.getAllocatedMemory(); + intVector.allocateNew(); + assertTrue(intVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * IntVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for BigIntVector. + beforeSize = childAllocator.getAllocatedMemory(); + bigIntVector.allocateNew(); + assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * bigIntVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for DecimalVector. + beforeSize = childAllocator.getAllocatedMemory(); + decimalVector.allocateNew(); + assertTrue(decimalVector.getValueCapacity() >= defaultCapacity); + expectedSize = (defaultCapacity * decimalVector.TYPE_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for VarCharVector. + // var char vector have an offsets array that is 1 less than defaultCapacity + beforeSize = childAllocator.getAllocatedMemory(); + varCharVector.allocateNew(); + assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1); + expectedSize = (defaultCapacity * VarCharVector.OFFSET_WIDTH) + + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) + + defaultCapacity * 8; + // wastage should be less than 5%. + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + // verify that the wastage is within bounds for BitVector. 
+ beforeSize = childAllocator.getAllocatedMemory(); + bitVector.allocateNew(); + assertTrue(bitVector.getValueCapacity() >= defaultCapacity); + expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2; + assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); + + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 5474675fbf343..60747aaad92ce 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.nio.charset.StandardCharsets; @@ -54,20 +55,21 @@ public void testFixedType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 512); + int initialCapacity = vector.getValueCapacity(); try { - vector.set(512, 0); + vector.set(initialCapacity, 0); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - vector.set(512, 100); - assertEquals(100, vector.get(512)); + vector.set(initialCapacity, 100); + assertEquals(100, vector.get(initialCapacity)); } } @@ -77,20 +79,21 @@ public void testNullableType() { vector.setInitialCapacity(512); vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 512); + int initialCapacity = vector.getValueCapacity(); try { - vector.set(512, "foo".getBytes(StandardCharsets.UTF_8)); + vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); Assert.fail("Expected out of bounds exception"); } catch (Exception e) { // ok } vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); + assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - vector.set(512, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals("foo", new String(vector.get(512), StandardCharsets.UTF_8)); + vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); + assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8)); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index b7215ce4e2e68..61c1b924f664d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -974,11 +974,16 @@ public void testSingleStructWriter1() { Float4Vector float4Vector = (Float4Vector)parent.getChild("float4Field"); Float8Vector float8Vector = (Float8Vector)parent.getChild("float8Field"); - assertEquals(initialCapacity, singleStructWriter.getValueCapacity()); - assertEquals(initialCapacity, intVector.getValueCapacity()); - assertEquals(initialCapacity, bigIntVector.getValueCapacity()); - assertEquals(initialCapacity, float4Vector.getValueCapacity()); - assertEquals(initialCapacity, float8Vector.getValueCapacity()); + int capacity = singleStructWriter.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 
2); + capacity = intVector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = bigIntVector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = float4Vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); + capacity = float8Vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); StructReader singleStructReader = new SingleStructReaderImpl(parent); From cec75410b78b70b30bd57908d920c006d9101b72 Mon Sep 17 00:00:00 2001 From: Yosuke Shiro Date: Wed, 9 Jan 2019 13:35:05 +0900 Subject: [PATCH 057/203] ARROW-4199: [GLib] Add garrow_seekable_input_stream_peek() Author: Yosuke Shiro Author: Kouhei Sutou Closes #3351 from shiro615/glib-support-peek and squashes the following commits: 1f445764 Improve document a5f0fdfd Add GARROW_AVAILABLE_IN_0_12 b27c0a04 Use g_bytes_new_static to avoid copying the data f9d9f237 Add support for Peek to InputStream --- c_glib/arrow-glib/input-stream.cpp | 24 ++++++++++++++++++++++++ c_glib/arrow-glib/input-stream.h | 3 +++ c_glib/test/test-buffer-input-stream.rb | 8 ++++++++ 3 files changed, 35 insertions(+) diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index cb36e49067ac9..cb1fb3b04a68e 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -325,6 +325,30 @@ garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream, } +/** + * garrow_seekable_input_stream_peek: + * @input_stream: A #GArrowSeekableInputStream. + * @n_bytes: The number of bytes to be peeked. + * + * Returns: (transfer full): The data of the buffer, up to the + * indicated number. The data becomes invalid after any operation on + * the stream. If the stream is unbuffered, the data is empty. + * + * It should be freed with g_bytes_unref() when no longer needed. 
+ * + * Since: 0.12.0 + */ +GBytes * +garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes) +{ + auto arrow_random_access_file = + garrow_seekable_input_stream_get_raw(input_stream); + auto string_view = arrow_random_access_file->Peek(n_bytes); + return g_bytes_new_static(string_view.data(), string_view.size()); +} + + typedef struct GArrowBufferInputStreamPrivate_ { GArrowBuffer *buffer; } GArrowBufferInputStreamPrivate; diff --git a/c_glib/arrow-glib/input-stream.h b/c_glib/arrow-glib/input-stream.h index 9deebd717363b..745b912749eb6 100644 --- a/c_glib/arrow-glib/input-stream.h +++ b/c_glib/arrow-glib/input-stream.h @@ -66,6 +66,9 @@ GArrowBuffer *garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *in gint64 position, gint64 n_bytes, GError **error); +GARROW_AVAILABLE_IN_0_12 +GBytes *garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, + gint64 n_bytes); #define GARROW_TYPE_BUFFER_INPUT_STREAM \ diff --git a/c_glib/test/test-buffer-input-stream.rb b/c_glib/test/test-buffer-input-stream.rb index f5a0132d2da98..cb6a667b3b7c0 100644 --- a/c_glib/test/test-buffer-input-stream.rb +++ b/c_glib/test/test-buffer-input-stream.rb @@ -39,4 +39,12 @@ def test_align read_buffer = buffer_input_stream.read(3) assert_equal("rld", read_buffer.data.to_s) end + + def test_peek + buffer = Arrow::Buffer.new("Hello World") + buffer_input_stream = Arrow::BufferInputStream.new(buffer) + peeked_data = buffer_input_stream.peek(5) + assert_equal(buffer_input_stream.read(5).data.to_s, + peeked_data.to_s) + end end From 090a8c020611b2f75ec0e36d765cc6d48adbe9a7 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 8 Jan 2019 22:59:00 -0600 Subject: [PATCH 058/203] ARROW-4200: [C++/Python] Enable conda_env_python.yml to work on Windows, simplify python/development.rst I also removed nomkl from conda_env_python.yml. 
It's sort of a developer decision whether or not they want to install the MKL -- we shouldn't force them to _not_ have it Author: Wes McKinney Closes #3353 from wesm/ARROW-4200 and squashes the following commits: 4849a326d Accept bkietz suggestions 576e63b27 Also add nomkl to python/Dockerfile 9b39e8300 Get conda env files working on Windows, small cleaning to Python development instructions --- ci/conda_env_python.yml | 2 -- ci/conda_env_unix.yml | 1 + ci/travis_script_python.sh | 1 + docs/source/python/development.rst | 23 +++++++---------------- python/Dockerfile | 1 + 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.yml index d3756cbcfa8c9..b51f5c32f3297 100644 --- a/ci/conda_env_python.yml +++ b/ci/conda_env_python.yml @@ -18,10 +18,8 @@ cython cloudpickle hypothesis -nomkl numpy pandas pytest -rsync setuptools setuptools_scm diff --git a/ci/conda_env_unix.yml b/ci/conda_env_unix.yml index eeb90e48dce72..9ecf549b504eb 100644 --- a/ci/conda_env_unix.yml +++ b/ci/conda_env_unix.yml @@ -18,3 +18,4 @@ # conda package dependencies specific to Unix-like environments (Linux and macOS) autoconf +rsync diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 69e115a9dcce7..e9a112275502e 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -47,6 +47,7 @@ fi conda create -y -q -p $CONDA_ENV_DIR \ --file $TRAVIS_BUILD_DIR/ci/conda_env_python.yml \ + nomkl \ cmake \ pip \ numpy=1.13.1 \ diff --git a/docs/source/python/development.rst b/docs/source/python/development.rst index 0bc1c62b4af18..d85537110e48c 100644 --- a/docs/source/python/development.rst +++ b/docs/source/python/development.rst @@ -86,18 +86,9 @@ On Linux and OSX: --file arrow/ci/conda_env_python.yml \ python=3.6 - source activate pyarrow-dev + conda activate pyarrow-dev -On Windows: - -.. code-block:: shell - - conda create -y -n pyarrow-dev -c conda-forge ^ - --file arrow\ci\conda_env_cpp.yml ^ - --file arrow\ci\conda_env_python.yml ^ - python=3.6 - - activate pyarrow-dev +For Windows, see the `Developing on Windows`_ section below. We need to set some environment variables to let Arrow's build system know about our build toolchain: @@ -310,11 +301,11 @@ First, starting from fresh clones of Apache Arrow: .. code-block:: shell - conda create -y -q -n pyarrow-dev ^ - python=3.6 numpy six setuptools cython pandas pytest ^ - cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib ^ - gflags brotli lz4-c zstd -c conda-forge - activate pyarrow-dev + conda create -y -n pyarrow-dev -c conda-forge ^ + --file arrow\ci\conda_env_cpp.yml ^ + --file arrow\ci\conda_env_python.yml ^ + python=3.7 + conda activate pyarrow-dev Now, we build and install Arrow C++ libraries diff --git a/python/Dockerfile b/python/Dockerfile index a99a4206290f8..ecabc94493cf0 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -21,6 +21,7 @@ FROM arrow:cpp ARG PYTHON_VERSION=3.6 ADD ci/conda_env_python.yml /arrow/ci/ RUN conda install -c conda-forge \ + nomkl \ --file arrow/ci/conda_env_python.yml \ python=$PYTHON_VERSION && \ conda clean --all From af925d9395bd8f5cf435f379e389633bd3acfdfd Mon Sep 17 00:00:00 2001 From: Dmitry Vukolov Date: Wed, 9 Jan 2019 13:58:48 +0100 Subject: [PATCH 059/203] ARROW-2038: [Python] Strip s3:// scheme in S3FSWrapper isdir() and isfile() This fixes an exception from ParquetDataset arising when the supplied path contains the `s3://` scheme specifier. 
The issue stemmed from the fact that while the underlying S3FileSystem does support both types of paths, with and without an explicit `s3://`, its function calls always return paths stripped of the scheme. This broke the logic in isdir() and isfile(). An alternative solution would be to strip the scheme in parquet.py (by adding it to _URI_STRIP_SCHEMES). This however would require additional code changes along the lines of: ```python _URI_STRIP_SCHEMES = ('hdfs', 's3') def _parse_uri(path): path = _stringify_path(path) parsed_uri = urlparse(path) if parsed_uri.scheme in _URI_STRIP_SCHEMES: scheme = '{0}://'.format(parsed_uri.scheme) path = parsed_uri.geturl().replace(scheme, '', 1) return path else: # ARROW-4073: On Windows returning the path with the scheme # stripped removes the drive letter, if any return path ``` Not sure if that would have any impact on handling HDFS. Therefore this patch proposes a safer, more localised approach, already used in other parts of S3FSWrapper. Author: Dmitry Vukolov Closes #3286 from dvukolov/master and squashes the following commits: 8de916c5 Strip s3:// scheme in S3FSWrapper isdir() and isfile() --- python/pyarrow/filesystem.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index 98efb1e3ec374..92a65ce69892a 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -319,7 +319,7 @@ class S3FSWrapper(DaskFileSystem): @implements(FileSystem.isdir) def isdir(self, path): - path = _stringify_path(path) + path = _sanitize_s3(_stringify_path(path)) try: contents = self.fs.ls(path) if len(contents) == 1 and contents[0] == path: @@ -331,7 +331,7 @@ def isdir(self, path): @implements(FileSystem.isfile) def isfile(self, path): - path = _stringify_path(path) + path = _sanitize_s3(_stringify_path(path)) try: contents = self.fs.ls(path) return len(contents) == 1 and contents[0] == path @@ -345,7 +345,7 @@ def walk(self, path, refresh=False): Generator version of what is in s3fs, which yields a flattened list of files """ - path = _stringify_path(path).replace('s3://', '') + path = _sanitize_s3(_stringify_path(path)) directories = set() files = set() @@ -371,6 +371,13 @@ def walk(self, path, refresh=False): yield tup +def _sanitize_s3(path): + if path.startswith('s3://'): + return path.replace('s3://', '') + else: + return path + + def _ensure_filesystem(fs): fs_type = type(fs) From 361285d86c345b3943eee8e63d3f9a782e7bf6da Mon Sep 17 00:00:00 2001 From: Pindikura Ravindra Date: Wed, 9 Jan 2019 10:09:48 -0600 Subject: [PATCH 060/203] ARROW-4209: [Gandiva] Avoid struct return param in IR Author: Pindikura Ravindra Closes #3356 from pravindra/struct and squashes the following commits: f437acd0 ARROW-4209: Avoid struct return param in IR --- cpp/src/gandiva/decimal_ir.cc | 30 ++++++++----------- .../gandiva/precompiled/decimal_wrapper.cc | 20 +++++-------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc index 38b35a64b293f..d10158a6f0487 100644 --- a/cpp/src/gandiva/decimal_ir.cc +++ b/cpp/src/gandiva/decimal_ir.cc @@ -218,27 +218,23 @@ DecimalIR::ValueWithOverflow DecimalIR::AddWithOverflowCheck(const ValueFull& x, // This is pretty complex, so use CPP fns.
llvm::Value* DecimalIR::AddLarge(const ValueFull& x, const ValueFull& y, const ValueFull& out) { - std::vector args; - + auto block = ir_builder()->GetInsertBlock(); + auto out_high_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_hi", block); + auto out_low_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_low", block); auto x_split = ValueSplit::MakeFromInt128(this, x.value()); - args.push_back(x_split.high()); - args.push_back(x_split.low()); - args.push_back(x.precision()); - args.push_back(x.scale()); - auto y_split = ValueSplit::MakeFromInt128(this, y.value()); - args.push_back(y_split.high()); - args.push_back(y_split.low()); - args.push_back(y.precision()); - args.push_back(y.scale()); - args.push_back(out.precision()); - args.push_back(out.scale()); - - auto split = ir_builder()->CreateCall( - module()->getFunction("add_large_decimal128_decimal128"), args); + std::vector args = { + x_split.high(), x_split.low(), x.precision(), x.scale(), + y_split.high(), y_split.low(), y.precision(), y.scale(), + out.precision(), out.scale(), out_high_ptr, out_low_ptr, + }; + ir_builder()->CreateCall(module()->getFunction("add_large_decimal128_decimal128"), + args); - auto sum = ValueSplit::MakeFromStruct(this, split).AsInt128(this); + auto out_high = ir_builder()->CreateLoad(out_high_ptr); + auto out_low = ir_builder()->CreateLoad(out_low_ptr); + auto sum = ValueSplit(out_high, out_low).AsInt128(this); ADD_TRACE_128("AddLarge : sum", sum); return sum; } diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc index fdc751f7fe87c..0118100971220 100644 --- a/cpp/src/gandiva/precompiled/decimal_wrapper.cc +++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc @@ -20,24 +20,18 @@ extern "C" { -/// TODO : Passing around structs in IR can be fragile due to c-abi compatibility issues. -/// This seems to work for now, but will need to revisit if we hit issues. -struct DecimalSplit { - int64_t high_bits; - uint64_t low_bits; -}; - FORCE_INLINE -DecimalSplit add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - int64_t y_high, uint64_t y_low, - int32_t y_precision, int32_t y_scale, - int32_t out_precision, int32_t out_scale) { +void add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::Decimal128Full x(x_high, x_low, x_precision, x_scale); gandiva::Decimal128Full y(y_high, y_low, y_precision, y_scale); arrow::Decimal128 out = gandiva::decimalops::Add(x, y, out_precision, out_scale); - return DecimalSplit{out.high_bits(), out.low_bits()}; + *out_high = out.high_bits(); + *out_low = out.low_bits(); } } // extern "C" From bcfacaafcb181a39d43dbb3d0540c018a5afe157 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 9 Jan 2019 23:12:31 +0100 Subject: [PATCH 061/203] ARROW-3233: [Python] Add prose documentation for CUDA support It will be harder to add generated API docs without requiring CUDA support on the machine building the docs. 
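For reference, the core host/device round-trip that the new page walks through condenses to a few lines. This is a minimal sketch assuming a CUDA-enabled pyarrow build and at least one visible CUDA device (device 0 here):

```python
import numpy as np
from pyarrow import cuda

ctx = cuda.Context(0)                 # grab a context on CUDA device 0
arr = np.arange(4, dtype=np.int32)
cuda_buf = ctx.buffer_from_data(arr)  # copy host data into device memory
host_buf = cuda_buf.copy_to_host()    # copy it back as a regular CPU buffer
assert np.array_equal(np.frombuffer(host_buf, dtype=np.int32), arr)
```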
Author: Antoine Pitrou Closes #3359 from pitrou/ARROW-3233-pyarrow-cuda-doc and squashes the following commits: 40b63f0f ARROW-3233: Add prose documentation for CUDA support --- docs/source/python/cuda.rst | 159 ++++++++++++++++++++++++++++++++++ docs/source/python/index.rst | 1 + docs/source/python/memory.rst | 3 + 3 files changed, 163 insertions(+) create mode 100644 docs/source/python/cuda.rst diff --git a/docs/source/python/cuda.rst b/docs/source/python/cuda.rst new file mode 100644 index 0000000000000..b0150c1c5c8a2 --- /dev/null +++ b/docs/source/python/cuda.rst @@ -0,0 +1,159 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.cuda + +CUDA Integration +================ + +Arrow is not limited to CPU buffers (located in the computer's main memory, +also named "host memory"). It also has provisions for accessing buffers +located on a CUDA-capable GPU device (in "device memory"). + +.. note:: + This functionality is optional and must have been enabled at build time. + If this is not done by your package manager, you might have to build Arrow + yourself. + +CUDA Contexts +------------- + +A CUDA context represents access to a particular CUDA-capable device. +For example, this is creating a CUDA context accessing CUDA device number 0:: + + >>> from pyarrow import cuda + >>> ctx = cuda.Context(0) + >>> + +CUDA Buffers +------------ + +A CUDA buffer can be created by copying data from host memory to the memory +of a CUDA device, using the :meth:`Context.buffer_from_data` method. +The source data can be any Python buffer-like object, including Arrow buffers:: + + >>> import numpy as np + >>> arr = np.arange(4, dtype=np.int32) + >>> arr.nbytes + 16 + >>> cuda_buf = ctx.buffer_from_data(arr) + >>> type(cuda_buf) + pyarrow._cuda.CudaBuffer + >>> cuda_buf.size # The buffer's size in bytes + 16 + >>> cuda_buf.address # The buffer's address in device memory + 30088364544 + >>> cuda_buf.context.device_number + 0 + +Conversely, you can copy a CUDA buffer back to host memory, getting a regular +CPU buffer:: + + >>> buf = cuda_buf.copy_to_host() + >>> type(buf) + pyarrow.lib.Buffer + >>> np.frombuffer(buf, dtype=np.int32) + array([0, 1, 2, 3], dtype=int32) + +.. warning:: + Many Arrow functions expect a CPU buffer but will not check the buffer's + actual type. You will get a crash if you pass a CUDA buffer to such a + function:: + + >>> pa.py_buffer(b"x" * 16).equals(cuda_buf) + Segmentation fault + +Numba Integration +----------------- + +There is not much you can do directly with Arrow CUDA buffers from Python, +but they support interoperation with `Numba `_, +a JIT compiler which can turn Python code into optimized CUDA kernels.
+ +Arrow to Numba +~~~~~~~~~~~~~~ + +First let's define a Numba CUDA kernel operating on an ``int32`` array. Here, +we will simply increment each array element (assuming the array is writable):: + + import numba.cuda + + @numba.cuda.jit + def increment_by_one(an_array): + pos = numba.cuda.grid(1) + if pos < an_array.size: + an_array[pos] += 1 + +Then we need to wrap our CUDA buffer into a Numba "device array" with the right +array metadata (shape, strides and datatype). This is necessary so that Numba +can identify the array's characteristics and compile the kernel with the +appropriate type declarations. + +In this case the metadata can simply be got from the original Numpy array. +Note the GPU data isn't copied, just pointed to:: + + >>> from numba.cuda.cudadrv.devicearray import DeviceNDArray + >>> device_arr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=cuda_buf.to_numba()) + +(ideally we could have defined an Arrow array in CPU memory, copied it to CUDA +memory without losing type information, and then invoked the Numba kernel on it +without constructing the DeviceNDArray by hand; this is not yet possible) + +Finally we can run the Numba CUDA kernel on the Numba device array (here +with a 16x16 grid size):: + + >>> increment_by_one[16, 16](device_arr) + +And the results can be checked by copying back the CUDA buffer to CPU memory:: + + >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32) + array([1, 2, 3, 4], dtype=int32) + +Numba to Arrow +~~~~~~~~~~~~~~ + +Conversely, a Numba-created device array can be viewed as an Arrow CUDA buffer, +using the :meth:`CudaBuffer.from_numba` factory method. + +For the sake of example, let's first create a Numba device array:: + + >>> arr = np.arange(10, 14, dtype=np.int32) + >>> arr + array([10, 11, 12, 13], dtype=int32) + >>> device_arr = numba.cuda.to_device(arr) + +Then we can create a CUDA buffer pointing the device array's memory. +We don't need to pass a CUDA context explicitly this time: the appropriate +CUDA context is automatically retrieved and adapted from the Numba object. + +:: + + >>> cuda_buf = cuda.CudaBuffer.from_numba(device_arr.gpu_data) + >>> cuda_buf.size + 16 + >>> cuda_buf.address + 30088364032 + >>> cuda_buf.context.device_number + 0 + +Of course, we can copy the CUDA buffer back to host memory:: + + >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32) + array([10, 11, 12, 13], dtype=int32) + +.. seealso:: + Documentation for Numba's `CUDA support `_. diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst index fe04a73f32ef2..9f96771494c79 100644 --- a/docs/source/python/index.rst +++ b/docs/source/python/index.rst @@ -43,6 +43,7 @@ files into Arrow structures. pandas csv parquet + cuda extending api development diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 0d30866d0aa4d..ba66807b38a8e 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -109,6 +109,9 @@ the buffer is garbaged-collected, all of the memory is freed: buf = None pa.total_allocated_bytes() +.. seealso:: + On-GPU buffers using Arrow's optional :doc:`CUDA integration `. + Input and Output ================ From 3330d660643a034168b472b52aebfe0fea84b8cf Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 9 Jan 2019 16:14:25 -0600 Subject: [PATCH 062/203] ARROW-4118: [Python] Fix benchmark setup for "asv run" "conda activate" unfortunately isn't available from a non-interactive shell, and running bash as interactive doesn't look like a workable solution. 
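For context, "conda activate" is a shell function that conda's init hook defines only in interactive shells, which is why ASV's non-interactive build step cannot call it; the older `activate` script on the PATH still works when sourced. A minimal sketch of the pattern the patch adopts (ASV sets `$ASV_ENV_DIR` to the environment it wants active):

```bash
#!/bin/bash
set -e
# "conda activate" is only defined by conda's interactive init hook
# (see https://github.com/conda/conda/issues/8072), so fall back to
# sourcing the legacy "activate" script that lives on the PATH.
if [ -z "$ASV_ENV_DIR" ]; then exit 1; fi
source activate "$ASV_ENV_DIR"
```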
Also fix a setup slowness issue in the Parquet benchmarks, and fix a C++ ABI issue by downloading packages from Anaconda rather than conda-forge. Author: Antoine Pitrou Closes #3357 from pitrou/ARROW-4118-fix-asv-run and squashes the following commits: b07b68e61 ARROW-4118: Fix benchmark setup for "asv run" --- docs/source/python/benchmarks.rst | 24 +++++++++++++----------- python/asv-build.sh | 17 ++++++++++++----- python/asv.conf.json | 4 +++- python/benchmarks/parquet.py | 16 +++++++++------- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/docs/source/python/benchmarks.rst b/docs/source/python/benchmarks.rst index 7672294a4eddf..12205c57355bb 100644 --- a/docs/source/python/benchmarks.rst +++ b/docs/source/python/benchmarks.rst @@ -19,35 +19,37 @@ Benchmarks ========== The ``pyarrow`` package comes with a suite of benchmarks meant to -run with `asv`_. You'll need to install the ``asv`` package first +run with `ASV`_. You'll need to install the ``asv`` package first (``pip install asv`` or ``conda install -c conda-forge asv``). -The benchmarks are run using `asv`_ which is also their only requirement. - Running the benchmarks ---------------------- -To run the benchmarks, call ``asv run --python=same``. You cannot use the -plain ``asv run`` command at the moment as asv cannot handle python packages -in subdirectories of a repository. +To run the benchmarks for a locally-built Arrow, run ``asv dev`` or +``asv run --python=same``. -Running with arbitrary revisions --------------------------------- +Running for arbitrary Git revisions +----------------------------------- ASV allows to store results and generate graphs of the benchmarks over -the project's evolution. For this you have the latest development version of ASV: +the project's evolution. You need to have the latest development version of ASV: .. code:: pip install git+https://github.com/airspeed-velocity/asv +The build scripts assume that Conda's ``activate`` script is on the PATH +(the ``conda activate`` command unfortunately isn't available from +non-interactive scripts). + Now you should be ready to run ``asv run`` or whatever other command -suits your needs. +suits your needs. Note that this can be quite long, as each Arrow needs +to be rebuilt for each Git revision you're running the benchmarks for. Compatibility ------------- We only expect the benchmarking setup to work with Python 3.6 or later, -on a Unix-like system. +on a Unix-like system with bash. .. 
_asv: https://asv.readthedocs.org/ diff --git a/python/asv-build.sh b/python/asv-build.sh index 7b55456394dcd..90c7872cc2b8d 100755 --- a/python/asv-build.sh +++ b/python/asv-build.sh @@ -21,7 +21,9 @@ set -e # ASV doesn't activate its conda environment for us if [ -z "$ASV_ENV_DIR" ]; then exit 1; fi -conda activate $ASV_ENV_DIR +# Avoid "conda activate" because it's only set up in interactive shells +# (https://github.com/conda/conda/issues/8072) +source activate $ASV_ENV_DIR echo "== Conda Prefix for benchmarks: " $CONDA_PREFIX " ==" # Build Arrow C++ libraries @@ -32,6 +34,8 @@ export ORC_HOME=$CONDA_PREFIX export PROTOBUF_HOME=$CONDA_PREFIX export BOOST_ROOT=$CONDA_PREFIX +export CXXFLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" + pushd ../cpp mkdir -p build pushd build @@ -40,9 +44,11 @@ cmake -GNinja \ -DCMAKE_BUILD_TYPE=release \ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DARROW_CXXFLAGS=$CXXFLAGS \ - -DARROW_PYTHON=ON \ - -DARROW_PLASMA=ON \ - -DARROW_BUILD_TESTS=OFF \ + -DARROW_USE_GLOG=off \ + -DARROW_PARQUET=on \ + -DARROW_PYTHON=on \ + -DARROW_PLASMA=on \ + -DARROW_BUILD_TESTS=off \ .. cmake --build . --target install @@ -52,7 +58,8 @@ popd # Build pyarrow wrappers export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.1 export PYARROW_BUILD_TYPE=release -export PYARROW_PARALLEL=4 +export PYARROW_PARALLEL=8 +export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PLASMA=1 python setup.py clean diff --git a/python/asv.conf.json b/python/asv.conf.json index 40938ee713b08..09031c833035d 100644 --- a/python/asv.conf.json +++ b/python/asv.conf.json @@ -35,6 +35,7 @@ // of the repository. "repo_subdir": "python", + // Custom build commands for Arrow. "build_command": ["/bin/bash {build_dir}/asv-build.sh"], "install_command": ["/bin/bash {build_dir}/asv-install.sh"], "uninstall_command": ["/bin/bash {build_dir}/asv-uninstall.sh"], @@ -56,7 +57,8 @@ // determined by looking for tools on the PATH environment // variable. "environment_type": "conda", - "conda_channels": ["conda-forge", "defaults"], + // Avoid conda-forge to avoid C++ ABI issues + "conda_channels": ["defaults"], // the base URL to show a commit for the project. "show_commit_url": "https://github.com/apache/arrow/commit/", diff --git a/python/benchmarks/parquet.py b/python/benchmarks/parquet.py index fd617934e8baf..4f555872a1550 100644 --- a/python/benchmarks/parquet.py +++ b/python/benchmarks/parquet.py @@ -15,11 +15,12 @@ # specific language governing permissions and limitations # under the License. 
-import pandas as pd -import random import shutil import tempfile +import numpy as np +import pandas as pd + import pyarrow as pa try: import pyarrow.parquet as pq @@ -38,18 +39,19 @@ class ParquetManifestCreation(object): def setup(self, num_partitions, num_threads): if pq is None: - raise NotImplementedError + raise NotImplementedError("Parquet support not enabled") self.tmpdir = tempfile.mkdtemp('benchmark_parquet') - num1 = [random.choice(range(0, num_partitions)) - for _ in range(self.size)] - num2 = [random.choice(range(0, 1000)) for _ in range(self.size)] + rnd = np.random.RandomState(42) + num1 = rnd.randint(0, num_partitions, size=self.size) + num2 = rnd.randint(0, 1000, size=self.size) output_df = pd.DataFrame({'num1': num1, 'num2': num2}) output_table = pa.Table.from_pandas(output_df) pq.write_to_dataset(output_table, self.tmpdir, ['num1']) def teardown(self, num_partitions, num_threads): - shutil.rmtree(self.tmpdir) + if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) def time_manifest_creation(self, num_partitions, num_threads): pq.ParquetManifest(self.tmpdir, metadata_nthreads=num_threads) From 6b496f7c1929a0a371fe708ae653228a9e722150 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 9 Jan 2019 16:16:40 -0600 Subject: [PATCH 063/203] ARROW-3997: [Documentation] Clarify dictionary index type Mandate signed integers for dictionary index types, without constraining integer width. Author: Antoine Pitrou Closes #3355 from pitrou/ARROW-3997-dictionary-encoding-doc and squashes the following commits: 4e05e2642 ARROW-3997: Clarify dictionary index type --- docs/source/format/Layout.rst | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/source/format/Layout.rst b/docs/source/format/Layout.rst index 69cbf0654900a..f3e5290ec1803 100644 --- a/docs/source/format/Layout.rst +++ b/docs/source/format/Layout.rst @@ -614,13 +614,13 @@ Dictionary encoding ------------------- When a field is dictionary encoded, the values are represented by an array of -Int32 representing the index of the value in the dictionary. The Dictionary is -received as one or more DictionaryBatches with the id referenced by a -dictionary attribute defined in the metadata (Message.fbs) in the Field -table. The dictionary has the same layout as the type of the field would -dictate. Each entry in the dictionary can be accessed by its index in the -DictionaryBatches. When a Schema references a Dictionary id, it must send at -least one DictionaryBatch for this id. +signed integers representing the index of the value in the dictionary. +The Dictionary is received as one or more DictionaryBatches with the id +referenced by a dictionary attribute defined in the metadata (Message.fbs) +in the Field table. The dictionary has the same layout as the type of the +field would dictate. Each entry in the dictionary can be accessed by its +index in the DictionaryBatches. When a Schema references a Dictionary id, +it must send at least one DictionaryBatch for this id. 
As an example, you could have the following data: :: @@ -640,16 +640,17 @@ As an example, you could have the following data: :: In dictionary-encoded form, this could appear as: :: data List (dictionary-encoded, dictionary id i) - indices: [0, 0, 0, 1, 1, 1, 0] + type: Int32 + values: + [0, 0, 0, 1, 1, 1, 0] dictionary i - - type: List - - [ - ['a', 'b'], - ['c', 'd', 'e'], - ] + type: List + values: + [ + ['a', 'b'], + ['c', 'd', 'e'], + ] References ---------- From b8aeb79e94a5a507aeec55d0b6c6bf5d7f0100b2 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 9 Jan 2019 16:18:19 -0600 Subject: [PATCH 064/203] ARROW-854: [Format] Add tentative SparseTensor format I'm interested in making a language-agnostic sparse tensor format. I believe one of the suitable places to do this is Apache Arrow, so let me propose my idea here. First of all, in my investigation I found that there is no common memory layout for sparse tensor representations. This means we need some kind of conversion to share sparse tensors among different systems, even when the data format is logically the same. It is the same situation as with dataframes, and this is the reason why I believe Apache Arrow is the suitable place. There are many formats for representing a sparse tensor. Most of them are specialized for matrices, which have two dimensions; there are few formats for general sparse tensors with more than two dimensions. I think the COO format is suitable to start with, because COO can handle any number of dimensions and many systems support it. In my investigation, the systems that support COO are SciPy, dask, pydata/sparse, TensorFlow, and PyTorch. Additionally, the CSR format for matrices may also be good to support from the start. The reason is that CSR is efficient for extracting row slices, which may be important for extracting samples from tidy data, and it is supported by SciPy, MXNet, and R's Matrix library. I have added my prototype definition of the SparseTensor format in this pull request. I designed it to be extensible so that we can support additional sparse formats; we will at least need a format for more than two dimensions beyond COO, so this extensibility is necessary.
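To make the difference between the two formats concrete, here is a small illustrative sketch using SciPy (one of the systems listed above); it is not part of the patch itself:

```python
import numpy as np
from scipy import sparse

m = np.array([[0, 5, 0],
              [7, 0, 8]])

coo = sparse.coo_matrix(m)
# COO stores one (row, col, value) triple per non-zero entry:
print(coo.row, coo.col, coo.data)         # [0 1 1] [1 0 2] [5 7 8]

csr = coo.tocsr()
# CSR replaces the row coordinates with per-row offsets (indptr),
# which is what makes row slicing cheap:
print(csr.indptr, csr.indices, csr.data)  # [0 1 3] [1 0 2] [5 7 8]
```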
Author: Kenta Murata Closes #2546 from mrkn/sparse_tensor_proposal and squashes the following commits: 148bff822 make format d57e56fc6 Merge sparse_tensor_format.h into sparse_tensor.h 880bbc4eb Rename too-verbose function name c83ea6aaf Add type aliases of sparse tensor types 90e8b3166 Rename sparse tensor classes 07a651863 Use substitution instead of constructor call 37a0a14c6 Remove needless function declaration 97e85bd35 Use std::make_shared 3dd434c83 Capitalize member function name 6ef6ad065 Apply code formatter 6f291581e Mark APIs for sparse tensor as EXPERIMENTAL ff3ea71c5 Rename length to non_zero_length in SparseTensor f78230344 Return Status::IOError instead of DCHECK if message header type is not matched 7e814de36 Put EXPERIMENTAL markn in comments 357860d8c Fix typo in comments 43d8eea44 Fix coding style 99b1d1d4d Add missing ARROW_EXPORT specifiers 401ae8023 Fix SparseCSRIndex::ToString and add tests 9e457acd3 Remove needless virtual specifiers 3b1db7d32 Add SparseTensorBase::Equals d6a8c3805 Unify Tensor.fbs and SparseTensor.fbs b3a62ebfa Fix format 6bc9e296f Support IPC read and write of SparseTensor 1d9042709 Fix format 51a83bfee Add SparseTensorFormat 93c03adad Add SparseIndex::ToString() 021b46be0 Add SparseTensorBase ed3984dd4 Add SparseIndex::format_type 4251b4d08 Add SparseCSRIndex 433c9b441 Change COO index matrix to column-major in a format description 392a25b7c Implement SparseTensor and SparseCOOIndex b24f3c342 Insert additional padding in sparse tensor format c508db086 Write sparse tensor format in IPC.md 2b50040f5 Add an example of the CSR format in comment 76c56dd35 Make indptr of CSR a buffer d7e653f17 Add an example of COO format in comment 866b2c13a Add header comments in SparseTensor.fbs aa9b8a4d0 Add SparseTensor.fbs in FBS_SRC 1f16ffed8 Fix syntax error in SparseTensor.fbs c3bc6edfa Add tentative SparseTensor format --- cpp/src/arrow/CMakeLists.txt | 2 + cpp/src/arrow/compare.cc | 93 +++++ cpp/src/arrow/compare.h | 4 + cpp/src/arrow/ipc/message.cc | 2 + cpp/src/arrow/ipc/message.h | 2 +- cpp/src/arrow/ipc/metadata-internal.cc | 148 ++++++++ cpp/src/arrow/ipc/metadata-internal.h | 12 + cpp/src/arrow/ipc/read-write-test.cc | 112 ++++++ cpp/src/arrow/ipc/reader.cc | 119 +++++++ cpp/src/arrow/ipc/reader.h | 17 + cpp/src/arrow/ipc/writer.cc | 101 ++++++ cpp/src/arrow/ipc/writer.h | 15 + cpp/src/arrow/sparse_tensor-test.cc | 244 +++++++++++++ cpp/src/arrow/sparse_tensor.cc | 452 +++++++++++++++++++++++++ cpp/src/arrow/sparse_tensor.h | 211 ++++++++++++ cpp/src/arrow/tensor.h | 6 + docs/source/format/IPC.rst | 24 ++ format/Message.fbs | 4 +- format/Tensor.fbs | 96 ++++++ 19 files changed, 1661 insertions(+), 3 deletions(-) create mode 100644 cpp/src/arrow/sparse_tensor-test.cc create mode 100644 cpp/src/arrow/sparse_tensor.cc create mode 100644 cpp/src/arrow/sparse_tensor.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index f2a811247287b..91bdce294c2d1 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -83,6 +83,7 @@ set(ARROW_SRCS table.cc table_builder.cc tensor.cc + sparse_tensor.cc type.cc visitor.cc @@ -286,6 +287,7 @@ ADD_ARROW_TEST(type-test) ADD_ARROW_TEST(table-test) ADD_ARROW_TEST(table_builder-test) ADD_ARROW_TEST(tensor-test) +ADD_ARROW_TEST(sparse_tensor-test) ADD_ARROW_BENCHMARK(builder-benchmark) ADD_ARROW_BENCHMARK(column-benchmark) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index efc8ad82faf93..114752934c9f6 100644 --- a/cpp/src/arrow/compare.cc +++ 
b/cpp/src/arrow/compare.cc @@ -30,6 +30,7 @@ #include "arrow/array.h" #include "arrow/buffer.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" @@ -782,6 +783,98 @@ bool TensorEquals(const Tensor& left, const Tensor& right) { return are_equal; } +namespace { + +template +struct SparseTensorEqualsImpl { + static bool Compare(const SparseTensorImpl& left, + const SparseTensorImpl& right) { + // TODO(mrkn): should we support the equality among different formats? + return false; + } +}; + +template +struct SparseTensorEqualsImpl { + static bool Compare(const SparseTensorImpl& left, + const SparseTensorImpl& right) { + DCHECK(left.type()->id() == right.type()->id()); + DCHECK(left.shape() == right.shape()); + DCHECK(left.non_zero_length() == right.non_zero_length()); + + const auto& left_index = checked_cast(*left.sparse_index()); + const auto& right_index = checked_cast(*right.sparse_index()); + + if (!left_index.Equals(right_index)) { + return false; + } + + const auto& size_meta = dynamic_cast(*left.type()); + const int byte_width = size_meta.bit_width() / CHAR_BIT; + DCHECK_GT(byte_width, 0); + + const uint8_t* left_data = left.data()->data(); + const uint8_t* right_data = right.data()->data(); + + return memcmp(left_data, right_data, + static_cast(byte_width * left.non_zero_length())); + } +}; + +template +inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl& left, + const SparseTensor& right) { + switch (right.format_id()) { + case SparseTensorFormat::COO: { + const auto& right_coo = + checked_cast&>(right); + return SparseTensorEqualsImpl::Compare(left, + right_coo); + } + + case SparseTensorFormat::CSR: { + const auto& right_csr = + checked_cast&>(right); + return SparseTensorEqualsImpl::Compare(left, + right_csr); + } + + default: + return false; + } +} + +} // namespace + +bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) { + if (&left == &right) { + return true; + } else if (left.type()->id() != right.type()->id()) { + return false; + } else if (left.size() == 0) { + return true; + } else if (left.shape() != right.shape()) { + return false; + } else if (left.non_zero_length() != right.non_zero_length()) { + return false; + } + + switch (left.format_id()) { + case SparseTensorFormat::COO: { + const auto& left_coo = checked_cast&>(left); + return SparseTensorEqualsImplDispatch(left_coo, right); + } + + case SparseTensorFormat::CSR: { + const auto& left_csr = checked_cast&>(left); + return SparseTensorEqualsImplDispatch(left_csr, right); + } + + default: + return false; + } +} + bool TypeEquals(const DataType& left, const DataType& right) { bool are_equal; // The arrays are the same object diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index 21e2fdc24f19c..d49d7cc0fdb08 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -29,12 +29,16 @@ namespace arrow { class Array; class DataType; class Tensor; +class SparseTensor; /// Returns true if the arrays are exactly equal bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right); bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right); +/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal +bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right); + /// Returns true if the arrays are approximately equal. 
For non-floating point /// types, this is equivalent to ArrayEquals(left, right) bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right); diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index 8adf4a8b66038..23709a4619207 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -63,6 +63,8 @@ class Message::MessageImpl { return Message::RECORD_BATCH; case flatbuf::MessageHeader_Tensor: return Message::TENSOR; + case flatbuf::MessageHeader_SparseTensor: + return Message::SPARSE_TENSOR; default: return Message::NONE; } diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h index 092a19ff9a0cf..760012d1a6878 100644 --- a/cpp/src/arrow/ipc/message.h +++ b/cpp/src/arrow/ipc/message.h @@ -70,7 +70,7 @@ constexpr int kMaxNestingDepth = 64; /// \brief An IPC message including metadata and body class ARROW_EXPORT Message { public: - enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR }; + enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR, SPARSE_TENSOR }; /// \brief Construct message, but do not validate /// diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 1d4c80c2946b1..da6711395f8ea 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -31,6 +31,7 @@ #include "arrow/ipc/Tensor_generated.h" // IWYU pragma: keep #include "arrow/ipc/message.h" #include "arrow/ipc/util.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" @@ -50,6 +51,7 @@ using DictionaryOffset = flatbuffers::Offset; using FieldOffset = flatbuffers::Offset; using KeyValueOffset = flatbuffers::Offset; using RecordBatchOffset = flatbuffers::Offset; +using SparseTensorOffset = flatbuffers::Offset; using Offset = flatbuffers::Offset; using FBString = flatbuffers::Offset; @@ -781,6 +783,106 @@ Status WriteTensorMessage(const Tensor& tensor, int64_t buffer_start_offset, body_length, out); } +Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index, + const std::vector& buffers, + flatbuf::SparseTensorIndex* fb_sparse_index_type, + Offset* fb_sparse_index, size_t* num_buffers) { + *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseTensorIndexCOO; + const BufferMetadata& indices_metadata = buffers[0]; + flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length); + *fb_sparse_index = flatbuf::CreateSparseTensorIndexCOO(fbb, &indices).Union(); + *num_buffers = 1; + return Status::OK(); +} + +Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index, + const std::vector& buffers, + flatbuf::SparseTensorIndex* fb_sparse_index_type, + Offset* fb_sparse_index, size_t* num_buffers) { + *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseMatrixIndexCSR; + const BufferMetadata& indptr_metadata = buffers[0]; + const BufferMetadata& indices_metadata = buffers[1]; + flatbuf::Buffer indptr(indptr_metadata.offset, indptr_metadata.length); + flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length); + *fb_sparse_index = flatbuf::CreateSparseMatrixIndexCSR(fbb, &indptr, &indices).Union(); + *num_buffers = 2; + return Status::OK(); +} + +Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index, + const std::vector& buffers, + flatbuf::SparseTensorIndex* fb_sparse_index_type, + Offset* fb_sparse_index, size_t* num_buffers) { + switch (sparse_index.format_id()) { + case SparseTensorFormat::COO: + 
RETURN_NOT_OK(MakeSparseTensorIndexCOO( + fbb, checked_cast(sparse_index), buffers, + fb_sparse_index_type, fb_sparse_index, num_buffers)); + break; + + case SparseTensorFormat::CSR: + RETURN_NOT_OK(MakeSparseMatrixIndexCSR( + fbb, checked_cast(sparse_index), buffers, + fb_sparse_index_type, fb_sparse_index, num_buffers)); + break; + + default: + std::stringstream ss; + ss << "Unsupporoted sparse tensor format:: " << sparse_index.ToString() + << std::endl; + return Status::NotImplemented(ss.str()); + } + + return Status::OK(); +} + +Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor, int64_t body_length, + const std::vector& buffers, + SparseTensorOffset* offset) { + flatbuf::Type fb_type_type; + Offset fb_type; + RETURN_NOT_OK( + TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type)); + + using TensorDimOffset = flatbuffers::Offset; + std::vector dims; + for (int i = 0; i < sparse_tensor.ndim(); ++i) { + FBString name = fbb.CreateString(sparse_tensor.dim_name(i)); + dims.push_back(flatbuf::CreateTensorDim(fbb, sparse_tensor.shape()[i], name)); + } + + auto fb_shape = fbb.CreateVector(dims); + + flatbuf::SparseTensorIndex fb_sparse_index_type; + Offset fb_sparse_index; + size_t num_index_buffers = 0; + RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers, + &fb_sparse_index_type, &fb_sparse_index, + &num_index_buffers)); + + const BufferMetadata& data_metadata = buffers[num_index_buffers]; + flatbuf::Buffer data(data_metadata.offset, data_metadata.length); + + const int64_t non_zero_length = sparse_tensor.non_zero_length(); + + *offset = + flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length, + fb_sparse_index_type, fb_sparse_index, &data); + + return Status::OK(); +} + +Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length, + const std::vector& buffers, + std::shared_ptr* out) { + FBB fbb; + SparseTensorOffset fb_sparse_tensor; + RETURN_NOT_OK( + MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor)); + return WriteFBMessage(fbb, flatbuf::MessageHeader_SparseTensor, + fb_sparse_tensor.Union(), body_length, out); +} + Status WriteDictionaryMessage(int64_t id, int64_t length, int64_t body_length, const std::vector& nodes, const std::vector& buffers, @@ -933,6 +1035,52 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type return TypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type); } +Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr* type, + std::vector* shape, + std::vector* dim_names, + int64_t* non_zero_length, + SparseTensorFormat::type* sparse_tensor_format_id) { + auto message = flatbuf::GetMessage(metadata.data()); + if (message->header_type() != flatbuf::MessageHeader_SparseTensor) { + return Status::IOError("Header of flatbuffer-encoded Message is not SparseTensor."); + } + if (message->header() == nullptr) { + return Status::IOError("Header-pointer of flatbuffer-encoded Message is null."); + } + + auto sparse_tensor = reinterpret_cast(message->header()); + int ndim = static_cast(sparse_tensor->shape()->size()); + + for (int i = 0; i < ndim; ++i) { + auto dim = sparse_tensor->shape()->Get(i); + + shape->push_back(dim->size()); + auto fb_name = dim->name(); + if (fb_name == 0) { + dim_names->push_back(""); + } else { + dim_names->push_back(fb_name->str()); + } + } + + *non_zero_length = sparse_tensor->non_zero_length(); + + switch (sparse_tensor->sparseIndex_type()) { + case 
flatbuf::SparseTensorIndex_SparseTensorIndexCOO: + *sparse_tensor_format_id = SparseTensorFormat::COO; + break; + + case flatbuf::SparseTensorIndex_SparseMatrixIndexCSR: + *sparse_tensor_format_id = SparseTensorFormat::CSR; + break; + + default: + return Status::Invalid("Unrecognized sparse index type"); + } + + return TypeFromFlatbuffer(sparse_tensor->type_type(), sparse_tensor->type(), {}, type); +} + // ---------------------------------------------------------------------- // Implement message writing diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h index 152ca1367ec0e..6562382b878e6 100644 --- a/cpp/src/arrow/ipc/metadata-internal.h +++ b/cpp/src/arrow/ipc/metadata-internal.h @@ -33,6 +33,7 @@ #include "arrow/ipc/dictionary.h" // IYWU pragma: keep #include "arrow/ipc/message.h" #include "arrow/memory_pool.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" namespace arrow { @@ -40,6 +41,7 @@ namespace arrow { class DataType; class Schema; class Tensor; +class SparseTensor; namespace flatbuf = org::apache::arrow::flatbuf; @@ -103,6 +105,12 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type std::vector* shape, std::vector* strides, std::vector* dim_names); +// EXPERIMENTAL: Extracting metadata of a sparse tensor from the message +Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr* type, + std::vector* shape, + std::vector* dim_names, int64_t* length, + SparseTensorFormat::type* sparse_tensor_format_id); + /// Write a serialized message metadata with a length-prefix and padding to an /// 8-byte offset. Does not make assumptions about whether the stream is /// aligned already @@ -137,6 +145,10 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length, Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset, std::shared_ptr* out); +Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length, + const std::vector& buffers, + std::shared_ptr* out); + Status WriteFileFooter(const Schema& schema, const std::vector& dictionaries, const std::vector& record_batches, DictionaryMemo* dictionary_memo, io::OutputStream* out); diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc index 3a723badf37d7..bc27386f34f30 100644 --- a/cpp/src/arrow/ipc/read-write-test.cc +++ b/cpp/src/arrow/ipc/read-write-test.cc @@ -38,6 +38,7 @@ #include "arrow/ipc/writer.h" #include "arrow/memory_pool.h" #include "arrow/record_batch.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/test-util.h" @@ -844,6 +845,117 @@ TEST_F(TestTensorRoundTrip, NonContiguous) { CheckTensorRoundTrip(tensor); } +class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture { + public: + void SetUp() { pool_ = default_memory_pool(); } + void TearDown() { io::MemoryMapFixture::TearDown(); } + + template + void CheckSparseTensorRoundTrip(const SparseTensorImpl& tensor) { + GTEST_FAIL(); + } +}; + +template <> +void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip( + const SparseTensorImpl& tensor) { + const auto& type = checked_cast(*tensor.type()); + const int elem_size = type.bit_width() / 8; + + int32_t metadata_length; + int64_t body_length; + + ASSERT_OK(mmap_->Seek(0)); + + ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, + default_memory_pool())); + + const auto& sparse_index = checked_cast(*tensor.sparse_index()); + const 
int64_t indices_length = elem_size * sparse_index.indices()->size(); + const int64_t data_length = elem_size * tensor.non_zero_length(); + const int64_t expected_body_length = indices_length + data_length; + ASSERT_EQ(expected_body_length, body_length); + + ASSERT_OK(mmap_->Seek(0)); + + std::shared_ptr result; + ASSERT_OK(ReadSparseTensor(mmap_.get(), &result)); + + const auto& resulted_sparse_index = + checked_cast(*result->sparse_index()); + ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length); + ASSERT_EQ(result->data()->size(), data_length); + ASSERT_TRUE(result->Equals(tensor)); +} + +template <> +void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip( + const SparseTensorImpl& tensor) { + const auto& type = checked_cast(*tensor.type()); + const int elem_size = type.bit_width() / 8; + + int32_t metadata_length; + int64_t body_length; + + ASSERT_OK(mmap_->Seek(0)); + + ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, + default_memory_pool())); + + const auto& sparse_index = checked_cast(*tensor.sparse_index()); + const int64_t indptr_length = elem_size * sparse_index.indptr()->size(); + const int64_t indices_length = elem_size * sparse_index.indices()->size(); + const int64_t data_length = elem_size * tensor.non_zero_length(); + const int64_t expected_body_length = indptr_length + indices_length + data_length; + ASSERT_EQ(expected_body_length, body_length); + + ASSERT_OK(mmap_->Seek(0)); + + std::shared_ptr result; + ASSERT_OK(ReadSparseTensor(mmap_.get(), &result)); + + const auto& resulted_sparse_index = + checked_cast(*result->sparse_index()); + ASSERT_EQ(resulted_sparse_index.indptr()->data()->size(), indptr_length); + ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length); + ASSERT_EQ(result->data()->size(), data_length); + ASSERT_TRUE(result->Equals(tensor)); +} + +TEST_F(TestSparseTensorRoundTrip, WithSparseCOOIndex) { + std::string path = "test-write-sparse-coo-tensor"; + constexpr int64_t kBufferSize = 1 << 20; + ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_)); + + std::vector shape = {2, 3, 4}; + std::vector dim_names = {"foo", "bar", "baz"}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + + auto data = Buffer::Wrap(values); + NumericTensor t(data, shape, {}, dim_names); + SparseTensorImpl st(t); + + CheckSparseTensorRoundTrip(st); +} + +TEST_F(TestSparseTensorRoundTrip, WithSparseCSRIndex) { + std::string path = "test-write-sparse-csr-matrix"; + constexpr int64_t kBufferSize = 1 << 20; + ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_)); + + std::vector shape = {4, 6}; + std::vector dim_names = {"foo", "bar", "baz"}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + + auto data = Buffer::Wrap(values); + NumericTensor t(data, shape, {}, dim_names); + SparseTensorImpl st(t); + + CheckSparseTensorRoundTrip(st); +} + TEST(TestRecordBatchStreamReader, MalformedInput) { const std::string empty_str = ""; const std::string garbage_str = "12345678"; diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 59a322a64338a..e856acafd7138 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -38,6 +38,7 @@ #include "arrow/ipc/message.h" #include "arrow/ipc/metadata-internal.h" #include "arrow/record_batch.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/tensor.h" #include
"arrow/type.h" @@ -726,5 +727,123 @@ Status ReadTensor(const Message& message, std::shared_ptr* out) { return Status::OK(); } +namespace { + +Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, + int64_t non_zero_length, io::RandomAccessFile* file, + std::shared_ptr* out) { + auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO(); + auto* indices_buffer = sparse_index->indicesBuffer(); + std::shared_ptr indices_data; + RETURN_NOT_OK( + file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data)); + std::vector shape({non_zero_length, ndim}); + const int64_t elsize = sizeof(int64_t); + std::vector strides({elsize, elsize * non_zero_length}); + *out = std::make_shared( + std::make_shared(indices_data, shape, strides)); + return Status::OK(); +} + +Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, + int64_t non_zero_length, io::RandomAccessFile* file, + std::shared_ptr* out) { + auto* sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR(); + + auto* indptr_buffer = sparse_index->indptrBuffer(); + std::shared_ptr indptr_data; + RETURN_NOT_OK( + file->ReadAt(indptr_buffer->offset(), indptr_buffer->length(), &indptr_data)); + + auto* indices_buffer = sparse_index->indicesBuffer(); + std::shared_ptr indices_data; + RETURN_NOT_OK( + file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data)); + + std::vector indptr_shape({ndim + 1}); + std::vector indices_shape({non_zero_length}); + *out = std::make_shared( + std::make_shared(indptr_data, indptr_shape), + std::make_shared(indices_data, indices_shape)); + return Status::OK(); +} + +Status MakeSparseTensorWithSparseCOOIndex( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names, + const std::shared_ptr& sparse_index, int64_t non_zero_length, + const std::shared_ptr& data, std::shared_ptr* out) { + *out = std::make_shared>(sparse_index, type, data, + shape, dim_names); + return Status::OK(); +} + +Status MakeSparseTensorWithSparseCSRIndex( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names, + const std::shared_ptr& sparse_index, int64_t non_zero_length, + const std::shared_ptr& data, std::shared_ptr* out) { + *out = std::make_shared>(sparse_index, type, data, + shape, dim_names); + return Status::OK(); +} + +} // namespace + +Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file, + std::shared_ptr* out) { + std::shared_ptr type; + std::vector shape; + std::vector dim_names; + int64_t non_zero_length; + SparseTensorFormat::type sparse_tensor_format_id; + + RETURN_NOT_OK(internal::GetSparseTensorMetadata( + metadata, &type, &shape, &dim_names, &non_zero_length, &sparse_tensor_format_id)); + + auto message = flatbuf::GetMessage(metadata.data()); + auto sparse_tensor = reinterpret_cast(message->header()); + const flatbuf::Buffer* buffer = sparse_tensor->data(); + DCHECK(BitUtil::IsMultipleOf8(buffer->offset())) + << "Buffer of sparse index data " + << "did not start on 8-byte aligned offset: " << buffer->offset(); + + std::shared_ptr data; + RETURN_NOT_OK(file->ReadAt(buffer->offset(), buffer->length(), &data)); + + std::shared_ptr sparse_index; + switch (sparse_tensor_format_id) { + case SparseTensorFormat::COO: + RETURN_NOT_OK(ReadSparseCOOIndex(sparse_tensor, shape.size(), non_zero_length, file, + &sparse_index)); + return MakeSparseTensorWithSparseCOOIndex( + type, shape, dim_names, std::dynamic_pointer_cast(sparse_index), 
+ non_zero_length, data, out); + + case SparseTensorFormat::CSR: + RETURN_NOT_OK(ReadSparseCSRIndex(sparse_tensor, shape.size(), non_zero_length, file, + &sparse_index)); + return MakeSparseTensorWithSparseCSRIndex( + type, shape, dim_names, std::dynamic_pointer_cast(sparse_index), + non_zero_length, data, out); + + default: + return Status::Invalid("Unsupported sparse index format"); + } +} + +Status ReadSparseTensor(const Message& message, std::shared_ptr* out) { + io::BufferReader buffer_reader(message.body()); + return ReadSparseTensor(*message.metadata(), &buffer_reader, out); +} + +Status ReadSparseTensor(io::InputStream* file, std::shared_ptr* out) { + std::unique_ptr message; + RETURN_NOT_OK(ReadContiguousPayload(file, &message)); + DCHECK_EQ(message->type(), Message::SPARSE_TENSOR); + io::BufferReader buffer_reader(message->body()); + return ReadSparseTensor(*message->metadata(), &buffer_reader, out); +} + } // namespace ipc } // namespace arrow diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h index 942664d6f2269..ebecea13ffb8b 100644 --- a/cpp/src/arrow/ipc/reader.h +++ b/cpp/src/arrow/ipc/reader.h @@ -33,6 +33,7 @@ class Buffer; class Schema; class Status; class Tensor; +class SparseTensor; namespace io { @@ -235,6 +236,22 @@ Status ReadTensor(io::InputStream* file, std::shared_ptr* out); ARROW_EXPORT Status ReadTensor(const Message& message, std::shared_ptr* out); +/// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file +/// +/// \param[in] file an InputStream pointed at the start of the message +/// \param[out] out the read sparse tensor +/// \return Status +ARROW_EXPORT +Status ReadSparseTensor(io::InputStream* file, std::shared_ptr* out); + +/// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message +/// +/// \param[in] message a Message containing the tensor metadata and body +/// \param[out] out the read sparse tensor +/// \return Status +ARROW_EXPORT +Status ReadSparseTensor(const Message& message, std::shared_ptr* out); + } // namespace ipc } // namespace arrow diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 6ce72e070e7b3..0bf68142c7776 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "arrow/array.h" @@ -33,6 +34,7 @@ #include "arrow/ipc/util.h" #include "arrow/memory_pool.h" #include "arrow/record_batch.h" +#include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/table.h" #include "arrow/tensor.h" @@ -671,6 +673,105 @@ Status GetTensorMessage(const Tensor& tensor, MemoryPool* pool, return Status::OK(); } +namespace internal { + +class SparseTensorSerializer { + public: + SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out) + : out_(out), buffer_start_offset_(buffer_start_offset) {} + + ~SparseTensorSerializer() = default; + + Status VisitSparseIndex(const SparseIndex& sparse_index) { + switch (sparse_index.format_id()) { + case SparseTensorFormat::COO: + RETURN_NOT_OK( + VisitSparseCOOIndex(checked_cast(sparse_index))); + break; + + case SparseTensorFormat::CSR: + RETURN_NOT_OK( + VisitSparseCSRIndex(checked_cast(sparse_index))); + break; + + default: + std::stringstream ss; + ss << "Unable to convert type: " << sparse_index.ToString() << std::endl; + return Status::NotImplemented(ss.str()); + } + + return Status::OK(); + } + + Status SerializeMetadata(const SparseTensor& sparse_tensor) { + return WriteSparseTensorMessage(sparse_tensor,
out_->body_length, buffer_meta_, + &out_->metadata); + } + + Status Assemble(const SparseTensor& sparse_tensor) { + if (buffer_meta_.size() > 0) { + buffer_meta_.clear(); + out_->body_buffers.clear(); + } + + RETURN_NOT_OK(VisitSparseIndex(*sparse_tensor.sparse_index())); + out_->body_buffers.emplace_back(sparse_tensor.data()); + + int64_t offset = buffer_start_offset_; + buffer_meta_.reserve(out_->body_buffers.size()); + + for (size_t i = 0; i < out_->body_buffers.size(); ++i) { + const Buffer* buffer = out_->body_buffers[i].get(); + int64_t size = buffer->size(); + int64_t padding = BitUtil::RoundUpToMultipleOf8(size) - size; + buffer_meta_.push_back({offset, size + padding}); + offset += size + padding; + } + + out_->body_length = offset - buffer_start_offset_; + DCHECK(BitUtil::IsMultipleOf8(out_->body_length)); + + return SerializeMetadata(sparse_tensor); + } + + private: + Status VisitSparseCOOIndex(const SparseCOOIndex& sparse_index) { + out_->body_buffers.emplace_back(sparse_index.indices()->data()); + return Status::OK(); + } + + Status VisitSparseCSRIndex(const SparseCSRIndex& sparse_index) { + out_->body_buffers.emplace_back(sparse_index.indptr()->data()); + out_->body_buffers.emplace_back(sparse_index.indices()->data()); + return Status::OK(); + } + + IpcPayload* out_; + + std::vector buffer_meta_; + + int64_t buffer_start_offset_; +}; + +Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool, + IpcPayload* out) { + SparseTensorSerializer writer(0, out); + return writer.Assemble(sparse_tensor); +} + +} // namespace internal + +Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst, + int32_t* metadata_length, int64_t* body_length, + MemoryPool* pool) { + internal::IpcPayload payload; + internal::SparseTensorSerializer writer(0, &payload); + RETURN_NOT_OK(writer.Assemble(sparse_tensor)); + + *body_length = payload.body_length; + return internal::WriteIpcPayload(payload, dst, metadata_length); +} + Status WriteDictionary(int64_t dictionary_id, const std::shared_ptr& dictionary, int64_t buffer_start_offset, io::OutputStream* dst, int32_t* metadata_length, int64_t* body_length, MemoryPool* pool) { diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h index a1c711146efe8..5feb9e90cb0b0 100644 --- a/cpp/src/arrow/ipc/writer.h +++ b/cpp/src/arrow/ipc/writer.h @@ -36,6 +36,7 @@ class Schema; class Status; class Table; class Tensor; +class SparseTensor; namespace io { @@ -269,6 +270,20 @@ ARROW_EXPORT Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length, int64_t* body_length); +// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata, +// sparse index, and body are written assuming 64-byte alignment. It is the +// user's responsibility to ensure that the OutputStream has been aligned +// to a 64-byte multiple before writing the message.
+// +// \param[in] sparse_tensor the SparseTensor to write +// \param[in] dst the OutputStream to write to +// \param[out] metadata_length the actual metadata length, including padding +// \param[out] body_length the actual message body length +// \param[in] pool a MemoryPool (reserved; not currently used by this function) +ARROW_EXPORT +Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst, + int32_t* metadata_length, int64_t* body_length, + MemoryPool* pool); + namespace internal { // These internal APIs may change without warning or deprecation diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc new file mode 100644 index 0000000000000..d48f2d0229d58 --- /dev/null +++ b/cpp/src/arrow/sparse_tensor-test.cc @@ -0,0 +1,244 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Unit tests for SparseTensor and its sparse index classes + +#include +#include +#include +#include + +#include + +#include + +#include "arrow/sparse_tensor.h" +#include "arrow/test-util.h" +#include "arrow/type.h" + +namespace arrow { + +static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected, + const SparseTensor& sparse_tensor) { + ASSERT_EQ(expected, sparse_tensor.format_id()); + ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id()); +} + +TEST(TestSparseCOOTensor, CreationEmptyTensor) { + std::vector shape = {2, 3, 4}; + SparseTensorImpl st1(int64(), shape); + + std::vector dim_names = {"foo", "bar", "baz"}; + SparseTensorImpl st2(int64(), shape, dim_names); + + ASSERT_EQ(0, st1.non_zero_length()); + ASSERT_EQ(0, st2.non_zero_length()); + + ASSERT_EQ(24, st1.size()); + ASSERT_EQ(24, st2.size()); + + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); +} + +TEST(TestSparseCOOTensor, CreationFromNumericTensor) { + std::vector shape = {2, 3, 4}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::shared_ptr buffer = Buffer::Wrap(values); + std::vector dim_names = {"foo", "bar", "baz"}; + NumericTensor tensor1(buffer, shape); + NumericTensor tensor2(buffer, shape, {}, dim_names); + SparseTensorImpl st1(tensor1); + SparseTensorImpl st2(tensor2); + + CheckSparseIndexFormatType(SparseTensorFormat::COO, st1); + + ASSERT_EQ(12, st1.non_zero_length()); + ASSERT_TRUE(st1.is_mutable()); + + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); + + const int64_t* ptr = reinterpret_cast(st1.raw_data()); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(i + 1, ptr[i]); + } + for (int i = 0; i < 6;
++i) { + ASSERT_EQ(i + 11, ptr[i + 6]); + } + + const auto& si = internal::checked_cast(*st1.sparse_index()); + ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString()); + + std::shared_ptr sidx = si.indices(); + ASSERT_EQ(std::vector({12, 3}), sidx->shape()); + ASSERT_TRUE(sidx->is_column_major()); + + // (0, 0, 0) -> 1 + ASSERT_EQ(0, sidx->Value({0, 0})); + ASSERT_EQ(0, sidx->Value({0, 1})); + ASSERT_EQ(0, sidx->Value({0, 2})); + + // (0, 0, 2) -> 2 + ASSERT_EQ(0, sidx->Value({1, 0})); + ASSERT_EQ(0, sidx->Value({1, 1})); + ASSERT_EQ(2, sidx->Value({1, 2})); + + // (0, 1, 1) -> 3 + ASSERT_EQ(0, sidx->Value({2, 0})); + ASSERT_EQ(1, sidx->Value({2, 1})); + ASSERT_EQ(1, sidx->Value({2, 2})); + + // (1, 2, 1) -> 15 + ASSERT_EQ(1, sidx->Value({10, 0})); + ASSERT_EQ(2, sidx->Value({10, 1})); + ASSERT_EQ(1, sidx->Value({10, 2})); + + // (1, 2, 3) -> 16 + ASSERT_EQ(1, sidx->Value({11, 0})); + ASSERT_EQ(2, sidx->Value({11, 1})); + ASSERT_EQ(3, sidx->Value({11, 2})); +} + +TEST(TestSparseCOOTensor, CreationFromTensor) { + std::vector shape = {2, 3, 4}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::shared_ptr buffer = Buffer::Wrap(values); + std::vector dim_names = {"foo", "bar", "baz"}; + Tensor tensor1(int64(), buffer, shape); + Tensor tensor2(int64(), buffer, shape, {}, dim_names); + SparseTensorImpl st1(tensor1); + SparseTensorImpl st2(tensor2); + + ASSERT_EQ(12, st1.non_zero_length()); + ASSERT_TRUE(st1.is_mutable()); + + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); + + const int64_t* ptr = reinterpret_cast(st1.raw_data()); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(i + 1, ptr[i]); + } + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(i + 11, ptr[i + 6]); + } + + const auto& si = internal::checked_cast(*st1.sparse_index()); + std::shared_ptr sidx = si.indices(); + ASSERT_EQ(std::vector({12, 3}), sidx->shape()); + ASSERT_TRUE(sidx->is_column_major()); + + // (0, 0, 0) -> 1 + ASSERT_EQ(0, sidx->Value({0, 0})); + ASSERT_EQ(0, sidx->Value({0, 1})); + ASSERT_EQ(0, sidx->Value({0, 2})); + + // (0, 0, 2) -> 2 + ASSERT_EQ(0, sidx->Value({1, 0})); + ASSERT_EQ(0, sidx->Value({1, 1})); + ASSERT_EQ(2, sidx->Value({1, 2})); + + // (0, 1, 1) -> 3 + ASSERT_EQ(0, sidx->Value({2, 0})); + ASSERT_EQ(1, sidx->Value({2, 1})); + ASSERT_EQ(1, sidx->Value({2, 2})); + + // (1, 2, 1) -> 15 + ASSERT_EQ(1, sidx->Value({10, 0})); + ASSERT_EQ(2, sidx->Value({10, 1})); + ASSERT_EQ(1, sidx->Value({10, 2})); + + // (1, 2, 3) -> 16 + ASSERT_EQ(1, sidx->Value({11, 0})); + ASSERT_EQ(2, sidx->Value({11, 1})); + ASSERT_EQ(3, sidx->Value({11, 2})); +} + +TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) { + std::vector shape = {6, 4}; + std::vector values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::shared_ptr buffer = Buffer::Wrap(values); + std::vector dim_names = {"foo", "bar", "baz"}; + NumericTensor tensor1(buffer, shape); + NumericTensor tensor2(buffer, shape, {}, dim_names); + + SparseTensorImpl st1(tensor1); + SparseTensorImpl st2(tensor2); + + CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1); + + ASSERT_EQ(12, st1.non_zero_length()); + ASSERT_TRUE(st1.is_mutable()); + + ASSERT_EQ("foo", st2.dim_name(0)); + ASSERT_EQ("bar", st2.dim_name(1)); + ASSERT_EQ("baz", st2.dim_name(2)); + + ASSERT_EQ("", st1.dim_name(0)); + ASSERT_EQ("", 
st1.dim_name(1)); + ASSERT_EQ("", st1.dim_name(2)); + + const int64_t* ptr = reinterpret_cast(st1.raw_data()); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(i + 1, ptr[i]); + } + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(i + 11, ptr[i + 6]); + } + + const auto& si = internal::checked_cast(*st1.sparse_index()); + + ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString()); + ASSERT_EQ(1, si.indptr()->ndim()); + ASSERT_EQ(1, si.indices()->ndim()); + + const int64_t* indptr_begin = reinterpret_cast(si.indptr()->raw_data()); + std::vector indptr_values(indptr_begin, + indptr_begin + si.indptr()->shape()[0]); + + ASSERT_EQ(7, indptr_values.size()); + ASSERT_EQ(std::vector({0, 2, 4, 6, 8, 10, 12}), indptr_values); + + const int64_t* indices_begin = + reinterpret_cast(si.indices()->raw_data()); + std::vector indices_values(indices_begin, + indices_begin + si.indices()->shape()[0]); + + ASSERT_EQ(12, indices_values.size()); + ASSERT_EQ(std::vector({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); +} + +} // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc new file mode 100644 index 0000000000000..101500d36432e --- /dev/null +++ b/cpp/src/arrow/sparse_tensor.cc @@ -0,0 +1,452 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/sparse_tensor.h" + +#include +#include +#include + +#include "arrow/compare.h" +#include "arrow/util/logging.h" + +namespace arrow { + +namespace { + +// ---------------------------------------------------------------------- +// SparseTensorConverter + +template +class SparseTensorConverter { + public: + explicit SparseTensorConverter(const NumericTensor&) {} + + Status Convert() { return Status::Invalid("Unsupported sparse index"); } +}; + +// ---------------------------------------------------------------------- +// SparseTensorConverter for SparseCOOIndex + +template +struct SparseTensorConverterBase { + using NumericTensorType = NumericTensor; + using value_type = typename NumericTensorType::value_type; + + explicit SparseTensorConverterBase(const NumericTensorType& tensor) : tensor_(tensor) {} + + bool TensorIsTriviallyIterable() const { + return tensor_.ndim() <= 1 || tensor_.is_contiguous(); + } + + size_t CountNonZero() const { + if (tensor_.size() == 0) { + return 0; + } + + if (TensorIsTriviallyIterable()) { + const value_type* data = reinterpret_cast(tensor_.raw_data()); + return std::count_if(data, data + tensor_.size(), + [](value_type x) { return x != 0; }); + } + + const std::vector& shape = tensor_.shape(); + const int64_t ndim = tensor_.ndim(); + + size_t count = 0; + std::vector coord(ndim, 0); + for (int64_t n = tensor_.size(); n > 0; n--) { + if (tensor_.Value(coord) != 0) { + ++count; + } + + // increment index + ++coord[ndim - 1]; + if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) { + int64_t d = ndim - 1; + while (d > 0 && coord[d] == shape[d]) { + coord[d] = 0; + ++coord[d - 1]; + --d; + } + } + } + return count; + } + + const NumericTensorType& tensor_; +}; + +template +class SparseTensorConverter + : private SparseTensorConverterBase { + public: + using BaseClass = SparseTensorConverterBase; + using NumericTensorType = typename BaseClass::NumericTensorType; + using value_type = typename BaseClass::value_type; + + explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {} + + Status Convert() { + const int64_t ndim = tensor_.ndim(); + const int64_t nonzero_count = static_cast(CountNonZero()); + + std::shared_ptr indices_buffer; + RETURN_NOT_OK( + AllocateBuffer(sizeof(int64_t) * ndim * nonzero_count, &indices_buffer)); + int64_t* indices = reinterpret_cast(indices_buffer->mutable_data()); + + std::shared_ptr values_buffer; + RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer)); + value_type* values = reinterpret_cast(values_buffer->mutable_data()); + + if (ndim <= 1) { + const value_type* data = reinterpret_cast(tensor_.raw_data()); + const int64_t count = ndim == 0 ? 
1 : tensor_.shape()[0]; + for (int64_t i = 0; i < count; ++i, ++data) { + if (*data != 0) { + *indices++ = i; + *values++ = *data; + } + } + } else { + const std::vector& shape = tensor_.shape(); + std::vector coord(ndim, 0); + + for (int64_t n = tensor_.size(); n > 0; n--) { + const value_type x = tensor_.Value(coord); + if (tensor_.Value(coord) != 0) { + *values++ = x; + + int64_t* indp = indices; + for (int64_t i = 0; i < ndim; ++i) { + *indp = coord[i]; + indp += nonzero_count; + } + indices++; + } + + // increment index + ++coord[ndim - 1]; + if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) { + int64_t d = ndim - 1; + while (d > 0 && coord[d] == shape[d]) { + coord[d] = 0; + ++coord[d - 1]; + --d; + } + } + } + } + + // make results + const std::vector indices_shape = {nonzero_count, ndim}; + const int64_t indices_elsize = sizeof(int64_t); + const std::vector indices_strides = {indices_elsize, + indices_elsize * nonzero_count}; + sparse_index = + std::make_shared(std::make_shared( + indices_buffer, indices_shape, indices_strides)); + data = values_buffer; + + return Status::OK(); + } + + std::shared_ptr sparse_index; + std::shared_ptr data; + + private: + using SparseTensorConverterBase::tensor_; + using SparseTensorConverterBase::CountNonZero; +}; + +template +void MakeSparseTensorFromTensor(const Tensor& tensor, + std::shared_ptr* sparse_index, + std::shared_ptr* data) { + NumericTensor numeric_tensor(tensor.data(), tensor.shape(), tensor.strides()); + SparseTensorConverter converter(numeric_tensor); + DCHECK_OK(converter.Convert()); + *sparse_index = converter.sparse_index; + *data = converter.data; +} + +// ---------------------------------------------------------------------- +// SparseTensorConverter for SparseCSRIndex + +template +class SparseTensorConverter + : private SparseTensorConverterBase { + public: + using BaseClass = SparseTensorConverterBase; + using NumericTensorType = typename BaseClass::NumericTensorType; + using value_type = typename BaseClass::value_type; + + explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {} + + Status Convert() { + const int64_t ndim = tensor_.ndim(); + if (ndim > 2) { + return Status::Invalid("Invalid tensor dimension"); + } + + const int64_t nr = tensor_.shape()[0]; + const int64_t nc = tensor_.shape()[1]; + const int64_t nonzero_count = static_cast(CountNonZero()); + + std::shared_ptr indptr_buffer; + std::shared_ptr indices_buffer; + + std::shared_ptr values_buffer; + RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer)); + value_type* values = reinterpret_cast(values_buffer->mutable_data()); + + if (ndim <= 1) { + return Status::NotImplemented("TODO for ndim <= 1"); + } else { + RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * (nr + 1), &indptr_buffer)); + int64_t* indptr = reinterpret_cast(indptr_buffer->mutable_data()); + + RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * nonzero_count, &indices_buffer)); + int64_t* indices = reinterpret_cast(indices_buffer->mutable_data()); + + int64_t k = 0; + *indptr++ = 0; + for (int64_t i = 0; i < nr; ++i) { + for (int64_t j = 0; j < nc; ++j) { + const value_type x = tensor_.Value({i, j}); + if (x != 0) { + *values++ = x; + *indices++ = j; + k++; + } + } + *indptr++ = k; + } + } + + std::vector indptr_shape({nr + 1}); + std::shared_ptr indptr_tensor = + std::make_shared(indptr_buffer, indptr_shape); + + std::vector indices_shape({nonzero_count}); + std::shared_ptr indices_tensor = + std::make_shared(indices_buffer, indices_shape); + + 
sparse_index = std::make_shared(indptr_tensor, indices_tensor); + data = values_buffer; + + return Status::OK(); + } + + std::shared_ptr sparse_index; + std::shared_ptr data; + + private: + using BaseClass::tensor_; + using SparseTensorConverterBase::CountNonZero; +}; + +// ---------------------------------------------------------------------- +// Instantiate templates + +#define INSTANTIATE_SPARSE_TENSOR_CONVERTER(IndexType) \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter; \ + template class ARROW_TEMPLATE_EXPORT SparseTensorConverter + +INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCOOIndex); +INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex); + +} // namespace + +// ---------------------------------------------------------------------- +// SparseCOOIndex + +// Constructor with a column-major NumericTensor +SparseCOOIndex::SparseCOOIndex(const std::shared_ptr& coords) + : SparseIndexBase(coords->shape()[0]), coords_(coords) { + DCHECK(coords_->is_column_major()); +} + +std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); } + +// ---------------------------------------------------------------------- +// SparseCSRIndex + +// Constructor with two index vectors +SparseCSRIndex::SparseCSRIndex(const std::shared_ptr& indptr, + const std::shared_ptr& indices) + : SparseIndexBase(indices->shape()[0]), indptr_(indptr), indices_(indices) { + DCHECK_EQ(1, indptr_->ndim()); + DCHECK_EQ(1, indices_->ndim()); +} + +std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); } + +// ---------------------------------------------------------------------- +// SparseTensor + +// Constructor with all attributes +SparseTensor::SparseTensor(const std::shared_ptr& type, + const std::shared_ptr& data, + const std::vector& shape, + const std::shared_ptr& sparse_index, + const std::vector& dim_names) + : type_(type), + data_(data), + shape_(shape), + sparse_index_(sparse_index), + dim_names_(dim_names) { + DCHECK(is_tensor_supported(type->id())); +} + +const std::string& SparseTensor::dim_name(int i) const { + static const std::string kEmpty = ""; + if (dim_names_.size() == 0) { + return kEmpty; + } else { + DCHECK_LT(i, static_cast(dim_names_.size())); + return dim_names_[i]; + } +} + +int64_t SparseTensor::size() const { + return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies()); +} + +bool SparseTensor::Equals(const SparseTensor& other) const { + return SparseTensorEquals(*this, other); +} + +// ---------------------------------------------------------------------- +// SparseTensorImpl + +// Constructor for an empty sparse tensor +template +SparseTensorImpl::SparseTensorImpl( + const std::shared_ptr& type, const std::vector& shape, + const std::vector& dim_names) + : SparseTensorImpl(nullptr, type, nullptr, shape, dim_names) {} + +// Constructor with a dense tensor +template +template +SparseTensorImpl::SparseTensorImpl(const NumericTensor& tensor) + :
SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), + tensor.dim_names_) { + SparseTensorConverter converter(tensor); + DCHECK_OK(converter.Convert()); + sparse_index_ = converter.sparse_index; + data_ = converter.data; +} + +// Constructor with a dense tensor +template +SparseTensorImpl::SparseTensorImpl(const Tensor& tensor) + : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), + tensor.dim_names_) { + switch (tensor.type()->id()) { + case Type::UINT8: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT8: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT16: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT16: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT32: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT32: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::UINT64: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::INT64: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::HALF_FLOAT: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::FLOAT: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + case Type::DOUBLE: + MakeSparseTensorFromTensor(tensor, &sparse_index_, + &data_); + return; + default: + break; + } +} + +// ---------------------------------------------------------------------- +// Instantiate templates + +#define INSTANTIATE_SPARSE_TENSOR(IndexType) \ + template class ARROW_TEMPLATE_EXPORT SparseTensorImpl; \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&); \ + template ARROW_EXPORT SparseTensorImpl::SparseTensorImpl( \ + const NumericTensor&) + +INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex); +INSTANTIATE_SPARSE_TENSOR(SparseCSRIndex); + +} // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h new file mode 100644 index 0000000000000..c7693d2ec9579 --- /dev/null +++ b/cpp/src/arrow/sparse_tensor.h @@ -0,0 +1,211 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_SPARSE_TENSOR_H +#define ARROW_SPARSE_TENSOR_H + +#include +#include +#include + +#include "arrow/tensor.h" + +namespace arrow { + +// ---------------------------------------------------------------------- +// SparseIndex class + +/// \brief EXPERIMENTAL: Sparse tensor format enumeration +struct SparseTensorFormat { + enum type { COO, CSR }; +}; + +/// \brief EXPERIMENTAL: The base class for representing index of non-zero +/// values in sparse tensor +class ARROW_EXPORT SparseIndex { + public: + explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length) + : format_id_(format_id), non_zero_length_(non_zero_length) {} + + virtual ~SparseIndex() = default; + + SparseTensorFormat::type format_id() const { return format_id_; } + int64_t non_zero_length() const { return non_zero_length_; } + + virtual std::string ToString() const = 0; + + protected: + SparseTensorFormat::type format_id_; + int64_t non_zero_length_; +}; + +template +class SparseIndexBase : public SparseIndex { + public: + explicit SparseIndexBase(int64_t non_zero_length) + : SparseIndex(SparseIndexType::format_id, non_zero_length) {} +}; + +// ---------------------------------------------------------------------- +// SparseCOOIndex class + +/// \brief EXPERIMENTAL: The index data for COO sparse tensor +class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase { + public: + using CoordsTensor = NumericTensor; + + static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO; + + // Constructor with a column-major NumericTensor + explicit SparseCOOIndex(const std::shared_ptr& coords); + + const std::shared_ptr& indices() const { return coords_; } + + std::string ToString() const override; + + bool Equals(const SparseCOOIndex& other) const { + return indices()->Equals(*other.indices()); + } + + protected: + std::shared_ptr coords_; +}; + +// ---------------------------------------------------------------------- +// SparseCSRIndex class + +/// \brief EXPERIMENTAL: The index data for CSR sparse matrix +class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase { + public: + using IndexTensor = NumericTensor; + + static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR; + + // Constructor with two index vectors + explicit SparseCSRIndex(const std::shared_ptr& indptr, + const std::shared_ptr& indices); + + const std::shared_ptr& indptr() const { return indptr_; } + const std::shared_ptr& indices() const { return indices_; } + + std::string ToString() const override; + + bool Equals(const SparseCSRIndex& other) const { + return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices()); + } + + protected: + std::shared_ptr indptr_; + std::shared_ptr indices_; +}; + +// ---------------------------------------------------------------------- +// SparseTensor class + +/// \brief EXPERIMENTAL: The base class of sparse tensor container +class ARROW_EXPORT SparseTensor { + public: + virtual ~SparseTensor() = default; + + SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); } + + std::shared_ptr type() const { 
+  std::shared_ptr<DataType> type() const { return type_; }
+  std::shared_ptr<Buffer> data() const { return data_; }
+
+  const uint8_t* raw_data() const { return data_->data(); }
+  uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
+
+  const std::vector<int64_t>& shape() const { return shape_; }
+
+  const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; }
+
+  int ndim() const { return static_cast<int>(shape_.size()); }
+
+  const std::string& dim_name(int i) const;
+
+  /// Total number of value cells in the sparse tensor
+  int64_t size() const;
+
+  /// Return true if the underlying data buffer is mutable
+  bool is_mutable() const { return data_->is_mutable(); }
+
+  /// Total number of non-zero cells in the sparse tensor
+  int64_t non_zero_length() const {
+    return sparse_index_ ? sparse_index_->non_zero_length() : 0;
+  }
+
+  bool Equals(const SparseTensor& other) const;
+
+ protected:
+  // Constructor with all attributes
+  SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+               const std::vector<int64_t>& shape,
+               const std::shared_ptr<SparseIndex>& sparse_index,
+               const std::vector<std::string>& dim_names);
+
+  std::shared_ptr<DataType> type_;
+  std::shared_ptr<Buffer> data_;
+  std::vector<int64_t> shape_;
+  std::shared_ptr<SparseIndex> sparse_index_;
+
+  /// These names are optional
+  std::vector<std::string> dim_names_;
+};
+
+// ----------------------------------------------------------------------
+// SparseTensorImpl class
+
+/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with
+/// sparse index type
+template <typename SparseIndexType>
+class ARROW_EXPORT SparseTensorImpl : public SparseTensor {
+ public:
+  virtual ~SparseTensorImpl() = default;
+
+  // Constructor with all attributes
+  SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
+                   const std::shared_ptr<DataType>& type,
+                   const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                   const std::vector<std::string>& dim_names)
+      : SparseTensor(type, data, shape, sparse_index, dim_names) {}
+
+  // Constructor for empty sparse tensor
+  SparseTensorImpl(const std::shared_ptr<DataType>& type,
+                   const std::vector<int64_t>& shape,
+                   const std::vector<std::string>& dim_names = {});
+
+  // Constructor with a dense numeric tensor
+  template <typename TYPE>
+  explicit SparseTensorImpl(const NumericTensor<TYPE>& tensor);
+
+  // Constructor with a dense tensor
+  explicit SparseTensorImpl(const Tensor& tensor);
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl);
+};
+
+/// \brief EXPERIMENTAL: Type alias for COO sparse tensor
+using SparseTensorCOO = SparseTensorImpl<SparseCOOIndex>;
+
+/// \brief EXPERIMENTAL: Type alias for CSR sparse matrix
+using SparseTensorCSR = SparseTensorImpl<SparseCSRIndex>;
+using SparseMatrixCSR = SparseTensorImpl<SparseCSRIndex>;
+
+}  // namespace arrow
+
+#endif  // ARROW_SPARSE_TENSOR_H
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index a9b5df81fa193..e81f0f0dff5d7 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -50,6 +50,9 @@ static inline bool is_tensor_supported(Type::type type_id) {
   return false;
 }
 
+template <typename SparseIndexType>
+class SparseTensorImpl;
+
 class ARROW_EXPORT Tensor {
  public:
   virtual ~Tensor() = default;
@@ -110,6 +113,9 @@ class ARROW_EXPORT Tensor {
   /// These names are optional
   std::vector<std::string> dim_names_;
 
+  template <typename SparseIndexType>
+  friend class SparseTensorImpl;
+
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
 };
diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
index 8cb74b87afcdc..62a1237436ae3 100644
--- a/docs/source/format/IPC.rst
+++ b/docs/source/format/IPC.rst
@@ -234,4 +234,28 @@ region) to be multiples of 64 bytes: ::
+
+SparseTensor Message Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``SparseTensor`` message type provides another way to write a
+multidimensional array of fixed-size values using Arrow's shared memory
+tools, in addition to ``Tensor``.
+``SparseTensor`` is designed specifically for tensors whose elements are
+mostly zero. As with ``Tensor``, Arrow implementations are in general not
+required to implement this data format.
+
+When writing a standalone encapsulated sparse tensor message, we use the
+format as indicated above, but additionally align the starting offset of the
+metadata as well as the starting offsets of the sparse index and the sparse
+tensor body (if writing to a shared memory region) to be multiples of
+64 bytes: ::
+
+
+
+
+
+
+
+
+The contents of the sparse tensor index depend on which sparse format
+is used.
+
 .. _Flatbuffer: https://github.com/google/flatbuffers
diff --git a/format/Message.fbs b/format/Message.fbs
index 830718139d88c..e14fdca8f155c 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -87,7 +87,7 @@ table DictionaryBatch {
 /// which may include experimental metadata types. For maximum compatibility,
 /// it is best to send data using RecordBatch
 union MessageHeader {
-  Schema, DictionaryBatch, RecordBatch, Tensor
+  Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor
 }
 
 table Message {
@@ -96,4 +96,4 @@ table Message {
   bodyLength: long;
 }
 
-root_type Message;
\ No newline at end of file
+root_type Message;
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index 18b614c3bde62..e77b353a0f33f 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -23,6 +23,9 @@ include "Schema.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
+/// ----------------------------------------------------------------------
+/// Data structures for dense tensors
+
 /// Shape data for a single axis in a tensor
 table TensorDim {
   /// Length of dimension
@@ -48,3 +51,96 @@ table Tensor {
 }
 
 root_type Tensor;
+
+/// ----------------------------------------------------------------------
+/// EXPERIMENTAL: Data structures for sparse tensors
+
+/// Coordinate format of sparse tensor index.
+table SparseTensorIndexCOO {
+  /// COO's index list is represented as an NxM matrix,
+  /// where N is the number of non-zero values,
+  /// and M is the number of dimensions of a sparse tensor.
+  /// indicesBuffer stores the location and size of this index matrix.
+  /// The type of index value is long, so the stride for the index matrix is unnecessary.
+  ///
+  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
+  ///
+  ///   X[0, 1, 2, 0] := 1
+  ///   X[1, 1, 2, 3] := 2
+  ///   X[0, 2, 1, 0] := 3
+  ///   X[0, 1, 3, 0] := 4
+  ///   X[0, 1, 2, 1] := 5
+  ///   X[1, 2, 0, 4] := 6
+  ///
+  /// In COO format, the index matrix of X is the following 4x6 matrix:
+  ///
+  ///   [[0, 0, 0, 0, 1, 1],
+  ///    [1, 1, 1, 2, 1, 2],
+  ///    [2, 2, 3, 1, 2, 0],
+  ///    [0, 1, 0, 0, 3, 4]]
+  ///
+  /// Note that the indices are sorted in lexicographical order.
+  indicesBuffer: Buffer;
+}
+
+/// Compressed Sparse Row format, which is matrix-specific.
+table SparseMatrixIndexCSR {
+  /// indptrBuffer stores the location and size of indptr array that
+  /// represents the range of the rows.
+  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+  /// The length of this array is 1 + (the number of rows), and the type
+  /// of index value is long.
+  ///
+  /// For example, let X be the following 6x4 matrix:
+  ///
+  ///   X := [[0, 1, 2, 0],
+  ///         [0, 0, 3, 0],
+  ///         [0, 4, 0, 5],
+  ///         [0, 0, 0, 0],
+  ///         [6, 0, 7, 8],
+  ///         [0, 9, 0, 0]].
+  ///
+  /// The array of non-zero values in X is:
+  ///
+  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
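+  ///   (that is, the non-zero values of X scanned in row-major order)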
+  ///
+  /// And the indptr of X is:
+  ///
+  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+  indptrBuffer: Buffer;
+
+  /// indicesBuffer stores the location and size of the array that
+  /// contains the column indices of the corresponding non-zero values.
+  /// The type of index value is long.
+  ///
+  /// For example, the indices of the above X are:
+  ///
+  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+  indicesBuffer: Buffer;
+}
+
+union SparseTensorIndex {
+  SparseTensorIndexCOO,
+  SparseMatrixIndexCSR
+}
+
+table SparseTensor {
+  /// The type of data contained in a value cell.
+  /// Currently only fixed-width value types are supported,
+  /// no strings or nested types.
+  type: Type;
+
+  /// The dimensions of the tensor, optionally named.
+  shape: [TensorDim];
+
+  /// The number of non-zero values in a sparse tensor.
+  non_zero_length: long;
+
+  /// Sparse tensor index
+  sparseIndex: SparseTensorIndex;
+
+  /// The location and size of the tensor's data
+  data: Buffer;
+}
+
+root_type SparseTensor;

From 84b221dd864af8385ac626fc753875416e840ff0 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Wed, 9 Jan 2019 17:32:38 -0600
Subject: [PATCH 065/203] ARROW-4138: [Python] Fix setuptools_scm version customization on Windows

Using single quotes for the regular expression doesn't work on Windows
for some reason. Using double quotes fixes the issue.

Author: Wes McKinney

Closes #3362 from wesm/ARROW-4138 and squashes the following commits:

ca3e56a9b Windows doesn't like single quotes passed to git describe
---
 python/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 742851918c124..584c35a09ce5a 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -500,8 +500,8 @@ def parse_git(root, **kwargs):
     subprojects, e.g. apache-arrow-js-XXX tags.
""" from setuptools_scm.git import parse - kwargs['describe_command'] = \ - "git describe --dirty --tags --long --match 'apache-arrow-[0-9].*'" + kwargs['describe_command'] =\ + 'git describe --dirty --tags --long --match "apache-arrow-[0-9].*"' return parse(root, **kwargs) From a80c27e46814ded00216cc48f83e3fedbfb9cf4f Mon Sep 17 00:00:00 2001 From: Tim Paine Date: Wed, 9 Jan 2019 17:34:53 -0600 Subject: [PATCH 066/203] ARROW-4197: [C++] Better Emscripten support A few changes for better compatibility with the Emscripten compiler for WebAssembly - expose the `-ggdb` flag as an option (unsupported by emscripten) - the `-undefined dynamic_lookup` flag should be set on apple, but not when using emscripten - allow for `backtrace` to be turned off even if found (no `execinfo.h` available, from `util/logging.cc`) Author: Tim Paine Closes #3350 from timkpaine/emscripten and squashes the following commits: e3661ff52 restore default ggdb behavior, use better environment variable to detect emscripten and add a comment explaining this a0e91a77c expose backtrace at top level, re-add -g, make backtrace private scope again b8f0c8068 Merge branch 'master' into emscripten 5308f6b49 fix for emscripten --- cpp/CMakeLists.txt | 8 ++++++++ cpp/cmake_modules/BuildUtils.cmake | 5 ++++- cpp/cmake_modules/SetupCxxFlags.cmake | 16 ++++++++++++---- cpp/src/arrow/CMakeLists.txt | 2 +- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3d2b698b8ff25..4232af3a12005 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -170,6 +170,10 @@ static|shared (default shared)") "If off, 'quiet' flags will be passed to linting tools" OFF) + option(ARROW_GGDB_DEBUG + "Pass -ggdb flag to debug builds" + ON) + #---------------------------------------------------------------------- # Project components to enable / disable building @@ -249,6 +253,10 @@ Note that this requires linking Boost statically" "Rely on Protocol Buffers shared libraries where relevant" OFF) + option(ARROW_WITH_BACKTRACE + "Build with backtrace support" + ON) + option(ARROW_USE_GLOG "Build libraries with glog support for pluggable logging" ON) diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 77db28e2aab28..cf2145b8a9166 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -182,11 +182,14 @@ function(ADD_ARROW_LIB LIB_NAME) ${ARG_PRIVATE_INCLUDES}) endif() - if(APPLE) + if(APPLE AND NOT DEFINED $ENV{EMSCRIPTEN}) # On OS X, you can avoid linking at library load time and instead # expecting that the symbols have been loaded separately. This happens # with libpython* where there can be conflicts between system Python and # the Python from a thirdparty distribution + # + # When running with the Emscripten Compiler, we need not worry about + # python, and the Emscripten Compiler does not support this option. set(ARG_SHARED_LINK_FLAGS "-undefined dynamic_lookup ${ARG_SHARED_LINK_FLAGS}") endif() diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 11608350c5f7a..796a68db0b878 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -340,11 +340,19 @@ endif() # Debug symbols are stripped for reduced binary size. 
Add # -DARROW_CXXFLAGS="-g" to add them if (NOT MSVC) - set(C_FLAGS_DEBUG "-ggdb -O0") - set(C_FLAGS_FASTDEBUG "-ggdb -O1") + if(ARROW_GGDB_DEBUG) + set(C_FLAGS_DEBUG "-ggdb -O0") + set(C_FLAGS_FASTDEBUG "-ggdb -O1") + set(CXX_FLAGS_DEBUG "-ggdb -O0") + set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") + else() + set(C_FLAGS_DEBUG "-g -O0") + set(C_FLAGS_FASTDEBUG "-g -O1") + set(CXX_FLAGS_DEBUG "-g -O0") + set(CXX_FLAGS_FASTDEBUG "-g -O1") + endif() + set(C_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CXX_FLAGS_DEBUG "-ggdb -O0") - set(CXX_FLAGS_FASTDEBUG "-ggdb -O1") set(CXX_FLAGS_RELEASE "-O3 -DNDEBUG") endif() diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 91bdce294c2d1..59f035792b80d 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -257,7 +257,7 @@ find_package(Backtrace) foreach(LIB_TARGET ${ARROW_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) - if (Backtrace_FOUND) + if (Backtrace_FOUND AND ARROW_WITH_BACKTRACE) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_WITH_BACKTRACE) endif() From 87ceb3ca904c9e9a839ff1cc724d3139c1958047 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 9 Jan 2019 16:49:04 -0700 Subject: [PATCH 067/203] ARROW-3959: [Rust] Add date/time data types This only adds the date/time types to the DataTypes enum as well as JSON serialization for meta data. This PR also implements `Schema::to_json` Author: Andy Grove Closes #3340 from andygrove/ARROW-3959 and squashes the following commits: 945498e merge from master and implement Hash for DateUnit, TimeUnit, etc. b05d6a0 Merge branch 'master' into ARROW-3959 312885e Timestamp now uses TimeUnit c3e092b Merge branch 'master' into ARROW-3959 d289cbb improve test 2d36927 update unit test d51bc82 fix mistake f4bbf10 Add date/time data types --- rust/arrow/src/datatypes.rs | 146 +++++++++++++++++++++++++++++++++++- 1 file changed, 145 insertions(+), 1 deletion(-) diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index 05db6ce7d40b9..5008a97624a40 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -56,11 +56,36 @@ pub enum DataType { Float16, Float32, Float64, + Timestamp(TimeUnit), + Date(DateUnit), + Time32(TimeUnit), + Time64(TimeUnit), + Interval(IntervalUnit), Utf8, List(Box), Struct(Vec), } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] +pub enum DateUnit { + Day, + Millisecond, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] +pub enum TimeUnit { + Second, + Millisecond, + Microsecond, + Nanosecond, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] +pub enum IntervalUnit { + YearMonth, + DayTime, +} + /// Contains the meta-data for a single relative type. /// /// The `Schema` object is an ordered collection of `Field` objects. 
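 /// When serialized with `to_json` (see below), each field records its name,
 /// its nullability, and the JSON form of its data type; a
 /// `Time32(TimeUnit::Millisecond)` field, for example, serializes its type as
 /// `{"name": "time", "bitWidth": "32", "unit": "MILLISECOND"}`.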
@@ -175,6 +200,47 @@ impl DataType {
                     "floatingpoint precision missing or invalid".to_string(),
                 )),
             },
+            Some(s) if s == "timestamp" => match map.get("unit") {
+                Some(p) if p == "SECOND" => Ok(DataType::Timestamp(TimeUnit::Second)),
+                Some(p) if p == "MILLISECOND" => Ok(DataType::Timestamp(TimeUnit::Millisecond)),
+                Some(p) if p == "MICROSECOND" => Ok(DataType::Timestamp(TimeUnit::Microsecond)),
+                Some(p) if p == "NANOSECOND" => Ok(DataType::Timestamp(TimeUnit::Nanosecond)),
+                _ => Err(ArrowError::ParseError(
+                    "timestamp unit missing or invalid".to_string(),
+                )),
+            },
+            Some(s) if s == "date" => match map.get("unit") {
+                Some(p) if p == "DAY" => Ok(DataType::Date(DateUnit::Day)),
+                Some(p) if p == "MILLISECOND" => Ok(DataType::Date(DateUnit::Millisecond)),
+                _ => Err(ArrowError::ParseError(
+                    "date unit missing or invalid".to_string(),
+                )),
+            },
+            Some(s) if s == "time" => {
+                let unit = match map.get("unit") {
+                    Some(p) if p == "SECOND" => Ok(TimeUnit::Second),
+                    Some(p) if p == "MILLISECOND" => Ok(TimeUnit::Millisecond),
+                    Some(p) if p == "MICROSECOND" => Ok(TimeUnit::Microsecond),
+                    Some(p) if p == "NANOSECOND" => Ok(TimeUnit::Nanosecond),
+                    _ => Err(ArrowError::ParseError(
+                        "time unit missing or invalid".to_string(),
+                    )),
+                };
+                match map.get("bitWidth") {
+                    Some(p) if p == "32" => Ok(DataType::Time32(unit?)),
+                    Some(p) if p == "64" => Ok(DataType::Time64(unit?)),
+                    _ => Err(ArrowError::ParseError(
+                        "time bitWidth missing or invalid".to_string(),
+                    )),
+                }
+            }
+            Some(s) if s == "interval" => match map.get("unit") {
+                Some(p) if p == "DAY_TIME" => Ok(DataType::Interval(IntervalUnit::DayTime)),
+                Some(p) if p == "YEAR_MONTH" => Ok(DataType::Interval(IntervalUnit::YearMonth)),
+                _ => Err(ArrowError::ParseError(
+                    "interval unit missing or invalid".to_string(),
+                )),
+            },
             Some(s) if s == "int" => match map.get("isSigned") {
                 Some(&Value::Bool(true)) => match map.get("bitWidth") {
                     Some(&Value::Number(ref n)) => match n.as_u64() {
@@ -231,7 +297,7 @@ impl DataType {
 
     /// Generate a JSON representation of the data type
     pub fn to_json(&self) -> Value {
-        match *self {
+        match self {
             DataType::Boolean => json!({"name": "bool"}),
             DataType::Int8 => json!({"name": "int", "bitWidth": 8, "isSigned": true}),
             DataType::Int16 => json!({"name": "int", "bitWidth": 16, "isSigned": true}),
@@ -254,6 +320,32 @@ impl DataType {
                 let child_json = t.to_json();
                 json!({ "name": "list", "children": child_json })
             }
+            DataType::Time32(unit) => json!({"name": "time", "bitWidth": "32", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Time64(unit) => json!({"name": "time", "bitWidth": "64", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Date(unit) => json!({"name": "date", "unit": match unit {
+                DateUnit::Day => "DAY",
+                DateUnit::Millisecond => "MILLISECOND",
+            }}),
+            DataType::Timestamp(unit) => json!({"name": "timestamp", "unit": match unit {
+                TimeUnit::Second => "SECOND",
+                TimeUnit::Millisecond => "MILLISECOND",
+                TimeUnit::Microsecond => "MICROSECOND",
+                TimeUnit::Nanosecond => "NANOSECOND",
+            }}),
+            DataType::Interval(unit) => json!({"name": "interval", "unit": match unit {
+                IntervalUnit::YearMonth => "YEAR_MONTH",
+                IntervalUnit::DayTime => "DAY_TIME",
+            }}),
         }
     }
 }
@@ -394,6 +486,13 @@ impl Schema {
             .enumerate()
.find(|&(_, c)| c.name == name) } + + /// Generate a JSON representation of the `Field` + pub fn to_json(&self) -> Value { + json!({ + "fields": self.fields.iter().map(|field| field.to_json()).collect::>(), + }) + } } impl fmt::Display for Schema { @@ -528,6 +627,51 @@ mod tests { assert_eq!(DataType::Int32, dt); } + #[test] + fn schema_json() { + let schema = Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::Date(DateUnit::Day), false), + Field::new("c3", DataType::Date(DateUnit::Millisecond), false), + Field::new("c7", DataType::Time32(TimeUnit::Second), false), + Field::new("c8", DataType::Time32(TimeUnit::Millisecond), false), + Field::new("c9", DataType::Time32(TimeUnit::Microsecond), false), + Field::new("c10", DataType::Time32(TimeUnit::Nanosecond), false), + Field::new("c11", DataType::Time64(TimeUnit::Second), false), + Field::new("c12", DataType::Time64(TimeUnit::Millisecond), false), + Field::new("c13", DataType::Time64(TimeUnit::Microsecond), false), + Field::new("c14", DataType::Time64(TimeUnit::Nanosecond), false), + Field::new("c15", DataType::Timestamp(TimeUnit::Second), false), + Field::new("c16", DataType::Timestamp(TimeUnit::Millisecond), false), + Field::new("c17", DataType::Timestamp(TimeUnit::Microsecond), false), + Field::new("c18", DataType::Timestamp(TimeUnit::Nanosecond), false), + Field::new("c19", DataType::Interval(IntervalUnit::DayTime), false), + Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false), + Field::new( + "c21", + DataType::Struct(vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::UInt16, false), + ]), + false, + ), + ]); + + let json = schema.to_json().to_string(); + assert_eq!(json, "{\"fields\":[{\"name\":\"c1\",\"nullable\":false,\"type\":{\"name\":\"utf8\"}},{\"name\":\"c2\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"DAY\"}},{\"name\":\"c3\",\"nullable\":false,\"type\":{\"name\":\"date\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c7\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"SECOND\"}},{\"name\":\"c8\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c9\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"MICROSECOND\"}},{\"name\":\"c10\",\"nullable\":false,\"type\":{\"bitWidth\":\"32\",\"name\":\"time\",\"unit\":\"NANOSECOND\"}},{\"name\":\"c11\",\"nullable\":false,\"type\":{\"bitWidth\":\"64\",\"name\":\"time\",\"unit\":\"SECOND\"}},{\"name\":\"c12\",\"nullable\":false,\"type\":{\"bitWidth\":\"64\",\"name\":\"time\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c13\",\"nullable\":false,\"type\":{\"bitWidth\":\"64\",\"name\":\"time\",\"unit\":\"MICROSECOND\"}},{\"name\":\"c14\",\"nullable\":false,\"type\":{\"bitWidth\":\"64\",\"name\":\"time\",\"unit\":\"NANOSECOND\"}},{\"name\":\"c15\",\"nullable\":false,\"type\":{\"name\":\"timestamp\",\"unit\":\"SECOND\"}},{\"name\":\"c16\",\"nullable\":false,\"type\":{\"name\":\"timestamp\",\"unit\":\"MILLISECOND\"}},{\"name\":\"c17\",\"nullable\":false,\"type\":{\"name\":\"timestamp\",\"unit\":\"MICROSECOND\"}},{\"name\":\"c18\",\"nullable\":false,\"type\":{\"name\":\"timestamp\",\"unit\":\"NANOSECOND\"}},{\"name\":\"c19\",\"nullable\":false,\"type\":{\"name\":\"interval\",\"unit\":\"DAY_TIME\"}},{\"name\":\"c20\",\"nullable\":false,\"type\":{\"name\":\"interval\",\"unit\":\"YEAR_MONTH\"}},{\"name\":\"c21\",\"nullable\":false,\"type\":{\"fields\":[{\"name\":\"a\",\"nullable\":false,\"type\":{\"name\":\"
utf8\"}},{\"name\":\"b\",\"nullable\":false,\"type\":{\"bitWidth\":16,\"isSigned\":false,\"name\":\"int\"}}]}}]}"); + + // convert back to a schema + let value: Value = serde_json::from_str(&json).unwrap(); + let schema2 = DataType::from(&value).unwrap(); + + match schema2 { + DataType::Struct(fields) => { + assert_eq!(schema.fields().len(), fields.len()); + } + _ => panic!(), + } + } + #[test] fn create_schema_string() { let _person = Schema::new(vec![ From b29ecdce6e096618aeb110878367906b3b4b48a5 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 9 Jan 2019 19:30:29 -0600 Subject: [PATCH 068/203] ARROW-4177: [C++] Add ThreadPool and TaskGroup microbenchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These benchmarks measure the number of tasks per second that can be executed depending on task cost and number of threads. It shows that for short tasks (< 10 µs), the scalability can be poor or even negative for very short tasks (< 1 µs). Also includes an optimization of ThreadedTaskGroup to avoid taking a lock on the hot path. Sample output (8-core AMD CPU, Ubuntu 18.04): ``` ----------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------------------- BM_WorkloadCost/task_cost:1000/repeats:1 724 ns 724 ns 987295 1.31655M items/s BM_WorkloadCost/task_cost:10000/repeats:1 7331 ns 7330 ns 88982 133.23k items/s BM_WorkloadCost/task_cost:100000/repeats:1 73279 ns 73267 ns 9182 13.3288k items/s BM_ThreadPoolSpawn/threads:1/task_cost:1000/repeats:1/real_time 163842359 ns 41132762 ns 4 1.16414M items/s BM_ThreadPoolSpawn/threads:2/task_cost:1000/repeats:1/real_time 158705340 ns 103873994 ns 7 1.20182M items/s BM_ThreadPoolSpawn/threads:4/task_cost:1000/repeats:1/real_time 447998576 ns 370986805 ns 2 435.969k items/s BM_ThreadPoolSpawn/threads:8/task_cost:1000/repeats:1/real_time 674500180 ns 543967794 ns 1 289.568k items/s BM_ThreadPoolSpawn/threads:1/task_cost:10000/repeats:1/real_time 150078690 ns 4887868 ns 5 130.147k items/s BM_ThreadPoolSpawn/threads:2/task_cost:10000/repeats:1/real_time 84446492 ns 5402850 ns 8 231.297k items/s BM_ThreadPoolSpawn/threads:4/task_cost:10000/repeats:1/real_time 46164089 ns 4912818 ns 15 423.104k items/s BM_ThreadPoolSpawn/threads:8/task_cost:10000/repeats:1/real_time 22703512 ns 7074437 ns 31 860.317k items/s BM_ThreadPoolSpawn/threads:1/task_cost:100000/repeats:1/real_time 149733023 ns 515907 ns 4 13.0506k items/s BM_ThreadPoolSpawn/threads:2/task_cost:100000/repeats:1/real_time 81157195 ns 448091 ns 9 24.078k items/s BM_ThreadPoolSpawn/threads:4/task_cost:100000/repeats:1/real_time 45600571 ns 521094 ns 16 42.8526k items/s BM_ThreadPoolSpawn/threads:8/task_cost:100000/repeats:1/real_time 20867873 ns 359547 ns 32 93.6416k items/s BM_SerialTaskGroup/task_cost:1000/repeats:1/real_time 8366557 ns 8362959 ns 66 1.13998M items/s BM_SerialTaskGroup/task_cost:10000/repeats:1/real_time 8346475 ns 8345288 ns 75 117.12k items/s BM_SerialTaskGroup/task_cost:100000/repeats:1/real_time 8409974 ns 8408879 ns 80 11.7281k items/s BM_ThreadedTaskGroup/threads:1/task_cost:1000/repeats:1/real_time 12932016 ns 6283623 ns 60 755.227k items/s BM_ThreadedTaskGroup/threads:2/task_cost:1000/repeats:1/real_time 10622580 ns 8631946 ns 58 919.419k items/s BM_ThreadedTaskGroup/threads:4/task_cost:1000/repeats:1/real_time 25544253 ns 20347053 ns 25 382.34k items/s 
BM_ThreadedTaskGroup/threads:8/task_cost:1000/repeats:1/real_time 36215077 ns 29435817 ns 19 269.683k items/s BM_ThreadedTaskGroup/threads:1/task_cost:10000/repeats:1/real_time 9830469 ns 476288 ns 69 99.4397k items/s BM_ThreadedTaskGroup/threads:2/task_cost:10000/repeats:1/real_time 5446608 ns 546159 ns 116 179.477k items/s BM_ThreadedTaskGroup/threads:4/task_cost:10000/repeats:1/real_time 2858316 ns 666944 ns 247 341.998k items/s BM_ThreadedTaskGroup/threads:8/task_cost:10000/repeats:1/real_time 1544885 ns 526298 ns 452 632.759k items/s BM_ThreadedTaskGroup/threads:1/task_cost:100000/repeats:1/real_time 9506192 ns 53110 ns 69 10.3756k items/s BM_ThreadedTaskGroup/threads:2/task_cost:100000/repeats:1/real_time 5262119 ns 67967 ns 116 18.7439k items/s BM_ThreadedTaskGroup/threads:4/task_cost:100000/repeats:1/real_time 2710626 ns 82870 ns 252 36.3875k items/s BM_ThreadedTaskGroup/threads:8/task_cost:100000/repeats:1/real_time 1602394 ns 65768 ns 423 61.5534k items/s ``` Author: Antoine Pitrou Closes #3337 from pitrou/ARROW-4177-thread-pool-benchmark and squashes the following commits: 5a17ca0d8 Fix warnings 2ffce8376 Make ThreadedTaskGroup mostly lockless (apart from ThreadPool) b5260b955 ARROW-4177: Add ThreadPool and TaskGroup microbenchmarks --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/task-group.cc | 60 ++++-- cpp/src/arrow/util/task-group.h | 2 +- cpp/src/arrow/util/thread-pool-benchmark.cc | 202 ++++++++++++++++++++ cpp/src/arrow/util/thread-pool.cc | 3 + 5 files changed, 246 insertions(+), 22 deletions(-) create mode 100644 cpp/src/arrow/util/thread-pool-benchmark.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index b02dc113c5459..54ff5674fdfcc 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -73,5 +73,6 @@ ADD_ARROW_BENCHMARK(int-util-benchmark) ADD_ARROW_BENCHMARK(lazy-benchmark) ADD_ARROW_BENCHMARK(machine-benchmark) ADD_ARROW_BENCHMARK(number-parsing-benchmark) +ADD_ARROW_BENCHMARK(thread-pool-benchmark) ADD_ARROW_BENCHMARK(trie-benchmark) ADD_ARROW_BENCHMARK(utf8-util-benchmark) diff --git a/cpp/src/arrow/util/task-group.cc b/cpp/src/arrow/util/task-group.cc index 3ea63fc5ad80e..52c40bd46d1d3 100644 --- a/cpp/src/arrow/util/task-group.cc +++ b/cpp/src/arrow/util/task-group.cc @@ -17,9 +17,11 @@ #include "arrow/util/task-group.h" +#include #include #include #include +#include #include "arrow/util/logging.h" #include "arrow/util/thread-pool.h" @@ -41,6 +43,8 @@ class SerialTaskGroup : public TaskGroup { Status current_status() override { return status_; } + bool ok() override { return status_.ok(); } + Status Finish() override { if (!finished_) { finished_ = true; @@ -70,7 +74,8 @@ class SerialTaskGroup : public TaskGroup { class ThreadedTaskGroup : public TaskGroup { public: - explicit ThreadedTaskGroup(ThreadPool* thread_pool) : thread_pool_(thread_pool) {} + explicit ThreadedTaskGroup(ThreadPool* thread_pool) + : thread_pool_(thread_pool), nremaining_(0), ok_(true) {} ~ThreadedTaskGroup() override { // Make sure all pending tasks are finished, so that dangling references @@ -79,22 +84,19 @@ class ThreadedTaskGroup : public TaskGroup { } void AppendReal(std::function task) override { - std::lock_guard lock(mutex_); - DCHECK(!finished_); - - if (status_.ok()) { - ++nremaining_; - status_ = thread_pool_->Spawn([&, task]() { - std::unique_lock lock(mutex_); - if (status_.ok()) { - lock.unlock(); + // The hot path is unlocked thanks to atomics + // Only if an error occurs is the lock 
taken + if (ok_.load(std::memory_order_acquire)) { + nremaining_.fetch_add(1, std::memory_order_acquire); + Status st = thread_pool_->Spawn([this, task]() { + if (ok_.load(std::memory_order_acquire)) { // XXX what about exceptions? Status st = task(); - lock.lock(); - status_ &= st; + UpdateStatus(std::move(st)); } OneTaskDone(); }); + UpdateStatus(std::move(st)); } } @@ -103,15 +105,15 @@ class ThreadedTaskGroup : public TaskGroup { return status_; } + bool ok() override { return ok_.load(); } + Status Finish() override { std::unique_lock lock(mutex_); if (!finished_) { - cv_.wait(lock, [&]() { return nremaining_ == 0; }); + cv_.wait(lock, [&]() { return nremaining_.load() == 0; }); // Current tasks may start other tasks, so only set this when done finished_ = true; if (parent_) { - // Need to lock parent - std::lock_guard parent_lock(parent_->mutex_); parent_->OneTaskDone(); } } @@ -124,26 +126,42 @@ class ThreadedTaskGroup : public TaskGroup { std::lock_guard lock(mutex_); auto child = new ThreadedTaskGroup(thread_pool_); child->parent_ = this; - nremaining_++; + nremaining_.fetch_add(1, std::memory_order_acquire); return std::shared_ptr(child); } protected: + void UpdateStatus(Status&& st) { + // Must be called unlocked, only locks on error + if (ARROW_PREDICT_FALSE(!st.ok())) { + std::lock_guard lock(mutex_); + ok_.store(false, std::memory_order_release); + status_ &= std::move(st); + } + } + void OneTaskDone() { - // We are locked - --nremaining_; - DCHECK_GE(nremaining_, 0); - if (nremaining_ == 0) { + // Can be called unlocked thanks to atomics + auto nremaining = nremaining_.fetch_sub(1, std::memory_order_release) - 1; + DCHECK_GE(nremaining, 0); + if (nremaining == 0) { + // Take the lock so that ~ThreadedTaskGroup cannot destroy cv + // before cv.notify_one() has returned + std::unique_lock lock(mutex_); cv_.notify_one(); } } + // These members are usable unlocked ThreadPool* thread_pool_; + std::atomic nremaining_; + std::atomic ok_; + + // These members use locking std::mutex mutex_; std::condition_variable cv_; Status status_; bool finished_ = false; - int32_t nremaining_ = 0; ThreadedTaskGroup* parent_ = nullptr; }; diff --git a/cpp/src/arrow/util/task-group.h b/cpp/src/arrow/util/task-group.h index 450b6da5884fc..390d9476e59bd 100644 --- a/cpp/src/arrow/util/task-group.h +++ b/cpp/src/arrow/util/task-group.h @@ -59,7 +59,7 @@ class ARROW_EXPORT TaskGroup { virtual Status current_status() = 0; /// Whether some tasks have already failed. Non-blocking , useful for stopping early. - bool ok() { return current_status().ok(); } + virtual bool ok() = 0; /// How many tasks can typically be executed in parallel. /// This is only a hint, useful for testing or debugging. diff --git a/cpp/src/arrow/util/thread-pool-benchmark.cc b/cpp/src/arrow/util/thread-pool-benchmark.cc new file mode 100644 index 0000000000000..8d855d3acba09 --- /dev/null +++ b/cpp/src/arrow/util/thread-pool-benchmark.cc @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "benchmark/benchmark.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/status.h" +#include "arrow/test-util.h" +#include "arrow/util/task-group.h" +#include "arrow/util/thread-pool.h" + +namespace arrow { +namespace internal { + +struct Workload { + explicit Workload(int32_t size) : size_(size), data_(kDataSize) { + std::default_random_engine gen(42); + std::uniform_int_distribution dist(0, std::numeric_limits::max()); + std::generate(data_.begin(), data_.end(), [&]() { return dist(gen); }); + } + + void operator()(); + + private: + static constexpr int32_t kDataSize = 32; + + int32_t size_; + std::vector data_; +}; + +void Workload::operator()() { + uint64_t result = 0; + for (int32_t i = 0; i < size_ / kDataSize; ++i) { + for (const auto v : data_) { + result = (result << (v % 64)) - v; + } + } + benchmark::DoNotOptimize(result); +} + +struct Task { + explicit Task(int32_t size) : workload_(size) {} + + Status operator()() { + workload_(); + return Status::OK(); + } + + private: + Workload workload_; +}; + +// This benchmark simply provides a baseline indicating the raw cost of our workload +// depending on the workload size. Number of items / second in this (serial) +// benchmark can be compared to the numbers obtained in BM_ThreadPoolSpawn. +static void BM_WorkloadCost(benchmark::State& state) { + const auto workload_size = static_cast(state.range(0)); + + Workload workload(workload_size); + for (auto _ : state) { + workload(); + } + + state.SetItemsProcessed(state.iterations()); +} + +// Benchmark ThreadPool::Spawn +static void BM_ThreadPoolSpawn(benchmark::State& state) { + const auto nthreads = static_cast(state.range(0)); + const auto workload_size = static_cast(state.range(1)); + + Workload workload(workload_size); + + // Spawn enough tasks to make the pool start up overhead negligible + const int32_t nspawns = 200000000 / workload_size + 1; + + for (auto _ : state) { + state.PauseTiming(); + std::shared_ptr pool; + ABORT_NOT_OK(ThreadPool::Make(nthreads, &pool)); + state.ResumeTiming(); + + for (int32_t i = 0; i < nspawns; ++i) { + // Pass the task by reference to avoid copying it around + ABORT_NOT_OK(pool->Spawn(std::ref(workload))); + } + + // Wait for all tasks to finish + ABORT_NOT_OK(pool->Shutdown(true /* wait */)); + state.PauseTiming(); + pool.reset(); + state.ResumeTiming(); + } + state.SetItemsProcessed(state.iterations() * nspawns); +} + +// Benchmark serial TaskGroup +static void BM_SerialTaskGroup(benchmark::State& state) { + const auto workload_size = static_cast(state.range(0)); + + Task task(workload_size); + + const int32_t nspawns = 10000000 / workload_size + 1; + + for (auto _ : state) { + auto task_group = TaskGroup::MakeSerial(); + for (int32_t i = 0; i < nspawns; ++i) { + // Pass the task by reference to avoid copying it around + task_group->Append(std::ref(task)); + } + ABORT_NOT_OK(task_group->Finish()); + } + state.SetItemsProcessed(state.iterations() * nspawns); +} + +// Benchmark threaded TaskGroup +static void BM_ThreadedTaskGroup(benchmark::State& state) { + 
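+  // Each timed iteration builds one TaskGroup, appends nspawns copies of the
+  // task to it, and blocks in Finish() until they have all run; the pool
+  // itself is created once outside the timing loop, so its startup cost is
+  // not measured here.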
const auto nthreads = static_cast(state.range(0)); + const auto workload_size = static_cast(state.range(1)); + + std::shared_ptr pool; + ABORT_NOT_OK(ThreadPool::Make(nthreads, &pool)); + + Task task(workload_size); + + const int32_t nspawns = 10000000 / workload_size + 1; + + for (auto _ : state) { + auto task_group = TaskGroup::MakeThreaded(pool.get()); + for (int32_t i = 0; i < nspawns; ++i) { + // Pass the task by reference to avoid copying it around + task_group->Append(std::ref(task)); + } + ABORT_NOT_OK(task_group->Finish()); + } + ABORT_NOT_OK(pool->Shutdown(true /* wait */)); + + state.SetItemsProcessed(state.iterations() * nspawns); +} + +static const int32_t kWorkloadSizes[] = {1000, 10000, 100000}; + +static void WorkloadCost_Customize(benchmark::internal::Benchmark* b) { + for (const auto w : kWorkloadSizes) { + b->Args({w}); + } + b->ArgNames({"task_cost"}); +} + +static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { + for (const int32_t w : kWorkloadSizes) { + for (const int nthreads : {1, 2, 4, 8}) { + b->Args({nthreads, w}); + } + } + b->ArgNames({"threads", "task_cost"}); +} + +static const int kRepetitions = 1; + +BENCHMARK(BM_WorkloadCost)->Repetitions(kRepetitions)->Apply(WorkloadCost_Customize); + +BENCHMARK(BM_ThreadPoolSpawn) + ->UseRealTime() + ->Repetitions(kRepetitions) + ->Apply(ThreadPoolSpawn_Customize); + +BENCHMARK(BM_SerialTaskGroup) + ->UseRealTime() + ->Repetitions(kRepetitions) + ->Apply(WorkloadCost_Customize); + +BENCHMARK(BM_ThreadedTaskGroup) + ->UseRealTime() + ->Repetitions(kRepetitions) + ->Apply(ThreadPoolSpawn_Customize); + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/thread-pool.cc b/cpp/src/arrow/util/thread-pool.cc index 751b264b42f59..17ad9c4972fa2 100644 --- a/cpp/src/arrow/util/thread-pool.cc +++ b/cpp/src/arrow/util/thread-pool.cc @@ -34,6 +34,9 @@ namespace internal { struct ThreadPool::State { State() : desired_capacity_(0), please_shutdown_(false), quick_shutdown_(false) {} + // NOTE: in case locking becomes too expensive, we can investigate lock-free FIFOs + // such as https://github.com/cameron314/concurrentqueue + std::mutex mutex_; std::condition_variable cv_; std::condition_variable cv_shutdown_; From db29723f661174eefd04077666347a9bbaca5be1 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Thu, 10 Jan 2019 11:49:29 +0900 Subject: [PATCH 069/203] ARROW-4215: [GLib] Fix typos in documentation This solves the following warnings: arrow-glib/basic-data-type.cpp:1070: warning: multi-line since docs found arrow-glib/decimal128.cpp:37: warning: Section decimal is not defined in the arrow-glib-sections.txt file. Author: Kouhei Sutou Closes #3361 from kou/glib-fix-document and squashes the following commits: edd43c8a Fix typos in documentation --- c_glib/arrow-glib/basic-data-type.cpp | 2 +- c_glib/arrow-glib/decimal128.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 2a599963ee3aa..861bbaf388801 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1065,7 +1065,7 @@ garrow_decimal_data_type_class_init(GArrowDecimalDataTypeClass *klass) * * Since: 0.10.0 * - * Deprecate: 0.12.0: + * Deprecated: 0.12.0: * Use garrow_decimal128_data_type_new() instead. 
  */
 GArrowDecimalDataType *
diff --git a/c_glib/arrow-glib/decimal128.cpp b/c_glib/arrow-glib/decimal128.cpp
index a49dba580ee79..32bdf5fcae6e4 100644
--- a/c_glib/arrow-glib/decimal128.cpp
+++ b/c_glib/arrow-glib/decimal128.cpp
@@ -27,8 +27,8 @@ G_BEGIN_DECLS
 
 /**
- * SECTION: decimal
- * @title: Decimal classes
+ * SECTION: decimal128
+ * @title: 128-bit decimal class
  * @include: arrow-glib/arrow-glib.h
  *
  * #GArrowDecimal128 is a 128-bit decimal class.

From 3b61349b3c16d43003e493c7e2aec9348e7e7343 Mon Sep 17 00:00:00 2001
From: Romain Francois
Date: Wed, 9 Jan 2019 22:00:12 -0600
Subject: [PATCH 070/203] ARROW-2968: [R] Multi-threaded conversion from Arrow table to R data.frame

The `as_tibble()` methods for `arrow::RecordBatch` and `arrow::Table` gained
a `use_threads` argument. When set to `TRUE`, the columns of a record batch
or table are converted to R vectors in parallel.

We cannot allocate R data structures in parallel (including scalar strings),
so it goes like this:

```
for each column:
  - allocate the R vector host for the array
  - if that can be done in parallel, fill the R vector with data from the array
fill serially all columns that could not be filled in parallel
wait for all columns to be filled
```

This is, I believe, better (although perhaps harder to explain) than:

- allocate all the vectors
- fill them in parallel

because we don't have to wait for all the vectors to be allocated before
starting to fill them. I believe the Python implementation does that, in
`DataFrameBlockCreator::Convert`:

```
RETURN_NOT_OK(CreateBlocks());
RETURN_NOT_OK(WriteTableToBlocks());
```

I've had to split the implementation of `Array__as_vector` into two steps:

- Allocate: this must happen on the main thread (or we would alternatively
  need to mutex R)
- Ingest: for most array types, this can be done in parallel

Author: Romain Francois

Closes #3332 from romainfrancois/2968/threads and squashes the following commits:

8261f2907 sprinkle use_threads in functions that call as_tibble()
3205de2d8 lint
590baf5a6 using string_view
cd0dd343e no need for checkBuffers
29546cd5d Some more refactoring of the Converters
5557b7974 refactor the Converter api, so that all Converters are implementations of the base class Converter.
e2ed26b78 lint
2a5815e03 moving parallel_ingest() to a static method of the Converter classes
2613d4ec4 null_count already local variable
62a842054 + to_r_index lambda, with comment about why +1
52c725fc8 default_value() marked constexpr
11e82e769 lint
d22b9c551 parallel version of Table__to_dataframe
2455bd057 parallel version of RecordBatch__to_dataframe
380d3a5bc simplify ArrayVector__as_vector.
85881a3e2 simplify ArrayVector_To_Vector
7074b36e9 reinstate Converter_Timestamp so that ArrayVector__as_vector can be simplified
cf7e76bae + parallel_ingest() to indicate if ingest for a given converter can be done in parallel
baaaefe1b Rework Converter api
e650b7934 + arrow::r::inspect(SEXP) for debugging
a335dfdfc Factor out Array -> R vector code in separate file
1212e28a9 .Ingest() return an Invalid status instead of throwing an exception
39bf76403 .Ingest() return a Status instead of void
f68b79376 replaced DictionaryArrays_to_Vector and Converter_Dictionary_Int32Indices by Converter_Dictionary
d25a0e6b5 replace Date32ArrayVector_to_Vector by Converter_Date32
85e48c0c7 lint
18b921e6f + Get/Set ThreadPoolCapacity
---
 r/NAMESPACE                                   |   2 +
 r/R/RcppExports.R                             |  57 +-
 r/R/RecordBatch.R                             |   4 +-
 r/R/Table.R                                   |   4 +-
 r/R/feather.R                                 |   5 +-
 r/R/parquet.R                                 |   5 +-
 r/R/read_table.R                              |   4 +-
 r/man/GetCpuThreadPoolCapacity.Rd             |  18 +
 r/man/SetCpuThreadPoolCapacity.Rd             |  17 +
 r/man/read_feather.Rd                         |   5 +-
 r/man/read_parquet.Rd                         |   4 +-
 r/man/read_table.Rd                           |   4 +-
 r/src/RcppExports.cpp                         | 120 +--
 r/src/array.cpp                               | 496 -------------
 r/src/array__to_vector.cpp                    | 697 ++++++++++++++++++
 r/src/arrow_types.h                           |  12 +-
 r/src/recordbatch.cpp                         |  16 -
 r/src/symbols.cpp                             |   9 +
 r/src/table.cpp                               |  17 -
 r/src/threadpool.cpp                          |  44 ++
 r/tests/testthat/test-RecordBatch.R           |   1 -
 r/tests/testthat/test-cputhreadpoolcapacity.R |  26 +
 22 files changed, 959 insertions(+), 608 deletions(-)
 create mode 100644 r/man/GetCpuThreadPoolCapacity.Rd
 create mode 100644 r/man/SetCpuThreadPoolCapacity.Rd
 create mode 100644 r/src/array__to_vector.cpp
 create mode 100644 r/src/threadpool.cpp
 create mode 100644 r/tests/testthat/test-cputhreadpoolcapacity.R
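For illustration, the allocate-serially / ingest-in-parallel scheme described above can be sketched against the C++ `TaskGroup` API. This is a sketch only: `AllocateColumn`, `CanIngestInParallel`, and `IngestColumn` are hypothetical stand-ins for the converter machinery, and `GetCpuThreadPool()` is assumed from `arrow/util/thread-pool.h`.

```
// Sketch of the conversion loop described above, not the actual R binding code.
#include "arrow/status.h"
#include "arrow/util/task-group.h"
#include "arrow/util/thread-pool.h"

// Hypothetical helpers standing in for the converter machinery:
void AllocateColumn(int i);         // R allocation, main thread only
bool CanIngestInParallel(int i);    // e.g. false for string columns
arrow::Status IngestColumn(int i);  // fill the previously allocated vector

arrow::Status ConvertColumns(int num_columns) {
  using arrow::internal::TaskGroup;
  auto tg = TaskGroup::MakeThreaded(arrow::internal::GetCpuThreadPool());
  for (int i = 0; i < num_columns; ++i) {
    AllocateColumn(i);
    if (CanIngestInParallel(i)) {
      // Filling happens on the thread pool while later columns are allocated.
      tg->Append([i] { return IngestColumn(i); });
    } else {
      RETURN_NOT_OK(IngestColumn(i));  // filled serially on the main thread
    }
  }
  return tg->Finish();  // wait for all parallel ingests to complete
}
```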
diff --git a/r/NAMESPACE b/r/NAMESPACE
index f8f6384dce1f8..7fd76c7c4fb7e 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -80,6 +80,7 @@ export(FeatherTableWriter)
 export(FileMode)
 export(FileOutputStream)
 export(FixedSizeBufferWriter)
+export(GetCpuThreadPoolCapacity)
 export(MessageReader)
 export(MessageType)
 export(MockOutputStream)
@@ -88,6 +89,7 @@ export(RecordBatchFileReader)
 export(RecordBatchFileWriter)
 export(RecordBatchStreamReader)
 export(RecordBatchStreamWriter)
+export(SetCpuThreadPoolCapacity)
 export(StatusCode)
 export(TimeUnit)
 export(Type)
diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R
index c6fe8719f4e89..51ed4ea6b5a2a 100644
--- a/r/R/RcppExports.R
+++ b/r/R/RcppExports.R
@@ -5,14 +5,6 @@ Array__from_vector <- function(x) {
   .Call(`_arrow_Array__from_vector`, x)
 }
 
-Array__as_vector <- function(array) {
-  .Call(`_arrow_Array__as_vector`, array)
-}
-
-ChunkedArray__as_vector <- function(chunked_array) {
-  .Call(`_arrow_ChunkedArray__as_vector`, chunked_array)
-}
-
 Array__Slice1 <- function(array, offset) {
   .Call(`_arrow_Array__Slice1`, array, offset)
 }
@@ -81,6 +73,22 @@ DictionaryArray__dictionary <- function(array) {
   .Call(`_arrow_DictionaryArray__dictionary`, array)
 }
 
+Array__as_vector <- function(array) {
+  .Call(`_arrow_Array__as_vector`, array)
+}
+
+ChunkedArray__as_vector <- function(chunked_array) {
+  .Call(`_arrow_ChunkedArray__as_vector`, chunked_array)
+}
+
+RecordBatch__to_dataframe <- function(batch, use_threads) {
+  .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
+}
+
+Table__to_dataframe <- function(table, use_threads) {
+  .Call(`_arrow_Table__to_dataframe`, table, use_threads)
+}
+
 ArrayData__get_type <- function(x) {
   .Call(`_arrow_ArrayData__get_type`, x)
 }
@@ -661,10 +669,6 @@ RecordBatch__column <- function(batch, i) {
   .Call(`_arrow_RecordBatch__column`, batch, i)
 }
 
-RecordBatch__to_dataframe <- function(batch) {
-  .Call(`_arrow_RecordBatch__to_dataframe`, batch)
-}
-
 RecordBatch__from_dataframe <- function(tbl) {
   .Call(`_arrow_RecordBatch__from_dataframe`, tbl)
 }
@@ -781,10 +785,6 @@ Table__schema <- function(x) {
   .Call(`_arrow_Table__schema`, x)
 }
 
-Table__to_dataframe <- function(table) {
-  .Call(`_arrow_Table__to_dataframe`, table)
-}
-
 Table__column <- function(table, i) {
   .Call(`_arrow_Table__column`, table, i)
 }
@@ -793,3 +793,28 @@ Table__columns <- function(table) {
   .Call(`_arrow_Table__columns`, table)
 }
 
+#' Get the capacity of the global thread pool
+#'
+#' @return the number of worker threads in the thread pool to which
+#' Arrow dispatches various CPU-bound tasks. This is an ideal number,
+#' not necessarily the exact number of threads at a given point in time.
+#'
+#' You can change this number using [SetCpuThreadPoolCapacity()].
+#'
+#' @export
+GetCpuThreadPoolCapacity <- function() {
+  .Call(`_arrow_GetCpuThreadPoolCapacity`)
+}
+
+#' Set the capacity of the global thread pool
+#'
+#' @param threads the number of worker threads in the thread pool to which
+#' Arrow dispatches various CPU-bound tasks.
+#'
+#' The current number is returned by [GetCpuThreadPoolCapacity()].
+#'
+#' @export
+SetCpuThreadPoolCapacity <- function(threads) {
+  invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+}
+
diff --git a/r/R/RecordBatch.R b/r/R/RecordBatch.R
index fed10abee769c..9872117452e85 100644
--- a/r/R/RecordBatch.R
+++ b/r/R/RecordBatch.R
@@ -80,8 +80,8 @@
 }
 
 #' @export
-`as_tibble.arrow::RecordBatch` <- function(x, ...){
-  RecordBatch__to_dataframe(x)
+`as_tibble.arrow::RecordBatch` <- function(x, use_threads = TRUE, ...){
+  RecordBatch__to_dataframe(x, use_threads = use_threads)
 }
 
 #' Create an [arrow::RecordBatch][arrow__RecordBatch] from a data frame
diff --git a/r/R/Table.R b/r/R/Table.R
index 8972634d59f1d..c39fce246af16 100644
--- a/r/R/Table.R
+++ b/r/R/Table.R
@@ -61,6 +61,6 @@ table <- function(.data){
 }
 
 #' @export
-`as_tibble.arrow::Table` <- function(x, ...){
-  Table__to_dataframe(x)
+`as_tibble.arrow::Table` <- function(x, use_threads = TRUE, ...){
+  Table__to_dataframe(x, use_threads = use_threads)
 }
diff --git a/r/R/feather.R b/r/R/feather.R
index 064652145c8e4..eaeea4caefbaa 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -154,15 +154,16 @@ FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) {
 #' @param file an arrow::ipc::feather::TableReader or whatever the [FeatherTableReader()] function can handle
 #' @param columns names of the columns to read. The default `NULL` means all columns
 #' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
+#' @param use_threads Use threads when converting to a tibble.
 #' @param ... additional parameters
 #'
 #' @return a data frame if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise
 #'
 #' @export
-read_feather <- function(file, columns = NULL, as_tibble = TRUE, ...){
+read_feather <- function(file, columns = NULL, as_tibble = TRUE, use_threads = TRUE, ...){
   out <- FeatherTableReader(file, ...)$Read(columns)
   if (isTRUE(as_tibble)) {
-    out <- as_tibble(out)
+    out <- as_tibble(out, use_threads = use_threads)
   }
   out
 }
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 141da7bd04b2c..6a393e2c880df 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -19,15 +19,16 @@
 #'
 #' @param file a file path
 #' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble.
+#' @param use_threads Use threads when converting to a tibble, only relevant if `as_tibble` is `TRUE`
 #' @param ... currently ignored
 #'
 #' @return a [arrow::Table][arrow__Table], or a data frame if `as_tibble` is `TRUE`.
 #'
 #' @export
-read_parquet <- function(file, as_tibble = TRUE, ...) {
+read_parquet <- function(file, as_tibble = TRUE, use_threads = TRUE, ...) {
   tab <- shared_ptr(`arrow::Table`, read_parquet_file(f))
   if (isTRUE(as_tibble)) {
-    tab <- as_tibble(tab)
+    tab <- as_tibble(tab, use_threads = use_threads)
   }
   tab
 }
diff --git a/r/R/read_table.R b/r/R/read_table.R
index a540a42173556..260c50f12374f 100644
--- a/r/R/read_table.R
+++ b/r/R/read_table.R
@@ -33,6 +33,8 @@
 #'
 #' - a raw vector: read using a [arrow::ipc::RecordBatchStreamReader][arrow__ipc__RecordBatchStreamReader]
 #'
+#' @param use_threads Use threads when converting to a tibble
+#'
 #' @return
 #'
 #' - `read_table` returns an [arrow::Table][arrow__Table]
@@ -81,6 +83,6 @@ read_table.fs_path <- function(stream) {
 
 #' @rdname read_table
 #' @export
-read_arrow <- function(stream){
-  as_tibble(read_table(stream))
+read_arrow <- function(stream, use_threads = TRUE){
+  as_tibble(read_table(stream), use_threads = use_threads)
 }
diff --git a/r/man/GetCpuThreadPoolCapacity.Rd b/r/man/GetCpuThreadPoolCapacity.Rd
new file mode 100644
index 0000000000000..8bf0a6fc89424
--- /dev/null
+++ b/r/man/GetCpuThreadPoolCapacity.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{GetCpuThreadPoolCapacity}
+\alias{GetCpuThreadPoolCapacity}
+\title{Get the capacity of the global thread pool}
+\usage{
+GetCpuThreadPoolCapacity()
+}
+\value{
+the number of worker threads in the thread pool to which
+Arrow dispatches various CPU-bound tasks. This is an ideal number,
+not necessarily the exact number of threads at a given point in time.
+
+You can change this number using \code{\link[=SetCpuThreadPoolCapacity]{SetCpuThreadPoolCapacity()}}.
+}
+\description{
+Get the capacity of the global thread pool
+}
diff --git a/r/man/SetCpuThreadPoolCapacity.Rd b/r/man/SetCpuThreadPoolCapacity.Rd
new file mode 100644
index 0000000000000..3a06dd5d6a202
--- /dev/null
+++ b/r/man/SetCpuThreadPoolCapacity.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/RcppExports.R
+\name{SetCpuThreadPoolCapacity}
+\alias{SetCpuThreadPoolCapacity}
+\title{Set the capacity of the global thread pool}
+\usage{
+SetCpuThreadPoolCapacity(threads)
+}
+\arguments{
+\item{threads}{the number of worker threads in the thread pool to which
+Arrow dispatches various CPU-bound tasks.
+
+The current number is returned by \code{\link[=GetCpuThreadPoolCapacity]{GetCpuThreadPoolCapacity()}}}
+}
+\description{
+Set the capacity of the global thread pool
+}
diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd
index 31fd36ab65a26..4509c7d334dbf 100644
--- a/r/man/read_feather.Rd
+++ b/r/man/read_feather.Rd
@@ -4,7 +4,8 @@
\alias{read_feather}
\title{Read a feather file}
\usage{
-read_feather(file, columns = NULL, as_tibble = TRUE, ...)
+read_feather(file, columns = NULL, as_tibble = TRUE,
+  use_threads = TRUE, ...)
}
\arguments{
\item{file}{an arrow::ipc::feather::TableReader or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle}
@@ -13,6 +14,8 @@ read_feather(file, columns = NULL, as_tibble = TRUE, ...)
\item{as_tibble}{should the \link[=arrow__Table]{arrow::Table} be converted to a tibble.} +\item{use_threads}{Use threads when converting to a tibble.} + \item{...}{additional parameters} } \value{ diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index c29e18bca5baf..a4f294bdd67ed 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -4,13 +4,15 @@ \alias{read_parquet} \title{Read parquet file from disk} \usage{ -read_parquet(file, as_tibble = TRUE, ...) +read_parquet(file, as_tibble = TRUE, use_threads = TRUE, ...) } \arguments{ \item{file}{a file path} \item{as_tibble}{should the \link[=arrow__Table]{arrow::Table} be converted to a tibble.} +\item{use_threads}{Use threads when converting to a tibble, only relevant if \code{as_tibble} is \code{TRUE}} + \item{...}{currently ignored} } \value{ diff --git a/r/man/read_table.Rd b/r/man/read_table.Rd index 3231b26da267b..356ec5e740d01 100644 --- a/r/man/read_table.Rd +++ b/r/man/read_table.Rd @@ -7,7 +7,7 @@ \usage{ read_table(stream) -read_arrow(stream) +read_arrow(stream, use_threads = TRUE) } \arguments{ \item{stream}{stream. @@ -23,6 +23,8 @@ binary file format, and uses a \link[=arrow__ipc__RecordBatchFileReader]{arrow:: to process it. \item a raw vector: read using a \link[=arrow__ipc__RecordBatchStreamReader]{arrow::ipc::RecordBatchStreamReader} }} + +\item{use_threads}{Use threads when converting to a tibble} } \value{ \itemize{ diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 1e8fed1867655..a31c401efa5f5 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -17,28 +17,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// Array__as_vector -SEXP Array__as_vector(const std::shared_ptr& array); -RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type array(arraySEXP); - rcpp_result_gen = Rcpp::wrap(Array__as_vector(array)); - return rcpp_result_gen; -END_RCPP -} -// ChunkedArray__as_vector -SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); -RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_arraySEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type chunked_array(chunked_arraySEXP); - rcpp_result_gen = Rcpp::wrap(ChunkedArray__as_vector(chunked_array)); - return rcpp_result_gen; -END_RCPP -} // Array__Slice1 std::shared_ptr Array__Slice1(const std::shared_ptr& array, int offset); RcppExport SEXP _arrow_Array__Slice1(SEXP arraySEXP, SEXP offsetSEXP) { @@ -237,6 +215,52 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// Array__as_vector +SEXP Array__as_vector(const std::shared_ptr& array); +RcppExport SEXP _arrow_Array__as_vector(SEXP arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type array(arraySEXP); + rcpp_result_gen = Rcpp::wrap(Array__as_vector(array)); + return rcpp_result_gen; +END_RCPP +} +// ChunkedArray__as_vector +SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); +RcppExport SEXP _arrow_ChunkedArray__as_vector(SEXP chunked_arraySEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type chunked_array(chunked_arraySEXP); + rcpp_result_gen = Rcpp::wrap(ChunkedArray__as_vector(chunked_array)); + 
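+  // The SEXP built by the new Converter classes in r/src/array__to_vector.cpp
+  // is returned to R as-is.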
return rcpp_result_gen; +END_RCPP +} +// RecordBatch__to_dataframe +List RecordBatch__to_dataframe(const std::shared_ptr& batch, bool use_threads); +RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batchSEXP, SEXP use_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); + Rcpp::traits::input_parameter< bool >::type use_threads(use_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(RecordBatch__to_dataframe(batch, use_threads)); + return rcpp_result_gen; +END_RCPP +} +// Table__to_dataframe +List Table__to_dataframe(const std::shared_ptr& table, bool use_threads); +RcppExport SEXP _arrow_Table__to_dataframe(SEXP tableSEXP, SEXP use_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type table(tableSEXP); + Rcpp::traits::input_parameter< bool >::type use_threads(use_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(Table__to_dataframe(table, use_threads)); + return rcpp_result_gen; +END_RCPP +} // ArrayData__get_type std::shared_ptr ArrayData__get_type(const std::shared_ptr& x); RcppExport SEXP _arrow_ArrayData__get_type(SEXP xSEXP) { @@ -1846,17 +1870,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// RecordBatch__to_dataframe -List RecordBatch__to_dataframe(const std::shared_ptr& batch); -RcppExport SEXP _arrow_RecordBatch__to_dataframe(SEXP batchSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type batch(batchSEXP); - rcpp_result_gen = Rcpp::wrap(RecordBatch__to_dataframe(batch)); - return rcpp_result_gen; -END_RCPP -} // RecordBatch__from_dataframe std::shared_ptr RecordBatch__from_dataframe(DataFrame tbl); RcppExport SEXP _arrow_RecordBatch__from_dataframe(SEXP tblSEXP) { @@ -2185,17 +2198,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// Table__to_dataframe -List Table__to_dataframe(const std::shared_ptr& table); -RcppExport SEXP _arrow_Table__to_dataframe(SEXP tableSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const std::shared_ptr& >::type table(tableSEXP); - rcpp_result_gen = Rcpp::wrap(Table__to_dataframe(table)); - return rcpp_result_gen; -END_RCPP -} // Table__column std::shared_ptr Table__column(const std::shared_ptr& table, int i); RcppExport SEXP _arrow_Table__column(SEXP tableSEXP, SEXP iSEXP) { @@ -2219,11 +2221,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// GetCpuThreadPoolCapacity +int GetCpuThreadPoolCapacity(); +RcppExport SEXP _arrow_GetCpuThreadPoolCapacity() { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + rcpp_result_gen = Rcpp::wrap(GetCpuThreadPoolCapacity()); + return rcpp_result_gen; +END_RCPP +} +// SetCpuThreadPoolCapacity +void SetCpuThreadPoolCapacity(int threads); +RcppExport SEXP _arrow_SetCpuThreadPoolCapacity(SEXP threadsSEXP) { +BEGIN_RCPP + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< int >::type threads(threadsSEXP); + SetCpuThreadPoolCapacity(threads); + return R_NilValue; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 1}, - {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, - {"_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, {"_arrow_Array__Slice1", 
(DL_FUNC) &_arrow_Array__Slice1, 2}, {"_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, {"_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, @@ -2241,6 +2261,10 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_Array__Mask", (DL_FUNC) &_arrow_Array__Mask, 1}, {"_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, {"_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, + {"_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, + {"_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, + {"_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, + {"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, {"_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, {"_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, {"_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, @@ -2386,7 +2410,6 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, {"_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, {"_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, - {"_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 1}, {"_arrow_RecordBatch__from_dataframe", (DL_FUNC) &_arrow_RecordBatch__from_dataframe, 1}, {"_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 2}, {"_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, @@ -2416,9 +2439,10 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, {"_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, {"_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, - {"_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 1}, {"_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, {"_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, + {"_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, + {"_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, {NULL, NULL, 0} }; diff --git a/r/src/array.cpp b/r/src/array.cpp index 901f2b69bedb4..dd0d7e64a20bf 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -33,9 +33,6 @@ inline bool isna(double x) { return ISNA(x); } -// the integer64 sentinel -constexpr int64_t NA_INT64 = std::numeric_limits::min(); - template std::shared_ptr SimpleArray(SEXP x) { Rcpp::Vector vec(x); @@ -503,499 +500,6 @@ std::shared_ptr Array__from_vector(SEXP x) { return nullptr; } -// ---------------------------- Array -> R vector - -namespace arrow { -namespace r { - -template -SEXP ArrayVector_To_Vector(int64_t n, const ArrayVector& arrays, Args... 
args) { - Converter converter(n, std::forward(args)...); - - R_xlen_t k = 0; - for (const auto& array : arrays) { - auto n_chunk = array->length(); - converter.Ingest(array, k, n_chunk); - k += n_chunk; - } - return converter.data; -} - -template -struct Converter_SimpleArray { - using Vector = Rcpp::Vector; - - Converter_SimpleArray(R_xlen_t n) : data(no_init(n)) {} - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - using value_type = typename Vector::stored_type; - auto null_count = array->null_count(); - - if (n == null_count) { - std::fill_n(data.begin() + start, n, default_value()); - } else { - auto p_values = array->data()->GetValues(1); - STOP_IF_NULL(p_values); - - // first copy all the data - std::copy_n(p_values, n, data.begin() + start); - - if (null_count) { - // then set the sentinel NA - arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - - for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - if (bitmap_reader.IsNotSet()) { - data[i + start] = default_value(); - } - } - } - } - } - - Vector data; -}; - -struct Converter_String { - Converter_String(R_xlen_t n) : data(n) {} - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - - if (null_count == n) { - std::fill_n(data.begin(), n, NA_STRING); - } else { - auto p_offset = array->data()->GetValues(1); - STOP_IF_NULL(p_offset); - auto p_data = array->data()->GetValues(2, *p_offset); - if (!p_data) { - // There is an offset buffer, but the data buffer is null - // There is at least one value in the array and not all the values are null - // That means all values are empty strings so there is nothing to do - return; - } - - if (null_count) { - // need to watch for nulls - arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), - array->offset(), n); - for (int i = 0; i < n; i++, null_reader.Next()) { - if (null_reader.IsSet()) { - auto diff = p_offset[i + 1] - p_offset[i]; - SET_STRING_ELT(data, start + i, Rf_mkCharLenCE(p_data, diff, CE_UTF8)); - p_data += diff; - } else { - SET_STRING_ELT(data, start + i, NA_STRING); - } - } - - } else { - // no need to check for nulls - // TODO: altrep mark this as no na - for (int i = 0; i < n; i++) { - auto diff = p_offset[i + 1] - p_offset[i]; - SET_STRING_ELT(data, start + i, Rf_mkCharLenCE(p_data, diff, CE_UTF8)); - p_data += diff; - } - } - } - } - - CharacterVector data; -}; - -struct Converter_Boolean { - Converter_Boolean(R_xlen_t n) : data(n) {} - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - - if (n == null_count) { - std::fill_n(data.begin() + start, n, NA_LOGICAL); - } else { - // process the data - auto p_data = array->data()->GetValues(1, 0); - STOP_IF_NULL(p_data); - - arrow::internal::BitmapReader data_reader(p_data, array->offset(), n); - for (size_t i = 0; i < n; i++, data_reader.Next()) { - data[start + i] = data_reader.IsSet(); - } - - // then the null bitmap if needed - if (null_count) { - arrow::internal::BitmapReader null_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, null_reader.Next()) { - if (null_reader.IsNotSet()) { - data[start + i] = NA_LOGICAL; - } - } - } - } - } - - LogicalVector data; -}; - -template -struct Converter_Dictionary_Int32Indices { - Converter_Dictionary_Int32Indices(R_xlen_t n, const std::shared_ptr& dict, - bool ordered) - : data(no_init(n)) { - data.attr("levels") = 
ArrayVector_To_Vector(dict->length(), {dict}); - if (ordered) { - data.attr("class") = CharacterVector::create("ordered", "factor"); - } else { - data.attr("class") = "factor"; - } - } - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - DictionaryArray* dict_array = static_cast(array.get()); - using value_type = typename arrow::TypeTraits::ArrayType::value_type; - auto null_count = array->null_count(); - - if (n == null_count) { - std::fill_n(data.begin() + start, n, NA_INTEGER); - } else { - std::shared_ptr indices = dict_array->indices(); - auto p_array = indices->data()->GetValues(1); - STOP_IF_NULL(p_array); - - if (array->null_count()) { - arrow::internal::BitmapReader bitmap_reader(indices->null_bitmap()->data(), - indices->offset(), n); - for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_array) { - data[start + i] = - bitmap_reader.IsNotSet() ? NA_INTEGER : (static_cast(*p_array) + 1); - } - } else { - std::transform( - p_array, p_array + n, data.begin() + start, - [](const value_type value) { return static_cast(value) + 1; }); - } - } - } - - IntegerVector data; -}; - -struct Converter_Date64 { - Converter_Date64(R_xlen_t n) : data(n) { - data.attr("class") = CharacterVector::create("POSIXct", "POSIXt"); - } - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - if (null_count == n) { - std::fill_n(data.begin() + start, n, NA_REAL); - } else { - auto p_values = array->data()->GetValues(1); - STOP_IF_NULL(p_values); - auto p_vec = data.begin() + start; - - // convert DATE64 milliseconds to R seconds (stored as double) - auto seconds = [](int64_t ms) { return static_cast(ms / 1000); }; - - if (null_count) { - arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_vec, ++p_values) { - *p_vec = bitmap_reader.IsSet() ? seconds(*p_values) : NA_REAL; - } - } else { - std::transform(p_values, p_values + n, p_vec, seconds); - } - } - } - - NumericVector data; -}; - -template -struct Converter_Promotion { - using r_stored_type = typename Rcpp::Vector::stored_type; - using value_type = typename TypeTraits::ArrayType::value_type; - - Converter_Promotion(R_xlen_t n) : data(no_init(n)) {} - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - if (null_count == n) { - std::fill_n(data.begin() + start, n, default_value()); - } else { - auto p_values = array->data()->GetValues(1); - STOP_IF_NULL(p_values); - - auto value_convert = [](value_type value) { - return static_cast(value); - }; - if (null_count) { - internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - data[start + i] = bitmap_reader.IsNotSet() ? 
Rcpp::Vector::get_na() - : value_convert(p_values[i]); - } - } else { - std::transform(p_values, p_values + n, data.begin(), value_convert); - } - } - } - - Rcpp::Vector data; -}; - -template -struct Converter_Time { - Converter_Time(int64_t n, int32_t multiplier, CharacterVector classes) - : data(no_init(n)), multiplier_(multiplier) { - data.attr("class") = classes; - } - - Converter_Time(int64_t n, int32_t multiplier) - : data(no_init(n)), multiplier_(multiplier) { - data.attr("class") = CharacterVector::create("hms", "difftime"); - data.attr("units") = "secs"; - } - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - if (n == null_count) { - std::fill_n(data.begin() + start, n, NA_REAL); - } else { - auto p_values = array->data()->GetValues(1); - STOP_IF_NULL(p_values); - auto p_vec = data.begin() + start; - auto convert = [this](value_type value) { - return static_cast(value) / multiplier_; - }; - if (null_count) { - arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_vec, ++p_values) { - *p_vec = bitmap_reader.IsSet() ? convert(*p_values) : NA_REAL; - } - } else { - std::transform(p_values, p_values + n, p_vec, convert); - } - } - } - - NumericVector data; - int32_t multiplier_; -}; - -template -struct Converter_TimeStamp : Converter_Time { - Converter_TimeStamp(int64_t n, int32_t multiplier) - : Converter_Time(n, multiplier, - CharacterVector::create("POSIXct", "POSIXt")) {} -}; - -struct Converter_Int64 { - Converter_Int64(R_xlen_t n) : data(no_init(n)) { data.attr("class") = "integer64"; } - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - if (null_count == n) { - std::fill_n(reinterpret_cast(data.begin()) + start, n, NA_INT64); - } else { - auto p_values = array->data()->GetValues(1); - STOP_IF_NULL(p_values); - auto p_vec = reinterpret_cast(data.begin()) + start; - - if (array->null_count()) { - internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - p_vec[i] = bitmap_reader.IsNotSet() ? 
NA_INT64 : p_values[i]; - } - } else { - std::copy_n(p_values, n, p_vec); - } - } - } - - NumericVector data; -}; - -SEXP DictionaryArrays_to_Vector(int64_t n, const ArrayVector& arrays) { - DictionaryArray* dict_array = static_cast(arrays[0].get()); - auto dict = dict_array->dictionary(); - auto indices = dict_array->indices(); - - if (dict->type_id() != Type::STRING) { - stop("Cannot convert Dictionary Array of type `%s` to R", - dict_array->type()->ToString()); - } - bool ordered = dict_array->dict_type()->ordered(); - switch (indices->type_id()) { - case Type::UINT8: - return ArrayVector_To_Vector>( - n, arrays, dict, ordered); - - case Type::INT8: - return ArrayVector_To_Vector>( - n, arrays, dict, ordered); - - case Type::UINT16: - return ArrayVector_To_Vector>( - n, arrays, dict, ordered); - - case Type::INT16: - return ArrayVector_To_Vector>( - n, arrays, dict, ordered); - - case Type::INT32: - return ArrayVector_To_Vector>( - n, arrays, dict, ordered); - - default: - stop("Cannot convert Dictionary Array of type `%s` to R", - dict_array->type()->ToString()); - } - return R_NilValue; -} - -SEXP Date32ArrayVector_to_Vector(int64_t n, const ArrayVector& arrays) { - IntegerVector out( - arrow::r::ArrayVector_To_Vector>(n, arrays)); - out.attr("class") = "Date"; - return out; -} - -struct Converter_Decimal { - Converter_Decimal(R_xlen_t n) : data(no_init(n)) {} - - void Ingest(const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) { - auto null_count = array->null_count(); - if (n == null_count) { - std::fill_n(data.begin() + start, n, NA_REAL); - } else { - auto p_vec = reinterpret_cast(data.begin()) + start; - const auto& decimals_arr = - internal::checked_cast(*array); - - if (array->null_count()) { - internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), - array->offset(), n); - - for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - p_vec[i] = bitmap_reader.IsNotSet() - ? 
NA_REAL - : std::stod(decimals_arr.FormatValue(i).c_str()); - } - } else { - for (size_t i = 0; i < n; i++) { - p_vec[i] = std::stod(decimals_arr.FormatValue(i).c_str()); - } - } - } - } - - NumericVector data; -}; - -} // namespace r -} // namespace arrow - -SEXP ArrayVector__as_vector(int64_t n, const ArrayVector& arrays) { - using namespace arrow::r; - - switch (arrays[0]->type_id()) { - // direct support - case Type::INT8: - return ArrayVector_To_Vector>(n, arrays); - case Type::INT32: - return ArrayVector_To_Vector>(n, arrays); - case Type::DOUBLE: - return ArrayVector_To_Vector>(n, arrays); - - // need to handle 1-bit case - case Type::BOOL: - return ArrayVector_To_Vector(n, arrays); - - // handle memory dense strings - case Type::STRING: - return ArrayVector_To_Vector(n, arrays); - case Type::DICTIONARY: - return DictionaryArrays_to_Vector(n, arrays); - - case Type::DATE32: - return Date32ArrayVector_to_Vector(n, arrays); - case Type::DATE64: - return ArrayVector_To_Vector(n, arrays); - - // promotions to integer vector - case Type::UINT8: - return ArrayVector_To_Vector>(n, - arrays); - case Type::INT16: - return ArrayVector_To_Vector>(n, - arrays); - case Type::UINT16: - return ArrayVector_To_Vector>( - n, arrays); - - // promotions to numeric vector - case Type::UINT32: - return ArrayVector_To_Vector>( - n, arrays); - case Type::HALF_FLOAT: - return ArrayVector_To_Vector>( - n, arrays); - case Type::FLOAT: - return ArrayVector_To_Vector>( - n, arrays); - - // time32 ane time64 - case Type::TIME32: - return ArrayVector_To_Vector>( - n, arrays, - static_cast(arrays[0]->type().get())->unit() == TimeUnit::SECOND - ? 1 - : 1000); - - case Type::TIME64: - return ArrayVector_To_Vector>( - n, arrays, - static_cast(arrays[0]->type().get())->unit() == TimeUnit::MICRO - ? 1000000 - : 1000000000); - - case Type::TIMESTAMP: - return ArrayVector_To_Vector>( - n, arrays, - static_cast(arrays[0]->type().get())->unit() == TimeUnit::MICRO - ? 1000000 - : 1000000000); - - case Type::INT64: - return ArrayVector_To_Vector(n, arrays); - case Type::DECIMAL: - return ArrayVector_To_Vector(n, arrays); - - default: - break; - } - - stop(tfm::format("cannot handle Array of type %s", arrays[0]->type()->name())); - return R_NilValue; -} - -// [[Rcpp::export]] -SEXP Array__as_vector(const std::shared_ptr& array) { - return ArrayVector__as_vector(array->length(), {array}); -} - -// [[Rcpp::export]] -SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array) { - return ArrayVector__as_vector(chunked_array->length(), chunked_array->chunks()); -} - // [[Rcpp::export]] std::shared_ptr Array__Slice1(const std::shared_ptr& array, int offset) { diff --git a/r/src/array__to_vector.cpp b/r/src/array__to_vector.cpp new file mode 100644 index 0000000000000..c531933c04d52 --- /dev/null +++ b/r/src/array__to_vector.cpp @@ -0,0 +1,697 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include "arrow_types.h" + +using namespace Rcpp; +using namespace arrow; + +namespace arrow { +namespace r { + +class Converter { + public: + Converter(const ArrayVector& arrays) : arrays_(arrays) {} + + virtual ~Converter() {} + + // Allocate a vector of the right R type for this converter + virtual SEXP Allocate(R_xlen_t n) const = 0; + + // data[ start:(start + n) ] = NA + virtual Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const = 0; + + // ingest the values from the array into data[ start : (start + n)] + virtual Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const = 0; + + // ingest one array + Status IngestOne(SEXP data, const std::shared_ptr& array, R_xlen_t start, + R_xlen_t n) const { + if (array->null_count() == n) { + return Ingest_all_nulls(data, start, n); + } else { + return Ingest_some_nulls(data, array, start, n); + } + } + + // can this run in parallel? + virtual bool Parallel() const { return true; } + + // Ingest all the arrays serially + Status IngestSerial(SEXP data) { + R_xlen_t k = 0; + for (const auto& array : arrays_) { + auto n_chunk = array->length(); + RETURN_NOT_OK(IngestOne(data, array, k, n_chunk)); + k += n_chunk; + } + return Status::OK(); + } + + // ingest the arrays in parallel + // + // for each array, add a task to the task group + // + // The task group is Finish()ed in the caller + void IngestParallel(SEXP data, const std::shared_ptr& tg) { + R_xlen_t k = 0; + for (const auto& array : arrays_) { + auto n_chunk = array->length(); + tg->Append([=] { return IngestOne(data, array, k, n_chunk); }); + k += n_chunk; + } + } + + // Converter factory + static std::shared_ptr Make(const ArrayVector& arrays); + + protected: + const ArrayVector& arrays_; +}; + +// data[start:(start+n)] = NA +template +Status AllNull_Ingest(SEXP data, R_xlen_t start, R_xlen_t n) { + auto p_data = Rcpp::internal::r_vector_start(data) + start; + std::fill_n(p_data, n, default_value()); + return Status::OK(); +} + +// ingest the data from `array` into a slice of `data` +// +// each element goes through `lambda` when some conversion is needed +template +Status SomeNull_Ingest(SEXP data, R_xlen_t start, R_xlen_t n, + const array_value_type* p_values, + const std::shared_ptr& array, Lambda lambda) { + if (!p_values) { + return Status::Invalid("Invalid data buffer"); + } + auto p_data = Rcpp::internal::r_vector_start(data) + start; + + if (array->null_count()) { + arrow::internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), + array->offset(), n); + for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_data, ++p_values) { + *p_data = bitmap_reader.IsSet() ? 
lambda(*p_values) : default_value(); + } + } else { + std::transform(p_values, p_values + n, p_data, lambda); + } + + return Status::OK(); +} + +// Allocate + Ingest +SEXP ArrayVector__as_vector(R_xlen_t n, const ArrayVector& arrays) { + auto converter = Converter::Make(arrays); + Shield data(converter->Allocate(n)); + STOP_IF_NOT_OK(converter->IngestSerial(data)); + return data; +} + +template +class Converter_SimpleArray : public Converter { + using Vector = Rcpp::Vector; + using value_type = typename Vector::stored_type; + + public: + Converter_SimpleArray(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { return Vector(no_init(n)); } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto p_values = array->data()->GetValues(1); + auto echo = [](value_type value) { return value; }; + return SomeNull_Ingest(data, start, n, p_values, array, echo); + } +}; + +class Converter_Date32 : public Converter_SimpleArray { + public: + Converter_Date32(const ArrayVector& arrays) : Converter_SimpleArray(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + IntegerVector data(no_init(n)); + data.attr("class") = "Date"; + return data; + } +}; + +struct Converter_String : public Converter { + public: + Converter_String(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { return StringVector_(no_init(n)); } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto p_offset = array->data()->GetValues(1); + if (!p_offset) { + return Status::Invalid("Invalid offset buffer"); + } + auto p_strings = array->data()->GetValues(2, *p_offset); + if (!p_strings) { + // There is an offset buffer, but the data buffer is null + // There is at least one value in the array and not all the values are null + // That means all values are either empty strings or nulls so there is nothing to do + + if (array->null_count()) { + arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), + array->offset(), n); + for (int i = 0; i < n; i++, null_reader.Next()) { + if (null_reader.IsNotSet()) { + SET_STRING_ELT(data, start + i, NA_STRING); + } + } + } + return Status::OK(); + } + + arrow::StringArray* string_array = static_cast(array.get()); + if (array->null_count()) { + // need to watch for nulls + arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), + array->offset(), n); + for (int i = 0; i < n; i++, null_reader.Next()) { + if (null_reader.IsSet()) { + SET_STRING_ELT(data, start + i, r_string(string_array->GetString(i))); + } else { + SET_STRING_ELT(data, start + i, NA_STRING); + } + } + + } else { + for (int i = 0; i < n; i++) { + SET_STRING_ELT(data, start + i, r_string(string_array->GetString(i))); + } + } + + return Status::OK(); + } + + bool Parallel() const { return false; } + + inline SEXP r_string(const arrow::util::string_view& view) const { + return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8); + } +}; + +class Converter_Boolean : public Converter { + public: + Converter_Boolean(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { return LogicalVector_(no_init(n)); } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) 
const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto p_data = Rcpp::internal::r_vector_start(data) + start; + auto p_bools = array->data()->GetValues(1, 0); + if (!p_bools) { + return Status::Invalid("Invalid data buffer"); + } + + arrow::internal::BitmapReader data_reader(p_bools, array->offset(), n); + if (array->null_count()) { + arrow::internal::BitmapReader null_reader(array->null_bitmap()->data(), + array->offset(), n); + + for (size_t i = 0; i < n; i++, data_reader.Next(), null_reader.Next(), ++p_data) { + *p_data = null_reader.IsSet() ? data_reader.IsSet() : NA_LOGICAL; + } + } else { + for (size_t i = 0; i < n; i++, data_reader.Next(), ++p_data) { + *p_data = data_reader.IsSet(); + } + } + + return Status::OK(); + } +}; + +class Converter_Dictionary : public Converter { + public: + Converter_Dictionary(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + IntegerVector data(no_init(n)); + auto dict_array = static_cast(Converter::arrays_[0].get()); + auto dict = dict_array->dictionary(); + auto indices = dict_array->indices(); + switch (indices->type_id()) { + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::INT32: + break; + default: + stop("Cannot convert Dictionary Array of type `%s` to R", + dict_array->type()->ToString()); + } + + if (dict->type_id() != Type::STRING) { + stop("Cannot convert Dictionary Array of type `%s` to R", + dict_array->type()->ToString()); + } + bool ordered = dict_array->dict_type()->ordered(); + + data.attr("levels") = ArrayVector__as_vector(dict->length(), {dict}); + if (ordered) { + data.attr("class") = CharacterVector::create("ordered", "factor"); + } else { + data.attr("class") = "factor"; + } + return data; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + DictionaryArray* dict_array = static_cast(array.get()); + auto indices = dict_array->indices(); + switch (indices->type_id()) { + case Type::UINT8: + return Ingest_some_nulls_Impl(data, array, start, n); + case Type::INT8: + return Ingest_some_nulls_Impl(data, array, start, n); + case Type::UINT16: + return Ingest_some_nulls_Impl(data, array, start, n); + case Type::INT16: + return Ingest_some_nulls_Impl(data, array, start, n); + case Type::INT32: + return Ingest_some_nulls_Impl(data, array, start, n); + default: + break; + } + return Status::OK(); + } + + private: + template + Status Ingest_some_nulls_Impl(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + using value_type = typename arrow::TypeTraits::ArrayType::value_type; + + std::shared_ptr indices = + static_cast(array.get())->indices(); + + // convert the 0-based indices from the arrow Array + // to 1-based indices used in R factors + auto to_r_index = [](value_type value) { return static_cast(value) + 1; }; + + return SomeNull_Ingest( + data, start, n, indices->data()->GetValues(1), indices, to_r_index); + } +}; + +double ms_to_seconds(int64_t ms) { return static_cast(ms / 1000); } + +class Converter_Date64 : public Converter { + public: + Converter_Date64(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + NumericVector data(no_init(n)); + data.attr("class") = CharacterVector::create("POSIXct", 
"POSIXt"); + return data; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto convert = [](int64_t ms) { return static_cast(ms / 1000); }; + return SomeNull_Ingest( + data, start, n, array->data()->GetValues(1), array, convert); + } +}; + +template +class Converter_Promotion : public Converter { + using r_stored_type = typename Rcpp::Vector::stored_type; + using value_type = typename TypeTraits::ArrayType::value_type; + + public: + Converter_Promotion(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + return Rcpp::Vector(no_init(n)); + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto convert = [](value_type value) { return static_cast(value); }; + return SomeNull_Ingest( + data, start, n, array->data()->GetValues(1), array, convert); + } + + private: + static r_stored_type value_convert(value_type value) { + return static_cast(value); + } +}; + +template +class Converter_Time : public Converter { + public: + Converter_Time(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + NumericVector data(no_init(n)); + data.attr("class") = CharacterVector::create("hms", "difftime"); + data.attr("units") = CharacterVector::create("secs"); + return data; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + int multiplier = TimeUnit_multiplier(array); + auto convert = [=](value_type value) { + return static_cast(value) / multiplier; + }; + return SomeNull_Ingest( + data, start, n, array->data()->GetValues(1), array, convert); + } + + private: + int TimeUnit_multiplier(const std::shared_ptr& array) const { + switch (static_cast(array->type().get())->unit()) { + case TimeUnit::SECOND: + return 1; + case TimeUnit::MILLI: + return 1000; + case TimeUnit::MICRO: + return 1000000; + case TimeUnit::NANO: + return 1000000000; + } + } +}; + +template +class Converter_Timestamp : public Converter_Time { + public: + Converter_Timestamp(const ArrayVector& arrays) : Converter_Time(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + NumericVector data(no_init(n)); + data.attr("class") = CharacterVector::create("POSIXct", "POSIXt"); + return data; + } +}; + +class Converter_Decimal : public Converter { + public: + Converter_Decimal(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { return NumericVector_(no_init(n)); } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return AllNull_Ingest(data, start, n); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto p_data = Rcpp::internal::r_vector_start(data) + start; + const auto& decimals_arr = + internal::checked_cast(*array); + + internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), + n); + + for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_data) { + *p_data = bitmap_reader.IsSet() ? 
std::stod(decimals_arr.FormatValue(i).c_str()) + : NA_REAL; + } + + return Status::OK(); + } +}; + +class Converter_Int64 : public Converter { + public: + Converter_Int64(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + NumericVector data(no_init(n)); + data.attr("class") = "integer64"; + return data; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + auto p_data = reinterpret_cast(REAL(data)) + start; + std::fill_n(p_data, n, NA_INT64); + return Status::OK(); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + auto p_values = array->data()->GetValues(1); + if (!p_values) { + return Status::Invalid("Invalid data buffer"); + } + + auto p_data = reinterpret_cast(REAL(data)) + start; + + if (array->null_count()) { + internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), + n); + for (size_t i = 0; i < n; i++, bitmap_reader.Next(), ++p_data) { + *p_data = bitmap_reader.IsSet() ? p_values[i] : NA_INT64; + } + } else { + std::copy_n(p_values, n, p_data); + } + + return Status::OK(); + } +}; + +std::shared_ptr Converter::Make(const ArrayVector& arrays) { + using namespace arrow::r; + + switch (arrays[0]->type_id()) { + // direct support + case Type::INT8: + return std::make_shared>(arrays); + + case Type::INT32: + return std::make_shared>(arrays); + + case Type::DOUBLE: + return std::make_shared>(arrays); + + // need to handle 1-bit case + case Type::BOOL: + return std::make_shared(arrays); + + // handle memory dense strings + case Type::STRING: + return std::make_shared(arrays); + + case Type::DICTIONARY: + return std::make_shared(arrays); + + case Type::DATE32: + return std::make_shared(arrays); + + case Type::DATE64: + return std::make_shared(arrays); + + // promotions to integer vector + case Type::UINT8: + return std::make_shared>(arrays); + + case Type::INT16: + return std::make_shared>(arrays); + + case Type::UINT16: + return std::make_shared>(arrays); + + // promotions to numeric vector + case Type::UINT32: + return std::make_shared>(arrays); + + case Type::HALF_FLOAT: + return std::make_shared>(arrays); + + case Type::FLOAT: + return std::make_shared>(arrays); + + // time32 and time64 + case Type::TIME32: + return std::make_shared>(arrays); + + case Type::TIME64: + return std::make_shared>(arrays); + + case Type::TIMESTAMP: + return std::make_shared>(arrays); + + case Type::INT64: + return std::make_shared(arrays); + + case Type::DECIMAL: + return std::make_shared(arrays); + + default: + break; + } + + stop(tfm::format("cannot handle Array of type %s", arrays[0]->type()->name())); + return nullptr; +} + +List to_dataframe_serial(int64_t nr, int64_t nc, const CharacterVector& names, + const std::vector>& converters) { + List tbl(nc); + + for (int i = 0; i < nc; i++) { + SEXP column = tbl[i] = converters[i]->Allocate(nr); + STOP_IF_NOT_OK(converters[i]->IngestSerial(column)); + } + tbl.attr("names") = names; + tbl.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame"); + tbl.attr("row.names") = IntegerVector::create(NA_INTEGER, -nr); + return tbl; +} + +List to_dataframe_parallel(int64_t nr, int64_t nc, const CharacterVector& names, + const std::vector>& converters) { + List tbl(nc); + + // task group to ingest data in parallel + auto tg = arrow::internal::TaskGroup::MakeThreaded(arrow::internal::GetCpuThreadPool()); + + // allocate and start ingesting immediately the columns that + // can be ingested in parallel, 
i.e. when ingestion no longer + // needs to happen on the main thread + for (int i = 0; i < nc; i++) { + // allocate data for column i + SEXP column = tbl[i] = converters[i]->Allocate(nr); + + // add a task to ingest data of that column if that can be done in parallel + if (converters[i]->Parallel()) { + converters[i]->IngestParallel(column, tg); + } + } + + arrow::Status status = arrow::Status::OK(); + + // ingest the columns that cannot be dealt with in parallel + for (int i = 0; i < nc; i++) { + if (!converters[i]->Parallel()) { + status &= converters[i]->IngestSerial(tbl[i]); + } + } + + // wait for the ingestion to be finished + status &= tg->Finish(); + + STOP_IF_NOT_OK(status); + + tbl.attr("names") = names; + tbl.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame"); + tbl.attr("row.names") = IntegerVector::create(NA_INTEGER, -nr); + + return tbl; +} + +} // namespace r +} // namespace arrow + +// [[Rcpp::export]] +SEXP Array__as_vector(const std::shared_ptr& array) { + return arrow::r::ArrayVector__as_vector(array->length(), {array}); +} + +// [[Rcpp::export]] +SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array) { + return arrow::r::ArrayVector__as_vector(chunked_array->length(), + chunked_array->chunks()); +} + +// [[Rcpp::export]] +List RecordBatch__to_dataframe(const std::shared_ptr& batch, + bool use_threads) { + int64_t nc = batch->num_columns(); + int64_t nr = batch->num_rows(); + CharacterVector names(nc); + std::vector arrays(nc); + std::vector> converters(nc); + + for (int64_t i = 0; i < nc; i++) { + names[i] = batch->column_name(i); + arrays[i] = {batch->column(i)}; + converters[i] = arrow::r::Converter::Make(arrays[i]); + } + + if (use_threads) { + return arrow::r::to_dataframe_parallel(nr, nc, names, converters); + } else { + return arrow::r::to_dataframe_serial(nr, nc, names, converters); + } +} + +// [[Rcpp::export]] +List Table__to_dataframe(const std::shared_ptr& table, bool use_threads) { + int64_t nc = table->num_columns(); + int64_t nr = table->num_rows(); + CharacterVector names(nc); + std::vector> converters(nc); + + for (int64_t i = 0; i < nc; i++) { + converters[i] = arrow::r::Converter::Make(table->column(i)->data()->chunks()); + names[i] = table->column(i)->name(); + } + + if (use_threads) { + return arrow::r::to_dataframe_parallel(nr, nc, names, converters); + } else { + return arrow::r::to_dataframe_serial(nr, nc, names, converters); + } +} diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 6fef7997dbfa7..a657731a51ae4 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -52,6 +52,8 @@ namespace r { struct symbols { static SEXP units; static SEXP xp; + static SEXP dot_Internal; + static SEXP inspect; }; } // namespace r } // namespace arrow @@ -148,6 +150,7 @@ inline SEXP wrap_dispatch(const T& x, Rcpp::traits::wrap_type_unique_ptr_tag) { } // namespace Rcpp namespace Rcpp { +using NumericVector_ = Rcpp::Vector; using IntegerVector_ = Rcpp::Vector; using LogicalVector_ = Rcpp::Vector; using StringVector_ = Rcpp::Vector; @@ -156,11 +159,11 @@ using RawVector_ = Rcpp::Vector; using List_ = Rcpp::Vector; template -inline typename Rcpp::Vector::stored_type default_value() { +inline constexpr typename Rcpp::Vector::stored_type default_value() { return Rcpp::Vector::get_na(); } template <> -inline Rbyte default_value() { +inline constexpr Rbyte default_value() { return 0; } @@ -174,6 +177,11 @@ std::shared_ptr RecordBatch__from_dataframe(Rcpp::DataFrame namespace arrow { namespace r { +void inspect(SEXP obj); + 
+// the integer64 sentinel +constexpr int64_t NA_INT64 = std::numeric_limits::min(); + template > class RBuffer : public MutableBuffer { public: diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index b6bee7ae53927..b776d2ae5753e 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -57,22 +57,6 @@ std::shared_ptr RecordBatch__column( return batch->column(i); } -// [[Rcpp::export]] -List RecordBatch__to_dataframe(const std::shared_ptr& batch) { - int nc = batch->num_columns(); - int nr = batch->num_rows(); - List tbl(nc); - CharacterVector names(nc); - for (int i = 0; i < nc; i++) { - tbl[i] = Array__as_vector(batch->column(i)); - names[i] = batch->column_name(i); - } - tbl.attr("names") = names; - tbl.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame"); - tbl.attr("row.names") = IntegerVector::create(NA_INTEGER, -nr); - return tbl; -} - // [[Rcpp::export]] std::shared_ptr RecordBatch__from_dataframe(DataFrame tbl) { CharacterVector names = tbl.names(); diff --git a/r/src/symbols.cpp b/r/src/symbols.cpp index e60bcce631f37..5b4e44e8bfc5f 100644 --- a/r/src/symbols.cpp +++ b/r/src/symbols.cpp @@ -21,5 +21,14 @@ namespace arrow { namespace r { SEXP symbols::units = Rf_install("units"); SEXP symbols::xp = Rf_install(".:xp:."); +SEXP symbols::dot_Internal = Rf_install(".Internal"); +SEXP symbols::inspect = Rf_install("inspect"); + +void inspect(SEXP obj) { + Rcpp::Shield call_inspect(Rf_lang2(symbols::inspect, obj)); + Rcpp::Shield call_internal(Rf_lang2(symbols::dot_Internal, call_inspect)); + Rf_eval(call_internal, R_GlobalEnv); +} + } // namespace r } // namespace arrow diff --git a/r/src/table.cpp b/r/src/table.cpp index f4ebd0466b918..fcf2a0347689b 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -45,23 +45,6 @@ std::shared_ptr Table__schema(const std::shared_ptr return x->schema(); } -// [[Rcpp::export]] -List Table__to_dataframe(const std::shared_ptr& table) { - int nc = table->num_columns(); - int nr = table->num_rows(); - List tbl(nc); - CharacterVector names(nc); - for (int i = 0; i < nc; i++) { - auto column = table->column(i); - tbl[i] = ChunkedArray__as_vector(column->data()); - names[i] = column->name(); - } - tbl.attr("names") = names; - tbl.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame"); - tbl.attr("row.names") = IntegerVector::create(NA_INTEGER, -nr); - return tbl; -} - // [[Rcpp::export]] std::shared_ptr Table__column(const std::shared_ptr& table, int i) { diff --git a/r/src/threadpool.cpp b/r/src/threadpool.cpp new file mode 100644 index 0000000000000..1ce0451ac2b55 --- /dev/null +++ b/r/src/threadpool.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include "arrow_types.h" + +//' Get the capacity of the global thread pool +//' +//' @return the number of worker threads in the thread pool to which +//' Arrow dispatches various CPU-bound tasks. This is an ideal number, +//' not necessarily the exact number of threads at a given point in time. +//' +//' You can change this number using [SetCpuThreadPoolCapacity()]. +//' +//' @export +// [[Rcpp::export]] +int GetCpuThreadPoolCapacity() { return arrow::GetCpuThreadPoolCapacity(); } + +//' Set the capacity of the global thread pool +//' +//' @param threads the number of worker threads in the thread pool to which +//' Arrow dispatches various CPU-bound tasks. +//' +//' The current number is returned by [GetCpuThreadPoolCapacity()]. +//' +//' @export +// [[Rcpp::export]] +void SetCpuThreadPoolCapacity(int threads) { + STOP_IF_NOT_OK(arrow::SetCpuThreadPoolCapacity(threads)); +} diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index f40bd8387ad74..29f90946da6e7 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -69,7 +69,6 @@ test_that("RecordBatch", { expect_equal(col_fct$as_vector(), tbl$fct) expect_equal(col_fct$type, dictionary(int32(), array(letters[1:10]))) - batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema, diff --git a/r/tests/testthat/test-cputhreadpoolcapacity.R b/r/tests/testthat/test-cputhreadpoolcapacity.R new file mode 100644 index 0000000000000..de23f151a3524 --- /dev/null +++ b/r/tests/testthat/test-cputhreadpoolcapacity.R @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +context("CpuThreadPoolCapacity") + +test_that("can set/get cpu thread pool capacity", { + old <- GetCpuThreadPoolCapacity() + SetCpuThreadPoolCapacity(19L) + expect_equal(GetCpuThreadPoolCapacity(), 19L) + SetCpuThreadPoolCapacity(old) + expect_equal(GetCpuThreadPoolCapacity(), old) +}) From 2b361fb2e5b4321a6cdcbdbf457181702fd97eaa Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 9 Jan 2019 22:07:14 -0600 Subject: [PATCH 071/203] ARROW-3428: [Python] Fix from_pandas conversion from float to bool When `from_pandas` converts data to boolean, the values are read into a `uint8_t` and then checked. When the values are floating point numbers, not all bits are checked, which can cause incorrect results.
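For illustration, the corrected behavior can be exercised from Python with a snippet along these lines (a minimal sketch mirroring the `test_float_nulls_to_boolean` case added in this patch; before the fix, a value such as 2.0 could convert to `False` because only the first byte of its representation was inspected):

```python
import pandas as pd
import pyarrow as pa

# Any nonzero float should become True; None stays null
s = pd.Series([0.0, 1.0, 2.0, None, -3.0])
arr = pa.Array.from_pandas(s, type=pa.bool_())
print(arr.to_pylist())  # [False, True, True, None, True]
```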
Author: Bryan Cutler Closes #2698 from BryanCutler/python-from_pandas-float-to-bool-ARROW-3428 and squashes the following commits: f3d472626 added test with fix that passes, but fails other tests --- cpp/src/arrow/compute/kernels/cast-test.cc | 19 ++++++ cpp/src/arrow/python/numpy_to_arrow.cc | 66 +++++++++------------ cpp/src/arrow/python/type_traits.h | 1 + python/pyarrow/tests/test_convert_pandas.py | 39 +++++++++--- 4 files changed, 81 insertions(+), 44 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc index 781e0af87a825..c3a0df5d8a73f 100644 --- a/cpp/src/arrow/compute/kernels/cast-test.cc +++ b/cpp/src/arrow/compute/kernels/cast-test.cc @@ -138,6 +138,25 @@ TEST_F(TestCast, SameTypeZeroCopy) { AssertBufferSame(*arr, *result, 1); } +TEST_F(TestCast, FromBoolean) { + CastOptions options; + + vector is_valid(20, true); + is_valid[3] = false; + + vector v1(is_valid.size(), true); + vector e1(is_valid.size(), 1); + for (size_t i = 0; i < v1.size(); ++i) { + if (i % 3 == 1) { + v1[i] = false; + e1[i] = 0; + } + } + + CheckCase(boolean(), v1, is_valid, int32(), e1, + options); +} + TEST_F(TestCast, ToBoolean) { CastOptions options; for (auto type : kNumericTypes) { diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index aa28b6e870834..aada6bf598ca0 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -63,6 +63,7 @@ namespace arrow { using internal::checked_cast; using internal::CopyBitmap; +using internal::GenerateBitsUnrolled; namespace py { @@ -246,6 +247,11 @@ class NumPyConverter { return Status::OK(); } + // Called before ConvertData to ensure Numpy input buffer is in expected + // Arrow layout + template + Status PrepareInputData(std::shared_ptr* data); + // ---------------------------------------------------------------------- // Traditional visitor conversion for non-object arrays @@ -407,14 +413,32 @@ Status CopyStridedArray(PyArrayObject* arr, const int64_t length, MemoryPool* po } // namespace template -inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { +inline Status NumPyConverter::PrepareInputData(std::shared_ptr* data) { if (is_strided()) { RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); + } else if (dtype_->type_num == NPY_BOOL) { + int64_t nbytes = BitUtil::BytesForBits(length_); + std::shared_ptr buffer; + RETURN_NOT_OK(AllocateBuffer(pool_, nbytes, &buffer)); + + Ndarray1DIndexer values(arr_); + int64_t i = 0; + const auto generate = [&values, &i]() -> bool { return values[i++] > 0; }; + GenerateBitsUnrolled(buffer->mutable_data(), 0, length_, generate); + + *data = buffer; } else { // Can zero-copy *data = std::make_shared(reinterpret_cast(arr_)); } + return Status::OK(); +} + +template +inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { + RETURN_NOT_OK(PrepareInputData(data)); + std::shared_ptr input_type; RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); @@ -426,38 +450,12 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { return Status::OK(); } -template <> -inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - int64_t nbytes = BitUtil::BytesForBits(length_); - std::shared_ptr buffer; - RETURN_NOT_OK(AllocateBuffer(pool_, nbytes, &buffer)); - - Ndarray1DIndexer values(arr_); - - uint8_t* bitmap = buffer->mutable_data(); - - memset(bitmap, 0, nbytes); - for (int i = 0; i < length_; ++i) { - if (values[i] > 0) { - 
BitUtil::SetBit(bitmap, i); - } - } - - *data = buffer; - return Status::OK(); -} - template <> inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - if (is_strided()) { - RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); - } else { - // Can zero-copy - *data = std::make_shared(reinterpret_cast(arr_)); - } - std::shared_ptr input_type; + RETURN_NOT_OK(PrepareInputData(data)); + auto date_dtype = reinterpret_cast(dtype_->c_metadata); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted @@ -489,17 +487,11 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr* d template <> inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { - if (is_strided()) { - RETURN_NOT_OK(CopyStridedArray(arr_, length_, pool_, data)); - } else { - // Can zero-copy - *data = std::make_shared(reinterpret_cast(arr_)); - } - constexpr int64_t kMillisecondsInDay = 86400000; - std::shared_ptr input_type; + RETURN_NOT_OK(PrepareInputData(data)); + auto date_dtype = reinterpret_cast(dtype_->c_metadata); if (dtype_->type_num == NPY_DATETIME) { // If we have inbound datetime64[D] data, this needs to be downcasted diff --git a/cpp/src/arrow/python/type_traits.h b/cpp/src/arrow/python/type_traits.h index d90517a60a28a..bc71ec4e90bd0 100644 --- a/cpp/src/arrow/python/type_traits.h +++ b/cpp/src/arrow/python/type_traits.h @@ -149,6 +149,7 @@ template <> struct arrow_traits { static constexpr int npy_type = NPY_BOOL; static constexpr bool supports_nulls = false; + typedef typename npy_traits::value_type T; }; #define INT_DECL(TYPE) \ diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 3e89f5eb4ff70..cd7f4999ace3a 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -113,13 +113,13 @@ def _check_array_roundtrip(values, expected=None, mask=None, else: assert arr.null_count == (mask | values_nulls).sum() - if mask is None: - tm.assert_series_equal(pd.Series(result), pd.Series(values), - check_names=False) - else: - expected = pd.Series(np.ma.masked_array(values, mask=mask)) - tm.assert_series_equal(pd.Series(result), expected, - check_names=False) + if expected is None: + if mask is None: + expected = pd.Series(values) + else: + expected = pd.Series(np.ma.masked_array(values, mask=mask)) + + tm.assert_series_equal(pd.Series(result), expected, check_names=False) def _check_array_from_pandas_roundtrip(np_array, type=None): @@ -559,6 +559,11 @@ def test_float_nulls_to_ints(self): assert table[0].to_pylist() == [1, 2, None] tm.assert_frame_equal(df, table.to_pandas()) + def test_float_nulls_to_boolean(self): + s = pd.Series([0.0, 1.0, 2.0, None, -3.0]) + expected = pd.Series([False, True, True, None, True]) + _check_array_roundtrip(s, expected=expected, type=pa.bool_()) + def test_integer_no_nulls(self): data = OrderedDict() fields = [] @@ -672,6 +677,26 @@ def test_boolean_nulls(self): tm.assert_frame_equal(result, ex_frame) + def test_boolean_to_int(self): + # test from dtype=bool + s = pd.Series([True, True, False, True, True] * 2) + expected = pd.Series([1, 1, 0, 1, 1] * 2) + _check_array_roundtrip(s, expected=expected, type=pa.int64()) + + def test_boolean_objects_to_int(self): + # test from dtype=object + s = pd.Series([True, True, False, True, True] * 2, dtype=object) + expected = pd.Series([1, 1, 0, 1, 1] * 2) + expected_msg = 'Expected integer, got bool' + with pytest.raises(pa.ArrowTypeError, match=expected_msg): + 
_check_array_roundtrip(s, expected=expected, type=pa.int64()) + + def test_boolean_nulls_to_float(self): + # test from dtype=object + s = pd.Series([True, True, False, None, True] * 2) + expected = pd.Series([1.0, 1.0, 0.0, None, 1.0] * 2) + _check_array_roundtrip(s, expected=expected, type=pa.float64()) + def test_float_object_nulls(self): arr = np.array([None, 1.5, np.float64(3.5)] * 5, dtype=object) df = pd.DataFrame({'floats': arr}) From 8ab1493c810ae354ce085c2c2052676f349b168a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 9 Jan 2019 22:30:39 -0600 Subject: [PATCH 072/203] ARROW-4065: [C++] arrowTargets.cmake is broken When we build Arrow's cpp library using CMake, arrowTargets.cmake will be generated and installed, but it's broken. The following is a part of the generated arrowTargets.cmake. ``` # Create imported target arrow_shared add_library(arrow_shared SHARED IMPORTED) set_target_properties(arrow_shared PROPERTIES INTERFACE_LINK_LIBRARIES "dl;pthreadshared" ) # Create imported target arrow_static add_library(arrow_static STATIC IMPORTED) set_target_properties(arrow_static PROPERTIES INTERFACE_LINK_LIBRARIES "glog_static;zstd_static;zlib_shared;snappy_static;lz4_static;brotli_dec_static;brotli_enc_static;brotli_common_static;double-conversion_static;boost_system_shared;boost_filesystem_shared;boost_regex_shared;jemalloc_static;rt;pthreadshared" ) ``` There are no INTERFACE_INCLUDE_DIRECTORIES properties, and the linker doesn't recognize pthreadshared because the actual library name is libpthread.so or libpthread.a. The *_static and *_shared entries are wrong names as well. After this fix, we can build apps that link to Arrow using CMake with a CMakeLists.txt like the following. ``` cmake_minimum_required(VERSION ...) project(...) ... find_package(arrow) add_executable(your_excellent_app ...) target_link_libraries(your_excellent_app arrow_shared) # or arrow_static ...
``` `$ cmake -D CMAKE_PREFIX_PATH=/path/to/arrow /path/to/CMakeLists.txt` `$ cmake --build .` Author: Kousuke Saruta Closes #3212 from sarutak/improve-cmake-config-file-generation and squashes the following commits: 0213d2666 Fix cpp/CMakeLists.txt, src/arrow/CMakeLists.txt and BuildUtils.cmake to enable building apps which links to Arrow using arrowTargets.cmake --- cpp/CMakeLists.txt | 80 +++++++++++++++++++++--------- cpp/cmake_modules/BuildUtils.cmake | 30 +++++++++-- cpp/src/arrow/CMakeLists.txt | 4 +- 3 files changed, 84 insertions(+), 30 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4232af3a12005..0e4f3951156a6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -690,46 +690,59 @@ endif(UNIX) ############################################################ set(ARROW_LINK_LIBS) +set(ARROW_SHARED_INSTALL_INTERFACE_LIBS) +set(ARROW_STATIC_INSTALL_INTERFACE_LIBS) # Libraries to link statically with libarrow.so set(ARROW_STATIC_LINK_LIBS double-conversion_static) +set(ARROW_STATIC_INSTALL_INTERFACE_LIBS double-conversion) if (ARROW_WITH_BROTLI) - SET(ARROW_STATIC_LINK_LIBS + list(APPEND + ARROW_STATIC_LINK_LIBS brotli_dec_static brotli_enc_static - brotli_common_static - ${ARROW_STATIC_LINK_LIBS}) + brotli_common_static) + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + brotlidec + brotlienc + brotlicommon) endif() if (ARROW_WITH_BZ2) - SET(ARROW_STATIC_LINK_LIBS bz2_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS bz2_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS bz2) endif() if (ARROW_WITH_LZ4) - SET(ARROW_STATIC_LINK_LIBS lz4_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS lz4_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS lz4) endif() if (ARROW_WITH_SNAPPY) - SET(ARROW_STATIC_LINK_LIBS snappy_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS snappy_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS snappy) endif() if (ARROW_WITH_ZLIB) - SET(ARROW_STATIC_LINK_LIBS ${ZLIB_LIBRARY} ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS ${ZLIB_LIBRARY}) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS z) endif() if (ARROW_WITH_ZSTD) - SET(ARROW_STATIC_LINK_LIBS zstd_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS zstd_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS zstd) endif() if (ARROW_ORC) - SET(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} - orc_static) + list(APPEND ARROW_STATIC_LINK_LIBS orc_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS orc) endif() if (ARROW_USE_GLOG) - SET(ARROW_STATIC_LINK_LIBS glog_static ${ARROW_STATIC_LINK_LIBS}) + list(APPEND ARROW_STATIC_LINK_LIBS glog_static) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS glog) add_definitions("-DARROW_USE_GLOG") endif() @@ -746,15 +759,24 @@ set(ARROW_SHARED_PRIVATE_LINK_LIBS ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_REGEX_LIBRARY}) -set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} +list(APPEND + ARROW_STATIC_LINK_LIBS ${BOOST_SYSTEM_LIBRARY} ${BOOST_FILESYSTEM_LIBRARY} ${BOOST_REGEX_LIBRARY}) +list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + boost_system + boost_filesystem + boost_regex) + if (NOT MSVC) - set(ARROW_LINK_LIBS - ${ARROW_LINK_LIBS} + list(APPEND + ARROW_LINK_LIBS + ${CMAKE_DL_LIBS}) + list(APPEND + ARROW_SHARED_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) endif() @@ -822,21 +844,31 @@ if (ARROW_JEMALLOC) jemalloc_static ) endif() - set(ARROW_SHARED_PRIVATE_LINK_LIBS - ${ARROW_SHARED_PRIVATE_LINK_LIBS} + list(APPEND + 
ARROW_SHARED_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_LINK_LIBS}) - set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} + list(APPEND + ARROW_STATIC_LINK_LIBS ${ARROW_JEMALLOC_LINK_LIBS}) + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + jemalloc + rt) endif(ARROW_JEMALLOC) if (PTHREAD_LIBRARY) - set(ARROW_LINK_LIBS - ${ARROW_LINK_LIBS} + list(APPEND + ARROW_LINK_LIBS pthreadshared) - set(ARROW_STATIC_LINK_LIBS - ${ARROW_STATIC_LINK_LIBS} + list(APPEND + ARROW_SHARED_INSTALL_INTERFACE_LIBS + pthread) + list(APPEND + ARROW_STATIC_LINK_LIBS pthreadshared) + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + pthread) endif() ############################################################ diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index cf2145b8a9166..fffd15819f85f 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -97,7 +97,9 @@ function(ADD_ARROW_LIB LIB_NAME) SHARED_PRIVATE_LINK_LIBS EXTRA_INCLUDES PRIVATE_INCLUDES - DEPENDENCIES) + DEPENDENCIES + SHARED_INSTALL_INTERFACE_LIBS + STATIC_INSTALL_INTERFACE_LIBS) cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) if(ARG_UNPARSED_ARGUMENTS) message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") @@ -204,8 +206,16 @@ function(ADD_ARROW_LIB LIB_NAME) VERSION "${ARROW_FULL_SO_VERSION}" SOVERSION "${ARROW_SO_VERSION}") + if (ARG_SHARED_INSTALL_INTERFACE_LIBS) + set(INTERFACE_LIBS ${ARG_SHARED_INSTALL_INTERFACE_LIBS}) + else() + set(INTERFACE_LIBS ${ARG_SHARED_LINK_LIBS}) + endif() + target_link_libraries(${LIB_NAME}_shared - LINK_PUBLIC ${ARG_SHARED_LINK_LIBS} + LINK_PUBLIC + "$" + "$" LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS}) if (ARROW_RPATH_ORIGIN) @@ -235,7 +245,8 @@ function(ADD_ARROW_LIB LIB_NAME) EXPORT ${PROJECT_NAME}-targets RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() if (BUILD_STATIC) @@ -274,15 +285,24 @@ function(ADD_ARROW_LIB LIB_NAME) LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}" OUTPUT_NAME ${LIB_NAME_STATIC}) + if (ARG_STATIC_INSTALL_INTERFACE_LIBS) + set(INTERFACE_LIBS ${ARG_STATIC_INSTALL_INTERFACE_LIBS}) + else() + set(INTERFACE_LIBS ${ARG_STATIC_LINK_LIBS}) + endif() + target_link_libraries(${LIB_NAME}_static - LINK_PUBLIC ${ARG_STATIC_LINK_LIBS}) + LINK_PUBLIC + "$" + "$") install(TARGETS ${LIB_NAME}_static ${INSTALL_IS_OPTIONAL} EXPORT ${PROJECT_NAME}-targets RUNTIME DESTINATION ${RUNTIME_INSTALL_DIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif() # Modify variable in calling scope diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 59f035792b80d..244d0b9342f08 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -226,7 +226,9 @@ ADD_ARROW_LIB(arrow SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS} SHARED_LINK_LIBS ${ARROW_LINK_LIBS} SHARED_PRIVATE_LINK_LIBS ${ARROW_SHARED_PRIVATE_LINK_LIBS} - STATIC_LINK_LIBS ${ARROW_STATIC_LINK_LIBS}) + STATIC_LINK_LIBS ${ARROW_STATIC_LINK_LIBS} + SHARED_INSTALL_INTERFACE_LIBS ${ARROW_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_INSTALL_INTERFACE_LIBS ${ARROW_STATIC_INSTALL_INTERFACE_LIBS}) add_dependencies(arrow ${ARROW_LIBRARIES}) From 
7fcad2c29e3c3ac99b2f6c1f1fddc91c05b7f2b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 9 Jan 2019 22:38:12 -0600 Subject: [PATCH 073/203] ARROW-3126: [Python] Make Buffered* IO classes available to Python, incorporate into input_stream, output_stream factory functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should add benchmarks too as a follow up PR. Author: Krisztián Szűcs Author: Wes McKinney Closes #3252 from kszucs/ARROW-3126 and squashes the following commits: 50118a639 Fix API in file-benchmark.cc d3917d9e5 Code review comments, buffer_size=0 means unbuffered 88bed90ef lint 5842eae0e remove test runner script fd729abdb don't typehint _detect_compression 3d1e386ce tests 5e8b38551 fix failing test e458db5a6 python support for buffered input and output streams --- cpp/CMakeLists.txt | 2 +- cpp/src/arrow/io/api.h | 1 + cpp/src/arrow/io/buffered-test.cc | 5 +- cpp/src/arrow/io/buffered.cc | 30 +++-- cpp/src/arrow/io/buffered.h | 20 +-- cpp/src/arrow/io/file-benchmark.cc | 9 +- python/pyarrow/includes/libarrow.pxd | 16 +++ python/pyarrow/io.pxi | 195 +++++++++++++-------------- python/pyarrow/tests/test_io.py | 86 ++++++++++++ 9 files changed, 234 insertions(+), 130 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0e4f3951156a6..08868af829b9e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -80,7 +80,6 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR INFER_FOUND) # See http://clang.llvm.org/docs/JSONCompilationDatabase.html set(CMAKE_EXPORT_COMPILE_COMMANDS 1) endif() - # ---------------------------------------------------------------------- # cmake options @@ -358,6 +357,7 @@ endif() if (ARROW_USE_CCACHE) find_program(CCACHE_FOUND ccache) if(CCACHE_FOUND) + message(STATUS "Using ccache: ${CCACHE_FOUND}") set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND}) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) endif(CCACHE_FOUND) diff --git a/cpp/src/arrow/io/api.h b/cpp/src/arrow/io/api.h index 0d5742ad65864..cf1be337fd1a9 100644 --- a/cpp/src/arrow/io/api.h +++ b/cpp/src/arrow/io/api.h @@ -18,6 +18,7 @@ #ifndef ARROW_IO_API_H #define ARROW_IO_API_H +#include "arrow/io/buffered.h" #include "arrow/io/compressed.h" #include "arrow/io/file.h" #include "arrow/io/hdfs.h" diff --git a/cpp/src/arrow/io/buffered-test.cc b/cpp/src/arrow/io/buffered-test.cc index 074833d4bf7b7..7b9ab0cd890b1 100644 --- a/cpp/src/arrow/io/buffered-test.cc +++ b/cpp/src/arrow/io/buffered-test.cc @@ -105,7 +105,8 @@ class TestBufferedOutputStream : public FileTestFixture { lseek(fd_, 0, SEEK_END); #endif } - ASSERT_OK(BufferedOutputStream::Create(file, buffer_size, &buffered_)); + ASSERT_OK(BufferedOutputStream::Create(buffer_size, default_memory_pool(), file, + &buffered_)); } void WriteChunkwise(const std::string& datastr, const std::valarray& sizes) { @@ -321,7 +322,7 @@ class TestBufferedInputStream : public FileTestFixture { std::shared_ptr file_in; ASSERT_OK(ReadableFile::Open(path_, &file_in)); raw_ = file_in; - ASSERT_OK(BufferedInputStream::Create(raw_, buffer_size, pool, &buffered_)); + ASSERT_OK(BufferedInputStream::Create(buffer_size, pool, raw_, &buffered_)); } protected: diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index f3eae39c8e62e..0b1431f440fa2 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -91,8 +91,8 @@ class BufferedBase { class BufferedOutputStream::Impl : public BufferedBase { 
public: - explicit Impl(std::shared_ptr raw) - : BufferedBase(default_memory_pool()), raw_(std::move(raw)) {} + explicit Impl(std::shared_ptr raw, MemoryPool* pool) + : BufferedBase(pool), raw_(std::move(raw)) {} Status Close() { std::lock_guard guard(lock_); @@ -173,14 +173,16 @@ class BufferedOutputStream::Impl : public BufferedBase { std::shared_ptr raw_; }; -BufferedOutputStream::BufferedOutputStream(std::shared_ptr raw) - : impl_(new BufferedOutputStream::Impl(std::move(raw))) {} +BufferedOutputStream::BufferedOutputStream(std::shared_ptr raw, + MemoryPool* pool) { + impl_.reset(new Impl(std::move(raw), pool)); +} -Status BufferedOutputStream::Create(std::shared_ptr raw, - int64_t buffer_size, +Status BufferedOutputStream::Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out) { - auto result = - std::shared_ptr(new BufferedOutputStream(std::move(raw))); + auto result = std::shared_ptr( + new BufferedOutputStream(std::move(raw), pool)); RETURN_NOT_OK(result->SetBufferSize(buffer_size)); *out = std::move(result); return Status::OK(); @@ -217,12 +219,12 @@ std::shared_ptr BufferedOutputStream::raw() const { return impl_-> // ---------------------------------------------------------------------- // BufferedInputStream implementation -class BufferedInputStream::BufferedInputStreamImpl : public BufferedBase { +class BufferedInputStream::Impl : public BufferedBase { public: - BufferedInputStreamImpl(std::shared_ptr raw, MemoryPool* pool) + Impl(std::shared_ptr raw, MemoryPool* pool) : BufferedBase(pool), raw_(std::move(raw)), bytes_buffered_(0) {} - ~BufferedInputStreamImpl() { DCHECK_OK(Close()); } + ~Impl() { DCHECK_OK(Close()); } Status Close() { std::lock_guard guard(lock_); @@ -350,13 +352,13 @@ class BufferedInputStream::BufferedInputStreamImpl : public BufferedBase { BufferedInputStream::BufferedInputStream(std::shared_ptr raw, MemoryPool* pool) { - impl_.reset(new BufferedInputStreamImpl(std::move(raw), pool)); + impl_.reset(new Impl(std::move(raw), pool)); } BufferedInputStream::~BufferedInputStream() { DCHECK_OK(impl_->Close()); } -Status BufferedInputStream::Create(std::shared_ptr raw, int64_t buffer_size, - MemoryPool* pool, +Status BufferedInputStream::Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out) { auto result = std::shared_ptr(new BufferedInputStream(std::move(raw), pool)); diff --git a/cpp/src/arrow/io/buffered.h b/cpp/src/arrow/io/buffered.h index d5079556c7cfc..945915bfe998f 100644 --- a/cpp/src/arrow/io/buffered.h +++ b/cpp/src/arrow/io/buffered.h @@ -40,12 +40,13 @@ class ARROW_EXPORT BufferedOutputStream : public OutputStream { ~BufferedOutputStream() override; /// \brief Create a buffered output stream wrapping the given output stream. + /// \param[in] buffer_size the size of the temporary write buffer + /// \param[in] pool a MemoryPool to use for allocations /// \param[in] raw another OutputStream - /// \param[in] buffer_size the size of the temporary buffer. 
Allocates from - /// the default memory pool /// \param[out] out the created BufferedOutputStream /// \return Status - static Status Create(std::shared_ptr raw, int64_t buffer_size, + static Status Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, std::shared_ptr* out); /// \brief Resize internal buffer @@ -79,7 +80,7 @@ class ARROW_EXPORT BufferedOutputStream : public OutputStream { std::shared_ptr raw() const; private: - explicit BufferedOutputStream(std::shared_ptr raw); + explicit BufferedOutputStream(std::shared_ptr raw, MemoryPool* pool); class ARROW_NO_EXPORT Impl; std::unique_ptr impl_; @@ -94,12 +95,13 @@ class ARROW_EXPORT BufferedInputStream : public InputStream { ~BufferedInputStream() override; /// \brief Create a BufferedInputStream from a raw InputStream - /// \param[in] raw a raw InputStream /// \param[in] buffer_size the size of the temporary read buffer /// \param[in] pool a MemoryPool to use for allocations + /// \param[in] raw a raw InputStream /// \param[out] out the created BufferedInputStream - static Status Create(std::shared_ptr raw, int64_t buffer_size, - MemoryPool* pool, std::shared_ptr* out); + static Status Create(int64_t buffer_size, MemoryPool* pool, + std::shared_ptr raw, + std::shared_ptr* out); /// \brief Resize internal read buffer; calls to Read(...) will read at least /// \param[in] new_buffer_size the new read buffer size @@ -138,8 +140,8 @@ class ARROW_EXPORT BufferedInputStream : public InputStream { private: explicit BufferedInputStream(std::shared_ptr raw, MemoryPool* pool); - class ARROW_NO_EXPORT BufferedInputStreamImpl; - std::unique_ptr impl_; + class ARROW_NO_EXPORT Impl; + std::unique_ptr impl_; }; } // namespace io diff --git a/cpp/src/arrow/io/file-benchmark.cc b/cpp/src/arrow/io/file-benchmark.cc index c57fa6d605d68..4439a18978232 100644 --- a/cpp/src/arrow/io/file-benchmark.cc +++ b/cpp/src/arrow/io/file-benchmark.cc @@ -163,7 +163,8 @@ static void BM_BufferedOutputStreamSmallWritesToNull( ABORT_NOT_OK(io::FileOutputStream::Open(GetNullFile(), &file)); std::shared_ptr buffered_file; - ABORT_NOT_OK(io::BufferedOutputStream::Create(file, kBufferSize, &buffered_file)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), file, + &buffered_file)); BenchmarkStreamingWrites(state, small_sizes, buffered_file.get()); } @@ -196,7 +197,8 @@ static void BM_BufferedOutputStreamSmallWritesToPipe( SetupPipeWriter(&stream, &reader); std::shared_ptr buffered_stream; - ABORT_NOT_OK(io::BufferedOutputStream::Create(stream, kBufferSize, &buffered_stream)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), + stream, &buffered_stream)); BenchmarkStreamingWrites(state, small_sizes, buffered_stream.get(), reader.get()); } @@ -207,7 +209,8 @@ static void BM_BufferedOutputStreamLargeWritesToPipe( SetupPipeWriter(&stream, &reader); std::shared_ptr buffered_stream; - ABORT_NOT_OK(io::BufferedOutputStream::Create(stream, kBufferSize, &buffered_stream)); + ABORT_NOT_OK(io::BufferedOutputStream::Create(kBufferSize, default_memory_pool(), + stream, &buffered_stream)); BenchmarkStreamingWrites(state, large_sizes, buffered_stream.get(), reader.get()); } diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index cc77ff432967f..97bc892ddf3fe 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -697,6 +697,22 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil: CStatus Make(CCodec* codec, 
shared_ptr[OutputStream] raw, shared_ptr[CCompressedOutputStream]* out) + cdef cppclass CBufferedInputStream \ + " arrow::io::BufferedInputStream"(InputStream): + + @staticmethod + CStatus Create(int64_t buffer_size, CMemoryPool* pool, + shared_ptr[InputStream] raw, + shared_ptr[CBufferedInputStream]* out) + + cdef cppclass CBufferedOutputStream \ + " arrow::io::BufferedOutputStream"(OutputStream): + + @staticmethod + CStatus Create(int64_t buffer_size, CMemoryPool* pool, + shared_ptr[OutputStream] raw, + shared_ptr[CBufferedOutputStream]* out) + # ---------------------------------------------------------------------- # HDFS diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 97abde8f892af..52122740b63ae 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1064,32 +1064,6 @@ cdef class BufferReader(NativeFile): self.is_readable = True -cdef shared_ptr[InputStream] _make_compressed_input_stream( - shared_ptr[InputStream] stream, - CompressionType compression_type) except *: - cdef: - shared_ptr[CCompressedInputStream] compressed_stream - unique_ptr[CCodec] codec - - check_status(CCodec.Create(compression_type, &codec)) - check_status(CCompressedInputStream.Make(codec.get(), stream, - &compressed_stream)) - return compressed_stream - - -cdef shared_ptr[OutputStream] _make_compressed_output_stream( - shared_ptr[OutputStream] stream, - CompressionType compression_type) except *: - cdef: - shared_ptr[CCompressedOutputStream] compressed_stream - unique_ptr[CCodec] codec - - check_status(CCodec.Create(compression_type, &codec)) - check_status(CCompressedOutputStream.Make(codec.get(), stream, - &compressed_stream)) - return compressed_stream - - cdef class CompressedInputStream(NativeFile): """ An input stream wrapper which decompresses data on the fly. 
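At the user level, the buffered classes declared above compose with the existing compressed-stream wrappers through the `input_stream` / `output_stream` factories. A minimal sketch of the intended usage, assuming a pyarrow build that includes this patch (the file path is illustrative):

```python
import pyarrow as pa

# Write gzip-compressed data. With buffer_size set, compressed bytes
# are coalesced in a 64 KiB buffer before reaching the file;
# buffer_size=None or 0 means unbuffered.
with pa.output_stream('/tmp/data.gz', compression='gzip',
                      buffer_size=64 * 1024) as out:
    for _ in range(1000):
        out.write(b'some line of text\n')

# Read it back. Compression is detected from the '.gz' suffix, and
# the raw file reads go through a buffered wrapper of the same size.
with pa.input_stream('/tmp/data.gz', buffer_size=64 * 1024) as stream:
    data = stream.read()
```

Note the wrapping order the factories choose: on the read side the buffer sits below the decompressor (raw file reads are buffered), while on the write side the buffer sits below the compressor (compressed output is buffered before reaching the file).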
@@ -1104,26 +1078,19 @@ cdef class CompressedInputStream(NativeFile): def __init__(self, NativeFile stream, compression): cdef: CompressionType compression_type + unique_ptr[CCodec] codec + shared_ptr[CCompressedInputStream] compressed_stream compression_type = _get_compression_type(compression) if compression_type == CompressionType_UNCOMPRESSED: - raise ValueError("Invalid value for compression: %r" - % (compression,)) - self._init(stream, compression_type) + raise ValueError('Invalid value for compression: {!r}' + .format(compression)) - @staticmethod - cdef create(NativeFile stream, CompressionType compression_type): - cdef: - CompressedInputStream self - - self = CompressedInputStream.__new__(CompressedInputStream) - self._init(stream, compression_type) - return self + check_status(CCodec.Create(compression_type, &codec)) + check_status(CCompressedInputStream.Make( + codec.get(), stream.get_input_stream(), &compressed_stream)) - cdef _init(self, NativeFile stream, CompressionType compression_type): - self.set_input_stream( - _make_compressed_input_stream(stream.get_input_stream(), - compression_type)) + self.set_input_stream( compressed_stream) self.is_readable = True @@ -1138,29 +1105,55 @@ cdef class CompressedOutputStream(NativeFile): The compression type ("bz2", "brotli", "gzip", "lz4", "snappy" or "zstd") """ + def __init__(self, NativeFile stream, compression): cdef: CompressionType compression_type + unique_ptr[CCodec] codec + shared_ptr[CCompressedOutputStream] compressed_stream compression_type = _get_compression_type(compression) if compression_type == CompressionType_UNCOMPRESSED: - raise ValueError("Invalid value for compression: %r" - % (compression,)) - self._init(stream, compression_type) + raise ValueError('Invalid value for compression: {!r}' + .format(compression)) - @staticmethod - cdef create(NativeFile stream, CompressionType compression_type): - cdef: - CompressedOutputStream self + check_status(CCodec.Create(compression_type, &codec)) + check_status(CCompressedOutputStream.Make( + codec.get(), stream.get_output_stream(), &compressed_stream)) - self = CompressedOutputStream.__new__(CompressedOutputStream) - self._init(stream, compression_type) - return self + self.set_output_stream( compressed_stream) + self.is_writable = True + + +cdef class BufferedInputStream(NativeFile): + + def __init__(self, NativeFile stream, int buffer_size, + MemoryPool memory_pool=None): + cdef shared_ptr[CBufferedInputStream] buffered_stream + + if buffer_size <= 0: + raise ValueError('Buffer size must be larger than zero') + check_status(CBufferedInputStream.Create( + buffer_size, maybe_unbox_memory_pool(memory_pool), + stream.get_input_stream(), &buffered_stream)) + + self.set_input_stream( buffered_stream) + self.is_readable = True + + +cdef class BufferedOutputStream(NativeFile): + + def __init__(self, NativeFile stream, int buffer_size, + MemoryPool memory_pool=None): + cdef shared_ptr[CBufferedOutputStream] buffered_stream + + if buffer_size <= 0: + raise ValueError('Buffer size must be larger than zero') + check_status(CBufferedOutputStream.Create( + buffer_size, maybe_unbox_memory_pool(memory_pool), + stream.get_output_stream(), &buffered_stream)) - cdef _init(self, NativeFile stream, CompressionType compression_type): - self.set_output_stream( - _make_compressed_output_stream(stream.get_output_stream(), - compression_type)) + self.set_output_stream( buffered_stream) self.is_writable = True @@ -1232,24 +1225,27 @@ cdef get_input_stream(object source, c_bool use_memory_map, """ 
cdef: NativeFile nf + unique_ptr[CCodec] codec shared_ptr[InputStream] input_stream shared_ptr[CCompressedInputStream] compressed_stream - CompressionType compression_type = CompressionType_UNCOMPRESSED - unique_ptr[CCodec] codec + CompressionType compression_type try: source_path = _stringify_path(source) except TypeError: - pass + compression = None else: - compression_type = _get_compression_type_by_filename(source_path) + compression = _detect_compression(source_path) + compression_type = _get_compression_type(compression) nf = _get_native_file(source, use_memory_map) input_stream = nf.get_input_stream() if compression_type != CompressionType_UNCOMPRESSED: - input_stream = _make_compressed_input_stream(input_stream, - compression_type) + check_status(CCodec.Create(compression_type, &codec)) + check_status(CCompressedInputStream.Make(codec.get(), input_stream, + &compressed_stream)) + input_stream = compressed_stream out[0] = input_stream @@ -1292,21 +1288,19 @@ cdef CompressionType _get_compression_type(object name) except *: elif name == 'zstd': return CompressionType_ZSTD else: - raise ValueError("Unrecognized compression type: {0}" - .format(str(name))) + raise ValueError('Unrecognized compression type: {}'.format(name)) -cdef CompressionType _get_compression_type_by_filename(filename) except *: - if filename.endswith('.bz2'): - return CompressionType_BZ2 - elif filename.endswith('.gz'): - return CompressionType_GZIP - elif filename.endswith('.lz4'): - return CompressionType_LZ4 - elif filename.endswith('.zst'): - return CompressionType_ZSTD - else: - return CompressionType_UNCOMPRESSED +def _detect_compression(path): + if isinstance(path, six.string_types): + if path.endswith('.bz2'): + return 'bz2' + elif path.endswith('.gz'): + return 'gzip' + elif path.endswith('.lz4'): + return 'lz4' + elif path.endswith('.zst'): + return 'zstd' def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): @@ -1427,18 +1421,7 @@ def decompress(object buf, decompressed_size=None, codec='lz4', return pybuf if asbytes else out_buf -cdef CompressionType _stream_compression_argument( - compression, source_path) except *: - if compression == 'detect': - if source_path is not None: - return _get_compression_type_by_filename(source_path) - else: - return CompressionType_UNCOMPRESSED - else: - return _get_compression_type(compression) - - -def input_stream(source, compression='detect'): +def input_stream(source, compression='detect', buffer_size=None): """ Create an Arrow input stream. @@ -1452,18 +1435,17 @@ def input_stream(source, compression='detect'): chosen based on the file extension. If None, no compression will be applied. Otherwise, a well-known algorithm name must be supplied (e.g. "gzip") + buffer_size: int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary read buffer. 
""" - cdef: - CompressionType compression_type - NativeFile stream + cdef NativeFile stream try: source_path = _stringify_path(source) except TypeError: source_path = None - compression_type = _stream_compression_argument(compression, source_path) - if isinstance(source, NativeFile): stream = source elif source_path is not None: @@ -1479,13 +1461,19 @@ def input_stream(source, compression='detect'): raise TypeError("pa.input_stream() called with instance of '{}'" .format(source.__class__)) - if compression_type != CompressionType_UNCOMPRESSED: - stream = CompressedInputStream.create(stream, compression_type) + if compression == 'detect': + compression = _detect_compression(source_path) + + if buffer_size is not None and buffer_size != 0: + stream = BufferedInputStream(stream, buffer_size) + + if compression is not None: + stream = CompressedInputStream(stream, compression) return stream -def output_stream(source, compression='detect'): +def output_stream(source, compression='detect', buffer_size=None): """ Create an Arrow output stream. @@ -1499,18 +1487,17 @@ def output_stream(source, compression='detect'): chosen based on the file extension. If None, no compression will be applied. Otherwise, a well-known algorithm name must be supplied (e.g. "gzip") + buffer_size: int, default None + If None or 0, no buffering will happen. Otherwise the size of the + temporary write buffer. """ - cdef: - CompressionType compression_type - NativeFile stream + cdef NativeFile stream try: source_path = _stringify_path(source) except TypeError: source_path = None - compression_type = _stream_compression_argument(compression, source_path) - if isinstance(source, NativeFile): stream = source elif source_path is not None: @@ -1526,7 +1513,13 @@ def output_stream(source, compression='detect'): raise TypeError("pa.output_stream() called with instance of '{}'" .format(source.__class__)) - if compression_type != CompressionType_UNCOMPRESSED: - stream = CompressedOutputStream.create(stream, compression_type) + if compression == 'detect': + compression = _detect_compression(source_path) + + if buffer_size is not None and buffer_size != 0: + stream = BufferedOutputStream(stream, buffer_size) + + if compression is not None: + stream = CompressedOutputStream(stream, compression) return stream diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index f54f03a9ff92e..77ed70c31ca77 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -1134,6 +1134,44 @@ def test_input_stream_file_path_compressed(tmpdir): assert stream.read() == gz_data +def test_input_stream_file_path_buffered(tmpdir): + data = b"some test data\n" * 10 + b"eof\n" + file_path = tmpdir / 'input_stream.buffered' + with open(str(file_path), 'wb') as f: + f.write(data) + + stream = pa.input_stream(file_path, buffer_size=32) + assert stream.read() == data + stream = pa.input_stream(str(file_path), buffer_size=64) + assert stream.read() == data + stream = pa.input_stream(pathlib.Path(str(file_path)), buffer_size=1024) + assert stream.read() == data + + unbuffered_stream = pa.input_stream(file_path, buffer_size=0) + assert isinstance(unbuffered_stream, pa.OSFile) + + msg = 'Buffer size must be larger than zero' + with pytest.raises(ValueError, match=msg): + pa.input_stream(file_path, buffer_size=-1) + with pytest.raises(TypeError): + pa.input_stream(file_path, buffer_size='million') + + +def test_input_stream_file_path_compressed_and_buffered(tmpdir): + data = b"some test data\n" * 100 + b"eof\n" + 
gz_data = gzip_compress(data) + file_path = tmpdir / 'input_stream_compressed_and_buffered.gz' + with open(str(file_path), 'wb') as f: + f.write(gz_data) + + stream = pa.input_stream(file_path, buffer_size=32, compression='gzip') + assert stream.read() == data + stream = pa.input_stream(str(file_path), buffer_size=64) + assert stream.read() == data + stream = pa.input_stream(pathlib.Path(str(file_path)), buffer_size=1024) + assert stream.read() == data + + def test_input_stream_python_file(tmpdir): data = b"some test data\n" * 10 + b"eof\n" bio = BytesIO(data) @@ -1232,6 +1270,54 @@ def check_data(file_path, data, **kwargs): check_data(file_path, data, compression='gzip')) == data assert check_data(file_path, data, compression=None) == data + with pytest.raises(ValueError, match='Unrecognized compression type'): + assert check_data(file_path, data, compression='rabbit') == data + + +def test_output_stream_file_path_buffered(tmpdir): + data = b"some test data\n" * 10 + b"eof\n" + file_path = tmpdir / 'output_stream.buffered' + + def check_data(file_path, data, **kwargs): + with pa.output_stream(file_path, **kwargs) as stream: + stream.write(data) + with open(str(file_path), 'rb') as f: + return f.read() + + unbuffered_stream = pa.output_stream(file_path, buffer_size=0) + assert isinstance(unbuffered_stream, pa.OSFile) + + msg = 'Buffer size must be larger than zero' + with pytest.raises(ValueError, match=msg): + assert check_data(file_path, data, buffer_size=-128) == data + + assert check_data(file_path, data, buffer_size=32) == data + assert check_data(file_path, data, buffer_size=1024) == data + assert check_data(str(file_path), data, buffer_size=32) == data + + result = check_data(pathlib.Path(str(file_path)), data, buffer_size=32) + assert result == data + + +def test_output_stream_file_path_compressed_and_buffered(tmpdir): + data = b"some test data\n" * 100 + b"eof\n" + file_path = tmpdir / 'output_stream_compressed_and_buffered.gz' + + def check_data(file_path, data, **kwargs): + with pa.output_stream(file_path, **kwargs) as stream: + stream.write(data) + with open(str(file_path), 'rb') as f: + return f.read() + + result = check_data(file_path, data, buffer_size=32) + assert gzip_decompress(result) == data + + result = check_data(file_path, data, buffer_size=1024) + assert gzip_decompress(result) == data + + result = check_data(file_path, data, buffer_size=1024, compression='gzip') + assert gzip_decompress(result) == data + def test_output_stream_python_file(tmpdir): data = b"some test data\n" * 10 + b"eof\n" From ea69e8fe4901329e53455c8d6fafad1c4f35d827 Mon Sep 17 00:00:00 2001 From: Yosuke Shiro Date: Thu, 10 Jan 2019 17:07:39 +0900 Subject: [PATCH 074/203] ARROW-4207: [Gandiva] [GLib] Add support for IfNode Author: Yosuke Shiro Author: Kouhei Sutou Closes #3354 from shiro615/glib-add-support-for-if-node and squashes the following commits: d543ea00 Add support for error 8058b2c7 Add support for IfNode --- c_glib/gandiva-glib/node.cpp | 207 ++++++++++++++++++++++++++++ c_glib/gandiva-glib/node.h | 19 +++ c_glib/gandiva-glib/node.hpp | 6 + c_glib/test/gandiva/test-if-node.rb | 49 +++++++ 4 files changed, 281 insertions(+) create mode 100644 c_glib/test/gandiva/test-if-node.rb diff --git a/c_glib/gandiva-glib/node.cpp b/c_glib/gandiva-glib/node.cpp index 2c68cbeabe330..a3814c190412d 100644 --- a/c_glib/gandiva-glib/node.cpp +++ b/c_glib/gandiva-glib/node.cpp @@ -95,6 +95,8 @@ G_BEGIN_DECLS * #GGandivaStringLiteralNode is a class for a node in the expression tree, * representing an 
UTF-8 encoded string literal. * + * #GGandivaIfNode is a class for a node in the expression tree, representing an if-else. + * * Since: 0.12.0 */ @@ -1180,6 +1182,194 @@ ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node) return value.c_str(); } + +typedef struct GGandivaIfNodePrivate_ { + GGandivaNode *condition_node; + GGandivaNode *then_node; + GGandivaNode *else_node; +} GGandivaIfNodePrivate; + +enum { + PROP_CONDITION_NODE = 1, + PROP_THEN_NODE, + PROP_ELSE_NODE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GGandivaIfNode, + ggandiva_if_node, + GGANDIVA_TYPE_NODE) + +#define GGANDIVA_IF_NODE_GET_PRIVATE(object) \ + static_cast( \ + ggandiva_if_node_get_instance_private( \ + GGANDIVA_IF_NODE(object))) + +static void +ggandiva_if_node_dispose(GObject *object) +{ + auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object); + + if (priv->condition_node) { + g_object_unref(priv->condition_node); + priv->condition_node = nullptr; + } + + if (priv->then_node) { + g_object_unref(priv->then_node); + priv->then_node = nullptr; + } + + if (priv->else_node) { + g_object_unref(priv->else_node); + priv->else_node = nullptr; + } + + G_OBJECT_CLASS(ggandiva_if_node_parent_class)->dispose(object); +} + +static void +ggandiva_if_node_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONDITION_NODE: + priv->condition_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + case PROP_THEN_NODE: + priv->then_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + case PROP_ELSE_NODE: + priv->else_node = GGANDIVA_NODE(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_if_node_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GGANDIVA_IF_NODE_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_CONDITION_NODE: + g_value_set_object(value, priv->condition_node); + break; + case PROP_THEN_NODE: + g_value_set_object(value, priv->then_node); + break; + case PROP_ELSE_NODE: + g_value_set_object(value, priv->else_node); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +ggandiva_if_node_init(GGandivaIfNode *if_node) +{ +} + +static void +ggandiva_if_node_class_init(GGandivaIfNodeClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = ggandiva_if_node_dispose; + gobject_class->set_property = ggandiva_if_node_set_property; + gobject_class->get_property = ggandiva_if_node_get_property; + + GParamSpec *spec; + spec = g_param_spec_object("condition-node", + "Condition node", + "The condition node", + GGANDIVA_TYPE_NODE, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_CONDITION_NODE, spec); + + spec = g_param_spec_object("then-node", + "Then node", + "The then node", + GGANDIVA_TYPE_NODE, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_THEN_NODE, spec); + + spec = g_param_spec_object("else-node", + "Else node", + "The else node", + GGANDIVA_TYPE_NODE, + static_cast(G_PARAM_READWRITE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_ELSE_NODE, spec); +} + +/** + * ggandiva_if_node_new: + * @condition_node: the node with the condition for if-else expression. 
+ * @then_node: the node in case the condition node is true. + * @else_node: the node in case the condition node is false. + * @return_type: A #GArrowDataType. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GGandivaIfNode or %NULl on error. + * + * Since: 0.12.0 + */ +GGandivaIfNode * +ggandiva_if_node_new(GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type, + GError **error) +{ + if (!condition_node || !then_node || !else_node || !return_type) { + /* TODO: Improve error message to show which arguments are invalid. */ + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][if-literal-node][new] " + "all arguments must not NULL"); + return NULL; + } + auto gandiva_condition_node = ggandiva_node_get_raw(condition_node); + auto gandiva_then_node = ggandiva_node_get_raw(then_node); + auto gandiva_else_node = ggandiva_node_get_raw(else_node); + auto arrow_return_type = garrow_data_type_get_raw(return_type); + auto gandiva_node = gandiva::TreeExprBuilder::MakeIf(gandiva_condition_node, + gandiva_then_node, + gandiva_else_node, + arrow_return_type); + if (!gandiva_node) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[gandiva][if-literal-node][new] " + "failed to create: if (<%s>) {<%s>} else {<%s>} -> <%s>", + gandiva_condition_node->ToString().c_str(), + gandiva_then_node->ToString().c_str(), + gandiva_else_node->ToString().c_str(), + arrow_return_type->ToString().c_str()); + return NULL; + } + return ggandiva_if_node_new_raw(&gandiva_node, + condition_node, + then_node, + else_node, + return_type); +} + G_END_DECLS std::shared_ptr @@ -1305,3 +1495,20 @@ ggandiva_literal_node_new_raw(std::shared_ptr *gandiva_node, return literal_node; } + +GGandivaIfNode * +ggandiva_if_node_new_raw(std::shared_ptr *gandiva_node, + GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type) +{ + auto if_node = g_object_new(GGANDIVA_TYPE_IF_NODE, + "node", gandiva_node, + "condition-node", condition_node, + "then-node", then_node, + "else-node", else_node, + "return-type", return_type, + NULL); + return GGANDIVA_IF_NODE(if_node); +} diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h index d9e67e27b7eea..ffcf41da10b21 100644 --- a/c_glib/gandiva-glib/node.h +++ b/c_glib/gandiva-glib/node.h @@ -320,4 +320,23 @@ ggandiva_string_literal_node_new(const gchar *value); const gchar * ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); + +#define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +G_DECLARE_DERIVABLE_TYPE(GGandivaIfNode, + ggandiva_if_node, + GGANDIVA, + IF_NODE, + GGandivaNode) +struct _GGandivaIfNodeClass +{ + GGandivaNodeClass parent_class; +}; + +GGandivaIfNode * +ggandiva_if_node_new(GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode *else_node, + GArrowDataType *return_type, + GError **error); + G_END_DECLS diff --git a/c_glib/gandiva-glib/node.hpp b/c_glib/gandiva-glib/node.hpp index 40f9d1b465591..9a6ae98058699 100644 --- a/c_glib/gandiva-glib/node.hpp +++ b/c_glib/gandiva-glib/node.hpp @@ -38,3 +38,9 @@ ggandiva_function_node_new_raw(std::shared_ptr *gandiva_node, GGandivaLiteralNode * ggandiva_literal_node_new_raw(std::shared_ptr *gandiva_node, GArrowDataType *return_type); +GGandivaIfNode * +ggandiva_if_node_new_raw(std::shared_ptr *gandiva_node, + GGandivaNode *condition_node, + GGandivaNode *then_node, + GGandivaNode 
*else_node, + GArrowDataType *return_type); diff --git a/c_glib/test/gandiva/test-if-node.rb b/c_glib/test/gandiva/test-if-node.rb new file mode 100644 index 0000000000000..b00359590905d --- /dev/null +++ b/c_glib/test/gandiva/test-if-node.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestGandivaIfNode < Test::Unit::TestCase + def setup + omit("Gandiva is required") unless defined?(::Gandiva) + field1 = Arrow::Field.new("field1", Arrow::Int32DataType.new) + field2 = Arrow::Field.new("field2", Arrow::Int32DataType.new) + @then_node = Gandiva::FieldNode.new(field1) + @else_node = Gandiva::FieldNode.new(field2) + @return_type = Arrow::Int32DataType.new + @condition_node = Gandiva::FunctionNode.new("greater_than", + [@then_node, @else_node], + @return_type) + @if_node = Gandiva::IfNode.new(@condition_node, + @then_node, + @else_node, + @return_type) + end + + def test_readers + assert_equal([ + @condition_node, + @then_node, + @else_node, + @return_type + ], + [ + @if_node.condition_node, + @if_node.then_node, + @if_node.else_node, + @if_node.return_type + ]) + end +end From 9c0e643442fa6d4ca3db18b1fe4adf8fcd7dd807 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Thu, 10 Jan 2019 17:18:30 +0900 Subject: [PATCH 075/203] ARROW-4211: [GLib] Add GArrowFixedSizeBinaryDataType Author: Kouhei Sutou Author: Yosuke Shiro Closes #3358 from shiro615/glib-add-fixed-size-binary-data-type and squashes the following commits: a19354e9 Add garrow_fixed_size_binary_data_type_get_byte_width() 8dd67811 Fix GArrowFixedSizeBinaryDataType's parent f2491309 Add GArrowFixedSizeBinaryDataType --- c_glib/arrow-glib/basic-data-type.cpp | 62 ++++++++++++++++++- c_glib/arrow-glib/basic-data-type.h | 24 ++++++- c_glib/arrow-glib/type.cpp | 2 + c_glib/arrow-glib/type.h | 3 + .../test/test-fixed-size-binary-data-type.rb | 39 ++++++++++++ 5 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 c_glib/test/test-fixed-size-binary-data-type.rb diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 861bbaf388801..b6c5705fb070b 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -66,6 +66,8 @@ G_BEGIN_DECLS * * #GArrowBinaryDataType is a class for binary data type. * + * #GArrowFixedSizeBinaryDataType is a class for fixed-size binary data type. + * * #GArrowStringDataType is a class for UTF-8 encoded string data * type. * @@ -239,7 +241,7 @@ garrow_fixed_width_data_type_class_init(GArrowFixedWidthDataTypeClass *klass) } /** - * garrow_fixed_width_data_type_get_id: + * garrow_fixed_width_data_type_get_bit_width: * @data_type: A #GArrowFixedWidthDataType. * * Returns: The number of bits for one data. 
@@ -716,6 +718,59 @@ garrow_binary_data_type_new(void) } +G_DEFINE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW_TYPE_FIXED_WIDTH_DATA_TYPE) + +static void +garrow_fixed_size_binary_data_type_init(GArrowFixedSizeBinaryDataType *object) +{ +} + +static void +garrow_fixed_size_binary_data_type_class_init(GArrowFixedSizeBinaryDataTypeClass *klass) +{ +} + +/** + * garrow_fixed_size_binary_data_type: + * @byte_width: The byte width. + * + * Returns: The newly created fixed-size binary data type. + * + * Since: 0.12.0 + */ +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width) +{ + auto arrow_fixed_size_binary_data_type = arrow::fixed_size_binary(byte_width); + + auto fixed_size_binary_data_type = + GARROW_FIXED_SIZE_BINARY_DATA_TYPE(g_object_new(GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE, + "data-type", &arrow_fixed_size_binary_data_type, + NULL)); + return fixed_size_binary_data_type; +} + +/** + * garrow_fixed_size_binary_data_type_get_byte_width: + * @data_type: A #GArrowFixedSizeBinaryDataType. + * + * Returns: The number of bytes for one data. + * + * Since: 0.12.0 + */ +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type) +{ + const auto arrow_data_type = + garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + const auto arrow_fixed_size_binary_type = + std::static_pointer_cast(arrow_data_type); + return arrow_fixed_size_binary_type->byte_width(); +} + + G_DEFINE_TYPE(GArrowStringDataType, garrow_string_data_type, GARROW_TYPE_DATA_TYPE) @@ -1044,7 +1099,7 @@ garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error) G_DEFINE_ABSTRACT_TYPE(GArrowDecimalDataType, garrow_decimal_data_type, - GARROW_TYPE_DATA_TYPE) + GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE) static void garrow_decimal_data_type_init(GArrowDecimalDataType *object) @@ -1197,6 +1252,9 @@ garrow_data_type_new_raw(std::shared_ptr *arrow_data_type) case arrow::Type::type::BINARY: type = GARROW_TYPE_BINARY_DATA_TYPE; break; + case arrow::Type::type::FIXED_SIZE_BINARY: + type = GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE; + break; case arrow::Type::type::STRING: type = GARROW_TYPE_STRING_DATA_TYPE; break; diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index ef41f1dbcfa0b..d18958265748d 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -338,6 +338,25 @@ GType garrow_binary_data_type_get_type (void) G_GNUC_CONST; GArrowBinaryDataType *garrow_binary_data_type_new (void); +#define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE (garrow_fixed_size_binary_data_type_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType, + garrow_fixed_size_binary_data_type, + GARROW, + FIXED_SIZE_BINARY_DATA_TYPE, + GArrowDataType) +struct _GArrowFixedSizeBinaryDataTypeClass +{ + GArrowFixedWidthDataTypeClass parent_class; +}; + +GARROW_AVAILABLE_IN_0_12 +GArrowFixedSizeBinaryDataType * +garrow_fixed_size_binary_data_type_new(gint32 byte_width); +GARROW_AVAILABLE_IN_0_12 +gint32 +garrow_fixed_size_binary_data_type_get_byte_width(GArrowFixedSizeBinaryDataType *data_type); + + #define GARROW_TYPE_STRING_DATA_TYPE \ (garrow_string_data_type_get_type()) #define GARROW_STRING_DATA_TYPE(obj) \ @@ -651,15 +670,14 @@ GArrowTime64DataType *garrow_time64_data_type_new (GArrowTimeUnit unit, #define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type()) -/* TODO: Delivered from GArrowFixedSizeBinaryDataType. 
*/ G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType, garrow_decimal_data_type, GARROW, DECIMAL_DATA_TYPE, - GArrowDataType) + GArrowFixedSizeBinaryDataType) struct _GArrowDecimalDataTypeClass { - GArrowDataTypeClass parent_class; + GArrowFixedSizeBinaryDataTypeClass parent_class; }; #ifndef GARROW_DISABLE_DEPRECATED diff --git a/c_glib/arrow-glib/type.cpp b/c_glib/arrow-glib/type.cpp index 0642004e2f07b..e227ed2c31fc8 100644 --- a/c_glib/arrow-glib/type.cpp +++ b/c_glib/arrow-glib/type.cpp @@ -66,6 +66,8 @@ garrow_type_from_raw(arrow::Type::type type) return GARROW_TYPE_STRING; case arrow::Type::type::BINARY: return GARROW_TYPE_BINARY; + case arrow::Type::type::FIXED_SIZE_BINARY: + return GARROW_TYPE_FIXED_SIZE_BINARY; case arrow::Type::type::DATE32: return GARROW_TYPE_DATE32; case arrow::Type::type::DATE64: diff --git a/c_glib/arrow-glib/type.h b/c_glib/arrow-glib/type.h index 2137c785515f8..85f55c452be55 100644 --- a/c_glib/arrow-glib/type.h +++ b/c_glib/arrow-glib/type.h @@ -40,6 +40,8 @@ G_BEGIN_DECLS * @GARROW_TYPE_DOUBLE: 8-byte floating point value. * @GARROW_TYPE_STRING: UTF-8 variable-length string. * @GARROW_TYPE_BINARY: Variable-length bytes (no guarantee of UTF-8-ness). + * @GARROW_TYPE_FIXED_SIZE_BINARY: Fixed-size binary. Each value occupies + * the same number of bytes. * @GARROW_TYPE_DATE32: int32 days since the UNIX epoch. * @GARROW_TYPE_DATE64: int64 milliseconds since the UNIX epoch. * @GARROW_TYPE_TIMESTAMP: Exact timestamp encoded with int64 since UNIX epoch. @@ -72,6 +74,7 @@ typedef enum { GARROW_TYPE_DOUBLE, GARROW_TYPE_STRING, GARROW_TYPE_BINARY, + GARROW_TYPE_FIXED_SIZE_BINARY, GARROW_TYPE_DATE32, GARROW_TYPE_DATE64, GARROW_TYPE_TIMESTAMP, diff --git a/c_glib/test/test-fixed-size-binary-data-type.rb b/c_glib/test/test-fixed-size-binary-data-type.rb new file mode 100644 index 0000000000000..584fb3deec93d --- /dev/null +++ b/c_glib/test/test-fixed-size-binary-data-type.rb @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFixedSizeBinaryDataType < Test::Unit::TestCase + def setup + @byte_width = 10 + @data_type = Arrow::FixedSizeBinaryDataType.new(@byte_width) + end + + def test_type + assert_equal(Arrow::Type::FIXED_SIZE_BINARY, @data_type.id) + end + + def test_to_s + assert_equal("fixed_size_binary[10]", @data_type.to_s) + end + + def test_byte_width + assert_equal(@byte_width, @data_type.byte_width) + end + + def test_bit_width + assert_equal(@byte_width * 8, @data_type.bit_width) + end +end From f67a5150df7d11a0ad5bc53044c192b023ad312c Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Thu, 10 Jan 2019 17:39:11 +0900 Subject: [PATCH 076/203] ARROW-4214: [Ruby] Add support for building RecordBatch from raw Ruby objects Author: Kouhei Sutou Closes #3360 from kou/ruby-record-batch-builder-append-records and squashes the following commits: e85bbaf5 Add support for building RecordBatch from raw Ruby objects --- ruby/red-arrow/lib/arrow/array-builder.rb | 8 +- .../red-arrow/lib/arrow/list-array-builder.rb | 10 ++ ruby/red-arrow/lib/arrow/loader.rb | 8 ++ .../lib/arrow/record-batch-builder.rb | 115 +++++++++++++++++ ruby/red-arrow/lib/arrow/record-batch.rb | 16 +++ .../lib/arrow/struct-array-builder.rb | 10 ++ .../red-arrow/test/test-list-array-builder.rb | 17 +++ .../test/test-record-batch-builder.rb | 116 ++++++++++++++++++ ruby/red-arrow/test/test-record-batch.rb | 114 ++++++++++++----- .../test/test-struct-array-builder.rb | 20 +++ 10 files changed, 400 insertions(+), 34 deletions(-) create mode 100644 ruby/red-arrow/lib/arrow/record-batch-builder.rb create mode 100644 ruby/red-arrow/test/test-record-batch-builder.rb diff --git a/ruby/red-arrow/lib/arrow/array-builder.rb b/ruby/red-arrow/lib/arrow/array-builder.rb index 8edb3c4bfbbd9..7cfc4329aed6e 100644 --- a/ruby/red-arrow/lib/arrow/array-builder.rb +++ b/ruby/red-arrow/lib/arrow/array-builder.rb @@ -65,6 +65,12 @@ def build(values) end def build(values) + append(*values) + finish + end + + # @since 0.12.0 + def append(*values) value_convertable = respond_to?(:convert_to_arrow_value, true) start_index = 0 current_index = 0 @@ -111,8 +117,6 @@ def build(values) append_nulls(current_index - start_index) end end - - finish end def append_nulls(n) diff --git a/ruby/red-arrow/lib/arrow/list-array-builder.rb b/ruby/red-arrow/lib/arrow/list-array-builder.rb index aa093c2de9b5c..1fa507f69a72f 100644 --- a/ruby/red-arrow/lib/arrow/list-array-builder.rb +++ b/ruby/red-arrow/lib/arrow/list-array-builder.rb @@ -82,5 +82,15 @@ def append_values(lists, is_valids=nil) end end end + + # @since 0.12.0 + def append(*values) + if values.empty? 
+ # For backward compatibility + append_value + else + super + end + end end end diff --git a/ruby/red-arrow/lib/arrow/loader.rb b/ruby/red-arrow/lib/arrow/loader.rb index acd2573e3218f..6e0bf2929022f 100644 --- a/ruby/red-arrow/lib/arrow/loader.rb +++ b/ruby/red-arrow/lib/arrow/loader.rb @@ -54,6 +54,7 @@ def require_libraries require "arrow/path-extension" require "arrow/record" require "arrow/record-batch" + require "arrow/record-batch-builder" require "arrow/record-batch-file-reader" require "arrow/record-batch-stream-reader" require "arrow/rolling-window" @@ -89,6 +90,13 @@ def load_object_info(info) def load_method_info(info, klass, method_name) case klass.name + when /Builder\z/ + case method_name + when "append" + return + else + super + end when "Arrow::StringArray" case method_name when "get_value" diff --git a/ruby/red-arrow/lib/arrow/record-batch-builder.rb b/ruby/red-arrow/lib/arrow/record-batch-builder.rb new file mode 100644 index 0000000000000..dba16b3b8116d --- /dev/null +++ b/ruby/red-arrow/lib/arrow/record-batch-builder.rb @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +module Arrow + class RecordBatchBuilder + class << self + # @since 0.12.0 + def build(schema, data) + builder = new(schema) + builder.append(data) + builder.flush + end + end + + alias_method :initialize_raw, :initialize + private :initialize_raw + def initialize(schema) + unless schema.is_a?(Schema) + schema = Schema.new(schema) + end + initialize_raw(schema) + @name_to_index = {} + schema.fields.each_with_index do |field, i| + @name_to_index[field.name] = i + end + end + + # @since 0.12.0 + def [](name_or_index) + case name_or_index + when String, Symbol + name = name_or_index + self[resolve_name(name)] + else + index = name_or_index + column_builders[index] + end + end + + # @since 0.12.0 + def append(*values) + values.each do |value| + case value + when Hash + append_columns(value) + else + append_records(value) + end + end + end + + # @since 0.12.0 + def append_records(records) + n = n_fields + columns = n.times.collect do + [] + end + records.each_with_index do |record, nth_record| + case record + when nil + when Hash + record.each do |name, value| + nth_column = resolve_name(name) + next if nth_column.nil? + columns[nth_column] << value + end + else + record.each_with_index do |value, nth_column| + columns[nth_column] << value + end + end + columns.each do |column| + column << nil if column.size != (nth_record + 1) + end + end + columns.each_with_index do |column, i| + self[i].append(*column) + end + end + + # @since 0.12.0 + def append_columns(columns) + columns.each do |name, values| + self[name].append(*values) + end + end + + private + def resolve_name(name) + @name_to_index[name.to_s] + end + + # TODO: Make public with good name. 
Is column_builders good enough? + # builders? sub_builders? + def column_builders + @column_builders ||= n_fields.times.collect do |i| + get_field(i) + end + end + end +end diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb index 6d9c35b9dc849..b577d4a41a6c6 100644 --- a/ruby/red-arrow/lib/arrow/record-batch.rb +++ b/ruby/red-arrow/lib/arrow/record-batch.rb @@ -22,6 +22,22 @@ class RecordBatch include RecordContainable include Enumerable + class << self + def new(*args) + n_args = args.size + case n_args + when 2 + schema, data = args + RecordBatchBuilder.build(schema, data) + when 3 + super + else + message = "wrong number of arguments (given #{n_args}, expected 2..3)" + raise ArgumentError, message + end + end + end + alias_method :each, :each_record alias_method :columns_raw, :columns diff --git a/ruby/red-arrow/lib/arrow/struct-array-builder.rb b/ruby/red-arrow/lib/arrow/struct-array-builder.rb index 52f75aab46d35..b56056cad4471 100644 --- a/ruby/red-arrow/lib/arrow/struct-array-builder.rb +++ b/ruby/red-arrow/lib/arrow/struct-array-builder.rb @@ -119,6 +119,16 @@ def append_null end end + # @since 0.12.0 + def append(*values) + if values.empty? + # For backward compatibility + append_value_raw + else + super + end + end + private def cached_field_builders @field_builders ||= field_builders diff --git a/ruby/red-arrow/test/test-list-array-builder.rb b/ruby/red-arrow/test/test-list-array-builder.rb index e36f2c8340be4..aee31e73b1b96 100644 --- a/ruby/red-arrow/test/test-list-array-builder.rb +++ b/ruby/red-arrow/test/test-list-array-builder.rb @@ -59,4 +59,21 @@ def setup array.collect {|list| list ? list.to_a : nil}) end end + + sub_test_case("#append") do + test("backward compatibility") do + @builder.append + @builder.value_builder.append(true) + @builder.value_builder.append(false) + @builder.append + @builder.value_builder.append(true) + array = @builder.finish + + assert_equal([ + [true, false], + [true], + ], + array.collect(&:to_a)) + end + end end diff --git a/ruby/red-arrow/test/test-record-batch-builder.rb b/ruby/red-arrow/test/test-record-batch-builder.rb new file mode 100644 index 0000000000000..7cd1f8cee7a16 --- /dev/null +++ b/ruby/red-arrow/test/test-record-batch-builder.rb @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class RecordBatchBuilderTest < Test::Unit::TestCase + sub_test_case(".new") do + test("Schema") do + schema = Arrow::Schema.new(visible: :boolean, + count: :uint32) + builder = Arrow::RecordBatchBuilder.new(schema) + assert_equal(schema, + builder.schema) + end + + test("Hash") do + builder = Arrow::RecordBatchBuilder.new(visible: :boolean, + count: :uint32) + assert_equal(Arrow::Schema.new(visible: :boolean, + count: :uint32), + builder.schema) + end + end + + sub_test_case("instance methods") do + def setup + @schema = Arrow::Schema.new(visible: :boolean, + count: :uint32) + @builder = Arrow::RecordBatchBuilder.new(@schema) + end + + sub_test_case("#[]") do + test("String") do + assert_equal(Arrow::BooleanDataType.new, + @builder["visible"].value_data_type) + end + + test("Symbol") do + assert_equal(Arrow::BooleanDataType.new, + @builder[:visible].value_data_type) + end + + test("Integer") do + assert_equal(Arrow::UInt32DataType.new, + @builder[1].value_data_type) + end + end + + test("#append") do + records = [ + {visible: true, count: 1}, + ] + columns = { + visible: [false], + count: [2], + } + arrays = [ + Arrow::BooleanArray.new([true, false]), + Arrow::UInt32Array.new([1, 2]), + ] + @builder.append(records, columns) + assert_equal(Arrow::RecordBatch.new(@schema, + arrays[0].length, + arrays), + @builder.flush) + end + + test("#append_records") do + records = [ + {visible: true, count: 1}, + {visible: true, count: 2, garbage: "garbage"}, + {visible: true}, + [false, 4], + nil, + [true], + ] + arrays = [ + Arrow::BooleanArray.new([true, true, true, false, nil, true]), + Arrow::UInt32Array.new([1, 2, nil, 4, nil, nil]), + ] + @builder.append_records(records) + assert_equal(Arrow::RecordBatch.new(@schema, + arrays[0].length, + arrays), + @builder.flush) + end + + test("#append_columns") do + columns = { + visible: [true, true, true, false, nil, true], + count: [1, 2, nil, 4, nil, nil], + } + arrays = [ + Arrow::BooleanArray.new(columns[:visible]), + Arrow::UInt32Array.new(columns[:count]), + ] + @builder.append_columns(columns) + assert_equal(Arrow::RecordBatch.new(@schema, + arrays[0].length, + arrays), + @builder.flush) + end + end +end diff --git a/ruby/red-arrow/test/test-record-batch.rb b/ruby/red-arrow/test/test-record-batch.rb index 4dac085bff86e..d33298b4e5f7f 100644 --- a/ruby/red-arrow/test/test-record-batch.rb +++ b/ruby/red-arrow/test/test-record-batch.rb @@ -16,47 +16,97 @@ # under the License. 
class RecordBatchTest < Test::Unit::TestCase - setup do - fields = [ - Arrow::Field.new("count", :uint32), - ] - @schema = Arrow::Schema.new(fields) - @counts = Arrow::UInt32Array.new([1, 2, 4, 8]) - @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts]) - end + sub_test_case(".new") do + def setup + @schema = Arrow::Schema.new(visible: :boolean, + count: :uint32) + end - sub_test_case(".each") do - test("default") do - records = [] - @record_batch.each do |record| - records << [record, record.index] - end + test("[Schema, records]") do + records = [ + {visible: true, count: 1}, + nil, + [false, 3], + ] + record_batch = Arrow::RecordBatch.new(@schema, records) assert_equal([ - [0, 0], - [1, 1], - [2, 2], - [3, 3], + {"visible" => true, "count" => 1}, + {"visible" => nil, "count" => nil}, + {"visible" => false, "count" => 3}, ], - records.collect {|record, i| [record.index, i]}) + record_batch.each_record.collect(&:to_h)) end - test("reuse_record: true") do - records = [] - @record_batch.each(reuse_record: true) do |record| - records << [record, record.index] - end + test("[Schema, columns]") do + columns = { + visible: [true, nil, false], + count: [1, 2, nil], + } + record_batch = Arrow::RecordBatch.new(@schema, columns) + assert_equal([ + {"visible" => true, "count" => 1}, + {"visible" => nil, "count" => 2}, + {"visible" => false, "count" => nil}, + ], + record_batch.each_record.collect(&:to_h)) + end + + test("[Schema, n_rows, columns]") do + columns = [ + Arrow::BooleanArray.new([true, nil, false]), + Arrow::UInt32Array.new([1, 2, nil]), + ] + n_rows = columns[0].length + record_batch = Arrow::RecordBatch.new(@schema, n_rows, columns) assert_equal([ - [3, 0], - [3, 1], - [3, 2], - [3, 3], + {"visible" => true, "count" => 1}, + {"visible" => nil, "count" => 2}, + {"visible" => false, "count" => nil}, ], - records.collect {|record, i| [record.index, i]}) + record_batch.each_record.collect(&:to_h)) end end - test("#to_table") do - assert_equal(Arrow::Table.new(@schema, [@counts]), - @record_batch.to_table) + sub_test_case("instance methods") do + def setup + @schema = Arrow::Schema.new(count: :uint32) + @counts = Arrow::UInt32Array.new([1, 2, 4, 8]) + @record_batch = Arrow::RecordBatch.new(@schema, @counts.length, [@counts]) + end + + sub_test_case("#each") do + test("default") do + records = [] + @record_batch.each do |record| + records << [record, record.index] + end + assert_equal([ + [0, 0], + [1, 1], + [2, 2], + [3, 3], + ], + records.collect {|record, i| [record.index, i]}) + end + + test("reuse_record: true") do + records = [] + @record_batch.each(reuse_record: true) do |record| + records << [record, record.index] + end + assert_equal([ + [3, 0], + [3, 1], + [3, 2], + [3, 3], + ], + records.collect {|record, i| [record.index, i]}) + end + end + + test("#to_table") do + assert_equal(Arrow::Table.new(@schema, [@counts]), + @record_batch.to_table) + end end end diff --git a/ruby/red-arrow/test/test-struct-array-builder.rb b/ruby/red-arrow/test/test-struct-array-builder.rb index 42e1ded78e318..f7706ee8d190b 100644 --- a/ruby/red-arrow/test/test-struct-array-builder.rb +++ b/ruby/red-arrow/test/test-struct-array-builder.rb @@ -157,4 +157,24 @@ def setup ]) end end + + sub_test_case("#append") do + test("backward compatibility") do + @builder.append + @builder.get_field_builder(0).append(true) + @builder.get_field_builder(1).append(1) + @builder.append + @builder.get_field_builder(0).append(false) + @builder.get_field_builder(1).append(2) + array = @builder.finish + 
assert_equal([ + [true, 1], + [false, 2], + ], + [ + array.get_value(0).values, + array.get_value(1).values, + ]) + end + end end From b8d59133465c8be85603f9b0f23fdc687ec2e2ba Mon Sep 17 00:00:00 2001 From: "Sweeney, Mack" Date: Thu, 10 Jan 2019 09:47:00 +0100 Subject: [PATCH 077/203] ARROW-3916: [Python] Add support for `filesystem` kwarg in ParquetWriter Implements [ARROW 3916](https://jira.apache.org/jira/browse/ARROW-3916). Author: Sweeney, Mack Author: Wes McKinney Closes #3070 from macks22/ARROW-3916_ParquetDataset_filesystem_kwarg and squashes the following commits: b5973bc0 Fixes post rebase 124d9df8 Add support for filesystem kwarg in ParquetWriter --- python/pyarrow/filesystem.py | 18 ++++++++---- python/pyarrow/parquet.py | 25 +++++++++-------- python/pyarrow/tests/test_parquet.py | 41 +++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 17 deletions(-) diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index 92a65ce69892a..43280799bccce 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -23,7 +23,7 @@ from six.moves.urllib.parse import urlparse import pyarrow as pa -from pyarrow.util import implements, _stringify_path +from pyarrow.util import implements, _stringify_path, _is_path_like class FileSystem(object): @@ -397,14 +397,22 @@ def _ensure_filesystem(fs): return fs -def get_filesystem_from_uri(path): +def resolve_filesystem_and_path(where, filesystem=None): """ return filesystem from path which could be an HDFS URI """ + if not _is_path_like(where): + if filesystem is not None: + raise ValueError("filesystem passed but where is file-like, so" + " there is nothing to open with filesystem.") + return filesystem, where + # input can be hdfs URI such as hdfs://host:port/myfile.parquet - path = _stringify_path(path) - # if _has_pathlib and isinstance(path, pathlib.Path): - # path = str(path) + path = _stringify_path(where) + + if filesystem is not None: + return _ensure_filesystem(filesystem), path + parsed_uri = urlparse(path) if parsed_uri.scheme == 'hdfs': netloc_split = parsed_uri.netloc.split(':') diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py index b8dae65a5de78..7142e2f474540 100644 --- a/python/pyarrow/parquet.py +++ b/python/pyarrow/parquet.py @@ -35,7 +35,7 @@ ParquetSchema, ColumnSchema) from pyarrow.compat import guid from pyarrow.filesystem import (LocalFileSystem, _ensure_filesystem, - get_filesystem_from_uri) + resolve_filesystem_and_path) from pyarrow.util import _is_path_like, _stringify_path _URI_STRIP_SCHEMES = ('hdfs',) @@ -54,7 +54,7 @@ def _parse_uri(path): def _get_filesystem_and_path(passed_filesystem, path): if passed_filesystem is None: - return get_filesystem_from_uri(path) + return resolve_filesystem_and_path(path, passed_filesystem) else: passed_filesystem = _ensure_filesystem(passed_filesystem) parsed_path = _parse_uri(path) @@ -320,7 +320,10 @@ def _sanitize_table(table, new_schema, flavor): Specify the compression codec, either on a general basis or per-column. 
Valid values: {'NONE', 'SNAPPY', 'GZIP', 'LZO', 'BROTLI', 'LZ4', 'ZSTD'} flavor : {'spark'}, default None - Sanitize schema or set other compatibility options for compatibility""" + Sanitize schema or set other compatibility options for compatibility +filesystem : FileSystem, default None + If nothing passed, will be inferred from `where` if path-like, else + `where` is already a file-like object so no filesystem is needed.""" class ParquetWriter(object): @@ -335,12 +338,12 @@ class ParquetWriter(object): {0} """.format(_parquet_writer_arg_docs) - def __init__(self, where, schema, flavor=None, + def __init__(self, where, schema, filesystem=None, + flavor=None, version='1.0', use_dictionary=True, compression='snappy', - use_deprecated_int96_timestamps=None, - filesystem=None, **options): + use_deprecated_int96_timestamps=None, **options): if use_deprecated_int96_timestamps is None: # Use int96 timestamps for Spark if flavor is not None and 'spark' in flavor: @@ -357,13 +360,13 @@ def __init__(self, where, schema, flavor=None, self.schema = schema self.where = where - # If we open a file using an implied filesystem, so it can be assured - # to be closed + # If we open a file using a filesystem, store file handle so we can be + # sure to close it when `self.close` is called. self.file_handle = None - if _is_path_like(where): - fs, path = _get_filesystem_and_path(filesystem, where) - sink = self.file_handle = fs.open(path, 'wb') + filesystem, path = resolve_filesystem_and_path(where, filesystem) + if filesystem is not None: + sink = self.file_handle = filesystem.open(path, 'wb') else: sink = where diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 3a6c84678eba2..5156300b01b95 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -31,7 +31,7 @@ import pyarrow as pa from pyarrow.compat import guid, u, BytesIO, unichar, PY2 from pyarrow.tests import util -from pyarrow.filesystem import LocalFileSystem +from pyarrow.filesystem import LocalFileSystem, FileSystem from .pandas_examples import dataframe_with_arrays, dataframe_with_lists try: @@ -2277,6 +2277,45 @@ def test_empty_row_groups(tempdir): assert reader.read_row_group(i).equals(table) +def test_parquet_writer_with_caller_provided_filesystem(): + out = pa.BufferOutputStream() + + class CustomFS(FileSystem): + def __init__(self): + self.path = None + self.mode = None + + def open(self, path, mode='rb'): + self.path = path + self.mode = mode + return out + + fs = CustomFS() + fname = 'expected_fname.parquet' + df = _test_dataframe(100) + table = pa.Table.from_pandas(df, preserve_index=False) + + with pq.ParquetWriter(fname, table.schema, filesystem=fs, version='2.0') \ + as writer: + writer.write_table(table) + + assert fs.path == fname + assert fs.mode == 'wb' + assert out.closed + + buf = out.getvalue() + table_read = _read_table(pa.BufferReader(buf)) + df_read = table_read.to_pandas() + tm.assert_frame_equal(df_read, df) + + # Should raise ValueError when filesystem is passed with file-like object + with pytest.raises(ValueError) as err_info: + pq.ParquetWriter(pa.BufferOutputStream(), table.schema, filesystem=fs) + expected_msg = ("filesystem passed but where is file-like, so" + " there is nothing to open with filesystem.") + assert str(err_info) == expected_msg + + def test_writing_empty_lists(): # ARROW-2591: [Python] Segmentation fault issue in pq.write_table arr1 = pa.array([[], []], pa.list_(pa.int32())) From bf34291b93c748f9dc63a0d89cc1cf857a28630c 
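
A minimal sketch of the `filesystem` keyword added by the patch above (the
path, the sample data, and the use of LocalFileSystem are illustrative; any
pyarrow FileSystem implementation can be passed):

    import pandas as pd
    import pyarrow as pa
    import pyarrow.parquet as pq
    from pyarrow.filesystem import LocalFileSystem

    table = pa.Table.from_pandas(pd.DataFrame({'x': [1, 2, 3]}))
    # The writer opens `where` through the given filesystem and closes the
    # resulting handle when the writer itself is closed.
    with pq.ParquetWriter('/tmp/example.parquet', table.schema,
                          filesystem=LocalFileSystem()) as writer:
        writer.write_table(table)
    # Combining `filesystem` with an already-open file-like `where` raises
    # ValueError, as exercised in the test above.
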
Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Thu, 10 Jan 2019 18:54:20 +0900 Subject: [PATCH 078/203] ARROW-4227: [GLib] Fix wrong data type in field of composite data type Author: Kouhei Sutou Closes #3363 from kou/glib-fix-wrong-data-type and squashes the following commits: 1f274e51 Use garrow_field_new_raw(..., nullpter) 20460a9f Fix wrong data type in field of composite data type --- c_glib/arrow-glib/composite-data-type.cpp | 14 ++++---- c_glib/arrow-glib/field.cpp | 9 +++++ c_glib/arrow-glib/schema.cpp | 16 ++------- c_glib/test/test-dense-union-data-type.rb | 33 +++++++++++++++--- c_glib/test/test-list-data-type.rb | 25 +++++++++----- c_glib/test/test-sparse-union-data-type.rb | 33 +++++++++++++++--- c_glib/test/test-struct-data-type.rb | 39 +++++++++++++++++++--- 7 files changed, 127 insertions(+), 42 deletions(-) diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp index 8046d2e23a31a..5ddc1c3dd8914 100644 --- a/c_glib/arrow-glib/composite-data-type.cpp +++ b/c_glib/arrow-glib/composite-data-type.cpp @@ -98,7 +98,7 @@ garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type) static_cast(arrow_data_type.get()); auto arrow_field = arrow_list_data_type->value_field(); - return garrow_field_new_raw(&arrow_field, data_type); + return garrow_field_new_raw(&arrow_field, nullptr); } @@ -172,8 +172,7 @@ garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type) GList *fields = NULL; for (auto arrow_field : arrow_fields) { - fields = g_list_prepend(fields, - garrow_field_new_raw(&arrow_field, data_type)); + fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field, nullptr)); } return g_list_reverse(fields); } @@ -207,7 +206,7 @@ garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, auto arrow_field = arrow_data_type->child(i); if (arrow_field) { - return garrow_field_new_raw(&arrow_field, data_type); + return garrow_field_new_raw(&arrow_field, nullptr); } else { return NULL; } @@ -234,7 +233,7 @@ garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type auto arrow_field = arrow_struct_data_type->GetFieldByName(name); if (arrow_field) { - return garrow_field_new_raw(&arrow_field, data_type); + return garrow_field_new_raw(&arrow_field, nullptr); } else { return NULL; } @@ -309,8 +308,7 @@ garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type) GList *fields = NULL; for (auto arrow_field : arrow_fields) { - fields = g_list_prepend(fields, - garrow_field_new_raw(&arrow_field, data_type)); + fields = g_list_prepend(fields, garrow_field_new_raw(&arrow_field, nullptr)); } return g_list_reverse(fields); } @@ -344,7 +342,7 @@ garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, auto arrow_field = arrow_data_type->child(i); if (arrow_field) { - return garrow_field_new_raw(&arrow_field, data_type); + return garrow_field_new_raw(&arrow_field, nullptr); } else { return NULL; } diff --git a/c_glib/arrow-glib/field.cpp b/c_glib/arrow-glib/field.cpp index d74053af48f05..f7250bc6ee634 100644 --- a/c_glib/arrow-glib/field.cpp +++ b/c_glib/arrow-glib/field.cpp @@ -243,10 +243,19 @@ GArrowField * garrow_field_new_raw(std::shared_ptr *arrow_field, GArrowDataType *data_type) { + bool data_type_need_unref = false; + if (!data_type) { + auto arrow_data_type = (*arrow_field)->type(); + data_type = garrow_data_type_new_raw(&arrow_data_type); + data_type_need_unref = true; + } auto field = GARROW_FIELD(g_object_new(GARROW_TYPE_FIELD, "field", 
arrow_field, "data-type", data_type, NULL)); + if (data_type_need_unref) { + g_object_unref(data_type); + } return field; } diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp index 64332419e0972..1bbe82f9a3ca6 100644 --- a/c_glib/arrow-glib/schema.cpp +++ b/c_glib/arrow-glib/schema.cpp @@ -174,11 +174,7 @@ garrow_schema_get_field(GArrowSchema *schema, guint i) { const auto arrow_schema = garrow_schema_get_raw(schema); auto arrow_field = arrow_schema->field(i); - auto arrow_data_type = arrow_field->type(); - auto data_type = garrow_data_type_new_raw(&arrow_data_type); - auto field = garrow_field_new_raw(&arrow_field, data_type); - g_object_unref(data_type); - return field; + return garrow_field_new_raw(&arrow_field, nullptr); } /** @@ -198,10 +194,7 @@ garrow_schema_get_field_by_name(GArrowSchema *schema, return NULL; } else { auto arrow_data_type = arrow_field->type(); - auto data_type = garrow_data_type_new_raw(&arrow_data_type); - auto field = garrow_field_new_raw(&arrow_field, data_type); - g_object_unref(data_type); - return field; + return garrow_field_new_raw(&arrow_field, nullptr); } } @@ -232,10 +225,7 @@ garrow_schema_get_fields(GArrowSchema *schema) GList *fields = NULL; for (auto arrow_field : arrow_schema->fields()) { - auto arrow_data_type = arrow_field->type(); - auto data_type = garrow_data_type_new_raw(&arrow_data_type); - auto field = garrow_field_new_raw(&arrow_field, data_type); - g_object_unref(data_type); + auto field = garrow_field_new_raw(&arrow_field, nullptr); fields = g_list_prepend(fields, field); } diff --git a/c_glib/test/test-dense-union-data-type.rb b/c_glib/test/test-dense-union-data-type.rb index 0d1295423ebbb..231767f8a5441 100644 --- a/c_glib/test/test-dense-union-data-type.rb +++ b/c_glib/test/test-dense-union-data-type.rb @@ -17,11 +17,19 @@ class TestDenseUnionDataType < Test::Unit::TestCase def setup - fields = [ - Arrow::Field.new("number", Arrow::Int32DataType.new), - Arrow::Field.new("text", Arrow::StringDataType.new), + @number_field_data_type = Arrow::Int32DataType.new + @text_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @number_field_data_type, + @text_field_data_type, ] - @data_type = Arrow::DenseUnionDataType.new(fields, [2, 9]) + @number_field = Arrow::Field.new("number", @number_field_data_type) + @text_field = Arrow::Field.new("text", @text_field_data_type) + @fields = [ + @number_field, + @text_field, + ] + @data_type = Arrow::DenseUnionDataType.new(@fields, [2, 9]) end def test_type @@ -32,4 +40,21 @@ def test_to_s assert_equal("union[dense]", @data_type.to_s) end + + def test_fields + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) + end + + def test_get_field + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end end diff --git a/c_glib/test/test-list-data-type.rb b/c_glib/test/test-list-data-type.rb index aa6a8fa65fd8c..2d96fcb21ed3e 100644 --- a/c_glib/test/test-list-data-type.rb +++ b/c_glib/test/test-list-data-type.rb @@ -16,21 +16,28 @@ # under the License. 
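
# A sketch of the behavior the C++ fix above guarantees and the updated tests
# below lock in (illustrative only, not part of the patch): a field obtained
# from a composite type now reports the child type instead of the composite
# type itself.
#
#   field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
#   list_data_type = Arrow::ListDataType.new(field)
#   list_data_type.value_field.data_type
#   # => BooleanDataType (before the fix, the list type itself was returned)
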
class TestListDataType < Test::Unit::TestCase + def setup + @field_data_type = Arrow::BooleanDataType.new + @field = Arrow::Field.new("enabled", @field_data_type) + @data_type = Arrow::ListDataType.new(@field) + end + def test_type - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal(Arrow::Type::LIST, data_type.id) + assert_equal(Arrow::Type::LIST, @data_type.id) end def test_to_s - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal("list", data_type.to_s) + assert_equal("list", @data_type.to_s) end def test_value_field - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - data_type = Arrow::ListDataType.new(field) - assert_equal(field, data_type.value_field) + assert_equal([ + @field, + @field_data_type, + ], + [ + @data_type.value_field, + @data_type.value_field.data_type, + ]) end end diff --git a/c_glib/test/test-sparse-union-data-type.rb b/c_glib/test/test-sparse-union-data-type.rb index ff4ce72c274a3..30e24f7a11c9b 100644 --- a/c_glib/test/test-sparse-union-data-type.rb +++ b/c_glib/test/test-sparse-union-data-type.rb @@ -17,11 +17,19 @@ class TestSparseUnionDataType < Test::Unit::TestCase def setup - fields = [ - Arrow::Field.new("number", Arrow::Int32DataType.new), - Arrow::Field.new("text", Arrow::StringDataType.new), + @number_field_data_type = Arrow::Int32DataType.new + @text_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @number_field_data_type, + @text_field_data_type, ] - @data_type = Arrow::SparseUnionDataType.new(fields, [2, 9]) + @number_field = Arrow::Field.new("number", @number_field_data_type) + @text_field = Arrow::Field.new("text", @text_field_data_type) + @fields = [ + @number_field, + @text_field, + ] + @data_type = Arrow::SparseUnionDataType.new(@fields, [2, 9]) end def test_type @@ -32,4 +40,21 @@ def test_to_s assert_equal("union[sparse]", @data_type.to_s) end + + def test_fields + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) + end + + def test_get_field + field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end end diff --git a/c_glib/test/test-struct-data-type.rb b/c_glib/test/test-struct-data-type.rb index ce94e41c70148..82ce19ec6a495 100644 --- a/c_glib/test/test-struct-data-type.rb +++ b/c_glib/test/test-struct-data-type.rb @@ -17,8 +17,14 @@ class TestStructDataType < Test::Unit::TestCase def setup - @enabled_field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - @message_field = Arrow::Field.new("message", Arrow::StringDataType.new) + @enabled_field_data_type = Arrow::BooleanDataType.new + @message_field_data_type = Arrow::StringDataType.new + @field_data_types = [ + @enabled_field_data_type, + @message_field_data_type, + ] + @enabled_field = Arrow::Field.new("enabled", @enabled_field_data_type) + @message_field = Arrow::Field.new("message", @message_field_data_type) @fields = [@enabled_field, @message_field] @data_type = Arrow::StructDataType.new(@fields) end @@ -37,7 +43,8 @@ def test_n_fields end def test_fields - assert_equal(@fields, @data_type.fields) + assert_equal(@fields.zip(@field_data_types), + @data_type.fields.collect {|field| [field, field.data_type]}) end sub_test_case("#get_field") do @@ -52,6 +59,18 @@ def test_negative def test_over assert_equal(nil, @data_type.get_field(2)) end + + def test_data_type + 
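+      # ARROW-4227: the returned field must report the child data type
+      # (boolean here), not the enclosing struct data type.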
field = @data_type.get_field(0) + assert_equal([ + @fields[0], + @field_data_types[0], + ], + [ + field, + field.data_type, + ]) + end end sub_test_case("#get_field_by_name") do @@ -64,9 +83,21 @@ def test_not_found assert_equal(nil, @data_type.get_field_by_name("nonexistent")) end + + def test_data_type + field = @data_type.get_field_by_name("enabled") + assert_equal([ + @enabled_field, + @enabled_field_data_type, + ], + [ + field, + field.data_type, + ]) + end end - sub_test_case("#get_field_by_name") do + sub_test_case("#get_field_index") do def test_found assert_equal(@fields.index(@enabled_field), @data_type.get_field_index("enabled")) From fc7b414faa5c187770ef8e28c26319f416ad7018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 10 Jan 2019 11:46:06 +0100 Subject: [PATCH 079/203] ARROW-4210: [Python] Mention boost-cpp directly in the conda meta.yaml for pyarrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crossbow builds: [kszucs/crossbow/build-402](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=402) Author: Krisztián Szűcs Closes #3367 from kszucs/ARROW-4210 and squashes the following commits: 0647ee68 add boost-cpp to pyarrow's recipe --- dev/tasks/conda-recipes/pyarrow/meta.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/tasks/conda-recipes/pyarrow/meta.yaml b/dev/tasks/conda-recipes/pyarrow/meta.yaml index 7c653876765b5..9f6ae79dc64d7 100644 --- a/dev/tasks/conda-recipes/pyarrow/meta.yaml +++ b/dev/tasks/conda-recipes/pyarrow/meta.yaml @@ -33,6 +33,9 @@ requirements: - {{ compiler('c') }} - {{ compiler('cxx') }} host: + # directly pin boost-cpp as we also seem to directly include boost symbols + # in the Python modules. + - boost-cpp - python - setuptools - setuptools_scm @@ -42,6 +45,7 @@ requirements: - arrow-cpp {{ ARROW_VERSION }} run: + - boost-cpp - python - setuptools - {{ pin_compatible('numpy', lower_bound='1.14') }} From 9d342ec4ffe2441ab0b072c90a4f652aa2678dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 10 Jan 2019 13:24:23 -0600 Subject: [PATCH 080/203] ARROW-3819: [Packaging] Update conda variant files to conform with feedstock after compiler migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crossbow builds: - [kszucs/crossbow/build-403](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-403) - [kszucs/crossbow/build-404](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-404) - [kszucs/crossbow/build-405](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-405) - [kszucs/crossbow/build-406](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-406) - [kszucs/crossbow/build-407](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=build-407) Author: Krisztián Szűcs Closes #3368 from kszucs/conda_forge_migration and squashes the following commits: e0a5a6422 use --croot 3749a2ff9 git on osx; set FEEDSTOSK_ROOT ca7217d7f support channel sources from variant files 33cba7118 fix conda path on linux 2505828b7 fix task names 0c4a10bc3 conda recipes for python 3.7; compiler migration --- LICENSE.txt | 33 +++++ dev/release/rat_exclude_files.txt | 1 + dev/tasks/conda-recipes/travis.linux.yml | 23 ++-- dev/tasks/conda-recipes/travis.osx.yml | 24 ++-- ...c_compilergcccxx_compilergxxpython2.7.yaml | 29 ++++ ...c_compilergcccxx_compilergxxpython3.6.yaml | 29 ++++ 
...c_compilergcccxx_compilergxxpython3.7.yaml | 29 ++++ ...n_ccxx_compilertoolchain_cxxpython2.7.yaml | 29 ++++ ...n_ccxx_compilertoolchain_cxxpython3.6.yaml | 29 ++++ ...n_ccxx_compilertoolchain_cxxpython3.7.yaml | 29 ++++ .../variants/linux_python2.7.yaml | 47 ------- .../variants/linux_python3.5.yaml | 47 ------- .../variants/linux_python3.6.yaml | 47 ------- ...ilerclangcxx_compilerclangxxpython2.7.yaml | 32 +++++ ...ilerclangcxx_compilerclangxxpython3.6.yaml | 32 +++++ ...ilerclangcxx_compilerclangxxpython3.7.yaml | 32 +++++ ...n_ccxx_compilertoolchain_cxxpython2.7.yaml | 32 +++++ ...n_ccxx_compilertoolchain_cxxpython3.6.yaml | 32 +++++ ...n_ccxx_compilertoolchain_cxxpython3.7.yaml | 32 +++++ .../conda-recipes/variants/osx_python2.7.yaml | 53 -------- .../conda-recipes/variants/osx_python3.5.yaml | 53 -------- .../conda-recipes/variants/osx_python3.6.yaml | 47 ------- ...ilervs2015cxx_compilervs2015python3.5.yaml | 51 ------- ...ilervs2015cxx_compilervs2015python3.6.yaml | 39 +----- ...ilervs2015cxx_compilervs2015python3.7.yaml | 22 ++++ dev/tasks/tasks.yml | 124 +++++++++++++----- 26 files changed, 547 insertions(+), 430 deletions(-) create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml create mode 100644 dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml delete mode 100644 dev/tasks/conda-recipes/variants/linux_python2.7.yaml delete mode 100644 dev/tasks/conda-recipes/variants/linux_python3.5.yaml delete mode 100644 dev/tasks/conda-recipes/variants/linux_python3.6.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml create mode 100644 dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml delete mode 100644 dev/tasks/conda-recipes/variants/osx_python2.7.yaml delete mode 100644 dev/tasks/conda-recipes/variants/osx_python3.5.yaml delete mode 100644 dev/tasks/conda-recipes/variants/osx_python3.6.yaml delete mode 100644 dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml create mode 100644 dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml diff --git a/LICENSE.txt b/LICENSE.txt index 572d3ef548917..ad2255d431066 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -795,3 +795,36 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
You can contact the author at : - xxHash homepage: http://www.xxhash.com - xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in dev/tasks/conda-recipes/variants have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 720b19d894ace..282f57c515b7c 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -114,6 +114,7 @@ dev/tasks/linux-packages/debian/plasma-store-server.install dev/tasks/linux-packages/debian/rules dev/tasks/linux-packages/debian/source/format dev/tasks/linux-packages/debian/watch +dev/tasks/conda-recipes/variants/*.yaml docs/requirements.txt go/arrow/go.sum go/arrow/Gopkg.lock diff --git a/dev/tasks/conda-recipes/travis.linux.yml b/dev/tasks/conda-recipes/travis.linux.yml index c0fc71d230a55..a3c2929b7e6db 100644 --- a/dev/tasks/conda-recipes/travis.linux.yml +++ b/dev/tasks/conda-recipes/travis.linux.yml @@ -38,23 +38,28 @@ install: MINICONDA_FILE="Miniconda3-latest-Linux-x86_64.sh" curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}" bash $MINICONDA_FILE -b - - # Configure conda. + # Install conda build dependency - | echo "" echo "Configuring conda." source /home/travis/miniconda3/bin/activate root - conda config --remove channels defaults - conda config --add channels defaults - conda config --add channels conda-forge - conda config --set show_channel_urls true - conda install --yes --quiet conda-build + conda install -n root -c conda-forge --quiet --yes conda-forge-ci-setup=2 -script: +before_script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow - git -C arrow checkout {{ arrow.head }} - pushd arrow/dev/tasks/conda-recipes - - conda build --output-folder . 
-m {{ variant_config_file }} parquet-cpp arrow-cpp pyarrow + # Configure conda + - setup_conda_rc ./ ./ {{ variant_config_file }} + - source run_conda_forge_build_setup + +script: + # Don't need to run make_build_number, no build number decrementation happens, it's always 0 + - | + conda build --croot $TRAVIS_HOME/conda_build_root \ + --output-folder . \ + -m {{ variant_config_file }} \ + parquet-cpp arrow-cpp pyarrow deploy: provider: releases diff --git a/dev/tasks/conda-recipes/travis.osx.yml b/dev/tasks/conda-recipes/travis.osx.yml index 193539d8c9f37..6b3e561a3c5b0 100644 --- a/dev/tasks/conda-recipes/travis.osx.yml +++ b/dev/tasks/conda-recipes/travis.osx.yml @@ -47,24 +47,28 @@ install: MINICONDA_FILE="Miniconda3-latest-MacOSX-x86_64.sh" curl -L -O "${MINICONDA_URL}/${MINICONDA_FILE}" bash $MINICONDA_FILE -b - - # Configure conda. + # Install conda build dependency - | echo "" echo "Configuring conda." source /Users/travis/miniconda3/bin/activate root - conda config --remove channels defaults - conda config --add channels defaults - conda config --add channels conda-forge - conda config --set show_channel_urls true - conda install --yes --quiet conda-forge-ci-setup=1 - source run_conda_forge_build_setup + conda install -n root -c conda-forge --quiet --yes conda-forge-ci-setup=2 -script: +before_script: - git clone -b {{ arrow.branch }} {{ arrow.remote }} arrow - git -C arrow checkout {{ arrow.head }} - pushd arrow/dev/tasks/conda-recipes - - conda build --output-folder . -m {{ variant_config_file }} parquet-cpp arrow-cpp pyarrow + # Configure conda + - setup_conda_rc ./ ./ {{ variant_config_file }} + - source run_conda_forge_build_setup + +script: + # Don't need to run make_build_number, no build number decrementation happens, it's always 0 + - | + conda build --croot $TRAVIS_HOME/conda_build_root \ + --output-folder . 
\ + -m {{ variant_config_file }} \ + parquet-cpp arrow-cpp pyarrow deploy: provider: releases diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml new file mode 100644 index 0000000000000..43b2902b5986a --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- gcc +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '2.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml new file mode 100644 index 0000000000000..e5c89f2fed039 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- gcc +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.6' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml new file mode 100644 index 0000000000000..3892e5e8a509b --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- gcc +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- gxx +docker_image: +- condaforge/linux-anvil-comp7 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml new file mode 100644 index 0000000000000..9a9e0f79cecc7 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +docker_image: +- condaforge/linux-anvil +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '2.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git 
a/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml new file mode 100644 index 0000000000000..5f01b786de4a1 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +docker_image: +- condaforge/linux-anvil +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.6' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml new file mode 100644 index 0000000000000..0e27f2ec290d7 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml @@ -0,0 +1,29 @@ +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +docker_image: +- condaforge/linux-anvil +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - docker_image + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/linux_python2.7.yaml b/dev/tasks/conda-recipes/variants/linux_python2.7.yaml deleted file mode 100644 index 45026b07d60ab..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python2.7.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '2.7' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_python3.5.yaml b/dev/tasks/conda-recipes/variants/linux_python3.5.yaml deleted file mode 100644 index 683022f834913..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python3.5.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/linux_python3.6.yaml b/dev/tasks/conda-recipes/variants/linux_python3.6.yaml deleted file mode 100644 index 6b7d8896ac369..0000000000000 --- a/dev/tasks/conda-recipes/variants/linux_python3.6.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.6' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml new file mode 100644 index 0000000000000..caf6bf7ebb41f --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- clang +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- clangxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '2.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml new file mode 100644 index 0000000000000..94f51c0ac1461 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- clang +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- clangxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.6' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml new file mode 100644 index 0000000000000..25b5c4175ddbc --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '0' +c_compiler: +- clang +channel_sources: +- conda-forge/label/gcc7,defaults +channel_targets: +- conda-forge gcc7 +cxx_compiler: +- clangxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml new file mode 100644 index 0000000000000..e11b9f8c60cb8 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c 
+channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '2.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml new file mode 100644 index 0000000000000..01aa8595a1e24 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.6' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml new file mode 100644 index 0000000000000..836650a03a7eb --- /dev/null +++ b/dev/tasks/conda-recipes/variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml @@ -0,0 +1,32 @@ +MACOSX_DEPLOYMENT_TARGET: +- '10.9' +boost_cpp: +- 1.68.0 +build_number_decrement: +- '1000' +c_compiler: +- toolchain_c +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- toolchain_cxx +macos_machine: +- x86_64-apple-darwin13.4.0 +macos_min_version: +- '10.9' +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.7' +zip_keys: +- - c_compiler + - cxx_compiler + - channel_sources + - channel_targets + - build_number_decrement diff --git a/dev/tasks/conda-recipes/variants/osx_python2.7.yaml b/dev/tasks/conda-recipes/variants/osx_python2.7.yaml deleted file mode 100644 index b8fc15f924dd5..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python2.7.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -macos_machine: -- x86_64-apple-darwin13.4.0 -macos_min_version: -- '10.9' -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '2.7' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_python3.5.yaml b/dev/tasks/conda-recipes/variants/osx_python3.5.yaml deleted file mode 100644 index 05f7a8dd4d36d..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python3.5.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -macos_machine: -- x86_64-apple-darwin13.4.0 -macos_min_version: -- '10.9' -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/osx_python3.6.yaml b/dev/tasks/conda-recipes/variants/osx_python3.6.yaml deleted file mode 100644 index 6b7d8896ac369..0000000000000 --- a/dev/tasks/conda-recipes/variants/osx_python3.6.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -boost_cpp: -- 1.67.0 -c_compiler: -- toolchain_c -cxx_compiler: -- toolchain_cxx -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.6' -snappy: -- 1.1.7 -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml deleted file mode 100644 index d886b0e39ff7f..0000000000000 --- a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.5.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -boost_cpp: -- 1.67.0 -c_compiler: -- vs2015 -cxx_compiler: -- vs2015 -lz4_c: -- 1.8.1 -pin_run_as_build: - boost-cpp: - max_pin: x.x.x - lz4-c: - max_pin: x.x.x - python: - min_pin: x.x - max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x -python: -- '3.5' -snappy: -- 1.1.7 -zip_keys: -- - python - - c_compiler - - cxx_compiler -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml index 880642f5b7d85..a56ee638f6753 100644 --- a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml +++ b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.6.yaml @@ -1,51 +1,22 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- boost_cpp: -- 1.67.0 +- 1.68.0 c_compiler: - vs2015 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main cxx_compiler: - vs2015 -lz4_c: -- 1.8.1 pin_run_as_build: boost-cpp: max_pin: x.x.x - lz4-c: - max_pin: x.x.x python: min_pin: x.x max_pin: x.x - snappy: - max_pin: x.x.x - zlib: - max_pin: x.x - zstd: - max_pin: x.x.x python: - '3.6' -snappy: -- 1.1.7 zip_keys: - - python - c_compiler - cxx_compiler -zlib: -- '1.2' -zstd: -- 1.3.3 diff --git a/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml new file mode 100644 index 0000000000000..1cce7445c73e7 --- /dev/null +++ b/dev/tasks/conda-recipes/variants/win_c_compilervs2015cxx_compilervs2015python3.7.yaml @@ -0,0 +1,22 @@ +boost_cpp: +- 1.68.0 +c_compiler: +- vs2015 +channel_sources: +- conda-forge,defaults +channel_targets: +- conda-forge main +cxx_compiler: +- vs2015 +pin_run_as_build: + boost-cpp: + max_pin: x.x.x + python: + min_pin: x.x + max_pin: x.x +python: +- '3.7' +zip_keys: +- - python + - c_compiler + - cxx_compiler diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 52bbc577e6f1b..4b10b57fd0990 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -19,14 +19,20 @@ groups: # these groups are just for convenience # makes it easier to submit related tasks conda: - - conda-linux-py27 - - conda-linux-py35 - - conda-linux-py36 - - conda-osx-py27 - - conda-osx-py35 - - conda-osx-py36 - - conda-win-py35 - - conda-win-py36 + - conda-linux-gcc-py27 + - conda-linux-gcc-py36 + - conda-linux-gcc-py37 + - conda-linux-toolchain-py27 + - conda-linux-toolchain-py36 + - conda-linux-toolchain-py37 + - conda-osx-clang-py27 + - conda-osx-clang-py36 + - conda-osx-clang-py37 + - conda-osx-toolchain-py27 + - conda-osx-toolchain-py36 + - conda-osx-toolchain-py37 + - conda-win-vs2015-py36 + - conda-win-vs2015-py37 wheel: - wheel-linux-cp27m - wheel-linux-cp27mu @@ -64,81 +70,135 @@ tasks: ############################## Conda Linux ################################## - conda-linux-py27: + conda-linux-gcc-py27: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python2.7.yaml + variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml artifacts: - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-linux-py35: + conda-linux-gcc-py36: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python3.5.yaml + variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml artifacts: - - arrow-cpp-{version}-py35(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + + conda-linux-gcc-py37: + platform: linux + template: conda-recipes/travis.linux.yml + params: + variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml + artifacts: + - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + + conda-linux-toolchain-py27: + platform: linux + template: conda-recipes/travis.linux.yml + params: + variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml + artifacts: + - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-linux-py36: + 
conda-linux-toolchain-py36: platform: linux template: conda-recipes/travis.linux.yml params: - variant_config_file: variants/linux_python3.6.yaml + variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml artifacts: - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + conda-linux-toolchain-py37: + platform: linux + template: conda-recipes/travis.linux.yml + params: + variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml + artifacts: + - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + ############################## Conda OSX #################################### - conda-osx-py27: + conda-osx-clang-py27: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python2.7.yaml + variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml artifacts: - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-osx-py35: + conda-osx-clang-py36: + platform: osx + template: conda-recipes/travis.osx.yml + params: + variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml + artifacts: + - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + + conda-osx-clang-py37: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python3.5.yaml + variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml artifacts: - - arrow-cpp-{version}-py35(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + + conda-osx-toolchain-py27: + platform: osx + template: conda-recipes/travis.osx.yml + params: + variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml + artifacts: + - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - conda-osx-py36: + conda-osx-toolchain-py36: platform: osx template: conda-recipes/travis.osx.yml params: - variant_config_file: variants/osx_python3.6.yaml + variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml artifacts: - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + conda-osx-toolchain-py37: + platform: osx + template: conda-recipes/travis.osx.yml + params: + variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml + artifacts: + - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + ############################## Conda Windows ################################ - conda-win-py35: + conda-win-vs2015-py36: platform: win template: conda-recipes/appveyor.yml params: - variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.5.yaml + variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.6.yaml artifacts: - - arrow-cpp-{version}-py35_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py35(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - conda-win-py36: + conda-win-vs2015-py37: platform: win template: conda-recipes/appveyor.yml params: - variant_config_file: 
variants\win_c_compilervs2015cxx_compilervs2015python3.6.yaml + variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.7.yaml artifacts: - - arrow-cpp-{version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{version}-py37_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Wheel Linux ################################## From 5a502d281545402240e818d5fd97a9aaf36363f2 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 10 Jan 2019 21:05:31 +0100 Subject: [PATCH 081/203] ARROW-4216: [Python] Add CUDA API docs Also reorganize the API docs into several documents, and add/improve docstrings. To allow building the docs without CUDA enabled, I added some conditional inclusion logic. When CUDA isn't enabled, the API docs are still generated but the docstrings are empty. This seems to be the only sane setting that doesn't produce Sphinx errors, one way or the other. Author: Antoine Pitrou Closes #3372 from pitrou/ARROW-4216-cuda-py-docs and squashes the following commits: 80600da5 ARROW-4216: Add CUDA API docs --- docs/source/conf.py | 33 +++ docs/source/python/api.rst | 389 +-------------------------- docs/source/python/api/arrays.rst | 109 ++++++++ docs/source/python/api/cuda.rst | 62 +++++ docs/source/python/api/datatypes.rst | 134 +++++++++ docs/source/python/api/files.rst | 65 +++++ docs/source/python/api/formats.rst | 70 +++++ docs/source/python/api/ipc.rst | 59 ++++ docs/source/python/api/memory.rst | 68 +++++ docs/source/python/api/misc.rst | 40 +++ docs/source/python/api/plasma.rst | 33 +++ docs/source/python/api/tables.rst | 54 ++++ python/pyarrow/__init__.py | 4 +- python/pyarrow/_cuda.pyx | 70 ++--- python/pyarrow/array.pxi | 132 +++++++-- python/pyarrow/io.pxi | 86 ++++-- python/pyarrow/memory.pxi | 14 +- python/pyarrow/scalar.pxi | 106 +++++++- python/pyarrow/types.pxi | 103 ++++++- 19 files changed, 1167 insertions(+), 464 deletions(-) create mode 100644 docs/source/python/api/arrays.rst create mode 100644 docs/source/python/api/cuda.rst create mode 100644 docs/source/python/api/datatypes.rst create mode 100644 docs/source/python/api/files.rst create mode 100644 docs/source/python/api/formats.rst create mode 100644 docs/source/python/api/ipc.rst create mode 100644 docs/source/python/api/memory.rst create mode 100644 docs/source/python/api/misc.rst create mode 100644 docs/source/python/api/plasma.rst create mode 100644 docs/source/python/api/tables.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 1cadef18b64f2..d525fa943138b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -53,6 +53,7 @@ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.doctest', + 'sphinx.ext.ifconfig', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', @@ -69,6 +70,9 @@ 'inherited-members': None } +# Overridden conditionally below +autodoc_mock_imports = [] + # ipython directive options ipython_mplbackend = '' @@ -387,3 +391,32 @@ # If true, do not generate a @detailmenu in the "Top" node's menu.
# # texinfo_no_detailmenu = False + + +# -- Customization -------------------------------------------------------- + +# Conditional API doc generation + +# Sphinx has two features for conditional inclusion: +# - The "only" directive +# https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#including-content-based-on-tags +# - The "ifconfig" extension +# https://www.sphinx-doc.org/en/master/usage/extensions/ifconfig.html +# +# Both have issues, but "ifconfig" seems to work in this setting. + +try: + import pyarrow.cuda + cuda_enabled = True +except ImportError: + cuda_enabled = False + # Mock pyarrow.cuda to avoid autodoc warnings. + # XXX I can't get autodoc_mock_imports to work, so mock manually instead + # (https://github.com/sphinx-doc/sphinx/issues/2174#issuecomment-453177550) + from unittest import mock + pyarrow.cuda = sys.modules['pyarrow.cuda'] = mock.Mock() + +def setup(app): + # Use a config value to indicate whether CUDA API docs can be generated. + # This will also rebuild appropriately when the value changes. + app.add_config_value('cuda_enabled', cuda_enabled, 'env') diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst index 0bad76ff0bf63..b06509f7a5b19 100644 --- a/docs/source/python/api.rst +++ b/docs/source/python/api.rst @@ -15,385 +15,22 @@ .. specific language governing permissions and limitations .. under the License. -.. currentmodule:: pyarrow .. _api: ************* API Reference ************* -.. _api.types: - -Type and Schema Factory Functions ---------------------------------- - -.. autosummary:: - :toctree: generated/ - - null - bool_ - int8 - int16 - int32 - int64 - uint8 - uint16 - uint32 - uint64 - float16 - float32 - float64 - time32 - time64 - timestamp - date32 - date64 - binary - string - utf8 - decimal128 - list_ - struct - dictionary - field - schema - from_numpy_dtype - -.. currentmodule:: pyarrow.types -.. _api.types.checking: - -Type checking functions ------------------------ - -.. autosummary:: - :toctree: generated/ - - is_boolean - is_integer - is_signed_integer - is_unsigned_integer - is_int8 - is_int16 - is_int32 - is_int64 - is_uint8 - is_uint16 - is_uint32 - is_uint64 - is_floating - is_float16 - is_float32 - is_float64 - is_decimal - is_list - is_struct - is_union - is_nested - is_temporal - is_timestamp - is_date - is_date32 - is_date64 - is_time - is_time32 - is_time64 - is_null - is_binary - is_unicode - is_string - is_fixed_size_binary - is_map - is_dictionary - -.. currentmodule:: pyarrow - -.. _api.value: - -Scalar Value Types ------------------- - -.. autosummary:: - :toctree: generated/ - - NA - Scalar - ArrayValue - BooleanValue - Int8Value - Int16Value - Int32Value - Int64Value - UInt8Value - UInt16Value - UInt32Value - UInt64Value - FloatValue - DoubleValue - ListValue - BinaryValue - StringValue - FixedSizeBinaryValue - Date32Value - Date64Value - TimestampValue - DecimalValue - -.. _api.array: - -.. currentmodule:: pyarrow - -Array Types ------------ - -.. autosummary:: - :toctree: generated/ - - array - Array - BooleanArray - DictionaryArray - FloatingPointArray - IntegerArray - Int8Array - Int16Array - Int32Array - Int64Array - NullArray - NumericArray - UInt8Array - UInt16Array - UInt32Array - UInt64Array - BinaryArray - FixedSizeBinaryArray - StringArray - Time32Array - Time64Array - Date32Array - Date64Array - TimestampArray - Decimal128Array - ListArray - -.. _api.table: - -.. currentmodule:: pyarrow - -Tables and Record Batches -------------------------- - -.. 
autosummary:: - :toctree: generated/ - - column - chunked_array - concat_tables - ChunkedArray - Column - RecordBatch - Table - -.. _api.tensor: - -Tensor type and Functions -------------------------- - -.. autosummary:: - :toctree: generated/ - - Tensor - -.. _api.io: - -In-Memory Buffers ------------------ - -.. autosummary:: - :toctree: generated/ - - allocate_buffer - compress - decompress - py_buffer - foreign_buffer - Buffer - ResizableBuffer - -Input / Output and Shared Memory --------------------------------- - -.. autosummary:: - :toctree: generated/ - - input_stream - output_stream - BufferReader - BufferOutputStream - FixedSizeBufferWriter - NativeFile - OSFile - MemoryMappedFile - CompressedInputStream - CompressedOutputStream - memory_map - create_memory_map - PythonFile - -File Systems ------------- - -.. autosummary:: - :toctree: generated/ - - hdfs.connect - LocalFileSystem - -.. class:: HadoopFileSystem - :noindex: - -.. _api.ipc: - -Serialization and IPC ---------------------- - -.. autosummary:: - :toctree: generated/ - - ipc.open_file - ipc.open_stream - Message - MessageReader - RecordBatchFileReader - RecordBatchFileWriter - RecordBatchStreamReader - RecordBatchStreamWriter - read_message - read_record_batch - get_record_batch_size - read_tensor - write_tensor - get_tensor_size - serialize - serialize_to - deserialize - deserialize_components - deserialize_from - read_serialized - SerializedPyObject - SerializationContext - -.. _api.memory_pool: - -Memory Pools ------------- - -.. currentmodule:: pyarrow - -.. autosummary:: - :toctree: generated/ - - MemoryPool - default_memory_pool - total_allocated_bytes - set_memory_pool - log_memory_allocations - -.. _api.type_classes: - -.. currentmodule:: pyarrow - -Type Classes ------------- - -.. autosummary:: - :toctree: generated/ - - DataType - Field - Schema - -.. currentmodule:: pyarrow.plasma - -.. _api.plasma: - -Plasma In-Memory Object Store ------------------------------ - -.. autosummary:: - :toctree: generated/ - - ObjectID - PlasmaClient - PlasmaBuffer - -.. currentmodule:: pyarrow.csv - -.. _api.csv: - -CSV Files ---------- - -.. autosummary:: - :toctree: generated/ - - ReadOptions - ParseOptions - ConvertOptions - read_csv - -.. _api.feather: - -Feather Files -------------- - -.. currentmodule:: pyarrow.feather - -.. autosummary:: - :toctree: generated/ - - read_feather - write_feather - -.. currentmodule:: pyarrow - -.. _api.parquet: - -Parquet Files -------------- - -.. currentmodule:: pyarrow.parquet - -.. autosummary:: - :toctree: generated/ - - ParquetDataset - ParquetFile - ParquetWriter - read_table - read_metadata - read_pandas - read_schema - write_metadata - write_table - write_to_dataset - -.. currentmodule:: pyarrow - -Multi-Threading ---------------- - -.. autosummary:: - :toctree: generated/ - - cpu_count - set_cpu_count - -Using with C extensions ------------------------ - -.. autosummary:: - :toctree: generated/ - - get_include - get_libraries - get_library_dirs +.. toctree:: + :maxdepth: 2 + + api/datatypes + api/arrays + api/memory + api/files + api/tables + api/ipc + api/formats + api/plasma + api/cuda + api/misc diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst new file mode 100644 index 0000000000000..db45eeff0ca5a --- /dev/null +++ b/docs/source/python/api/arrays.rst @@ -0,0 +1,109 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.array: +.. currentmodule:: pyarrow + +Arrays and Scalars +================== + +Factory Function +---------------- + +This function is the main entry point to create an Arrow array from Python. + +.. autosummary:: + :toctree: ../generated/ + + array + +Array Types +----------- + +An array's Python class depends on its data type. Concrete array classes +may expose data type-specific methods or properties. + +.. autosummary:: + :toctree: ../generated/ + + Array + BooleanArray + FloatingPointArray + IntegerArray + Int8Array + Int16Array + Int32Array + Int64Array + NullArray + NumericArray + UInt8Array + UInt16Array + UInt32Array + UInt64Array + BinaryArray + StringArray + FixedSizeBinaryArray + Time32Array + Time64Array + Date32Array + Date64Array + TimestampArray + Decimal128Array + DictionaryArray + ListArray + StructArray + UnionArray + +.. _api.scalar: + +Array Scalars +------------- + +Indexing an array wraps the represented value in a scalar object whose +concrete type depends on the array data type. You shouldn't instantiate +any of those classes directly. + +.. autosummary:: + :toctree: ../generated/ + + NA + Scalar + ArrayValue + BooleanValue + Int8Value + Int16Value + Int32Value + Int64Value + UInt8Value + UInt16Value + UInt32Value + UInt64Value + FloatValue + DoubleValue + BinaryValue + StringValue + FixedSizeBinaryValue + Time32Value + Time64Value + Date32Value + Date64Value + TimestampValue + DecimalValue + DictionaryValue + ListValue + StructValue + UnionValue diff --git a/docs/source/python/api/cuda.rst b/docs/source/python/api/cuda.rst new file mode 100644 index 0000000000000..364f032403586 --- /dev/null +++ b/docs/source/python/api/cuda.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.cuda + +CUDA Integration +================ + +.. ifconfig:: not cuda_enabled + + .. error:: + This documentation was built without CUDA enabled. The CUDA + API docs are not available. + +.. NOTE We still generate those API docs (with empty docstrings) +.. when CUDA is disabled and `pyarrow.cuda` mocked (see conf.py). +.. 
Otherwise we'd get autodoc warnings, see https://github.com/sphinx-doc/sphinx/issues/4770 + +CUDA Contexts +------------- + +.. autosummary:: + :toctree: ../generated/ + + Context + +CUDA Buffers +------------ + +.. autosummary:: + :toctree: ../generated/ + + CudaBuffer + new_host_buffer + HostBuffer + BufferReader + BufferWriter + +Serialization and IPC +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + serialize_record_batch + read_record_batch + read_message + IpcMemHandle diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst new file mode 100644 index 0000000000000..5ad0204966337 --- /dev/null +++ b/docs/source/python/api/datatypes.rst @@ -0,0 +1,134 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.types: +.. currentmodule:: pyarrow + +Data Types and Schemas +====================== + +Factory Functions +----------------- + +These should be used to create Arrow data types and schemas. + +.. autosummary:: + :toctree: ../generated/ + + null + bool_ + int8 + int16 + int32 + int64 + uint8 + uint16 + uint32 + uint64 + float16 + float32 + float64 + time32 + time64 + timestamp + date32 + date64 + binary + string + utf8 + decimal128 + list_ + struct + dictionary + field + schema + from_numpy_dtype + +.. _api.type_classes: +.. currentmodule:: pyarrow + +Type Classes +------------ + +Do not instantiate these classes directly. Instead, call one of the factory +functions above. + +.. autosummary:: + :toctree: ../generated/ + + DataType + DictionaryType + ListType + StructType + UnionType + TimestampType + Time32Type + Time64Type + FixedSizeBinaryType + Decimal128Type + Field + Schema + +.. _api.types.checking: +.. currentmodule:: pyarrow.types + +Type Checking +------------- + +These functions are predicates to check whether a :class:`DataType` instance +represents a given data type (such as ``int32``) or general category +(such as "is a signed integer"). + +.. autosummary:: + :toctree: ../generated/ + + is_boolean + is_integer + is_signed_integer + is_unsigned_integer + is_int8 + is_int16 + is_int32 + is_int64 + is_uint8 + is_uint16 + is_uint32 + is_uint64 + is_floating + is_float16 + is_float32 + is_float64 + is_decimal + is_list + is_struct + is_union + is_nested + is_temporal + is_timestamp + is_date + is_date32 + is_date64 + is_time + is_time32 + is_time64 + is_null + is_binary + is_unicode + is_string + is_fixed_size_binary + is_map + is_dictionary diff --git a/docs/source/python/api/files.rst b/docs/source/python/api/files.rst new file mode 100644 index 0000000000000..106dfde8abffb --- /dev/null +++ b/docs/source/python/api/files.rst @@ -0,0 +1,65 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +Streams and File Access +======================= + +.. _api.io: + +Factory Functions +----------------- + +These factory functions are the recommended way to create an Arrow stream. +They accept various kinds of sources, such as in-memory buffers or on-disk files. + +.. autosummary:: + :toctree: ../generated/ + + input_stream + output_stream + memory_map + create_memory_map + +Stream Classes +-------------- + +.. autosummary:: + :toctree: ../generated/ + + NativeFile + OSFile + PythonFile + BufferReader + BufferOutputStream + FixedSizeBufferWriter + MemoryMappedFile + CompressedInputStream + CompressedOutputStream + +File Systems +------------ + +.. autosummary:: + :toctree: ../generated/ + + hdfs.connect + LocalFileSystem + +.. class:: HadoopFileSystem + :noindex: diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst new file mode 100644 index 0000000000000..8de30ece93584 --- /dev/null +++ b/docs/source/python/api/formats.rst @@ -0,0 +1,70 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Tabular File Formats +==================== + +.. currentmodule:: pyarrow.csv + +.. _api.csv: + +CSV Files +--------- + +.. autosummary:: + :toctree: ../generated/ + + ReadOptions + ParseOptions + ConvertOptions + read_csv + +.. _api.feather: + +Feather Files +------------- + +.. currentmodule:: pyarrow.feather + +.. autosummary:: + :toctree: ../generated/ + + read_feather + write_feather + +.. currentmodule:: pyarrow + +.. _api.parquet: + +Parquet Files +------------- + +.. currentmodule:: pyarrow.parquet + +.. autosummary:: + :toctree: ../generated/ + + ParquetDataset + ParquetFile + ParquetWriter + read_table + read_metadata + read_pandas + read_schema + write_metadata + write_table + write_to_dataset diff --git a/docs/source/python/api/ipc.rst b/docs/source/python/api/ipc.rst new file mode 100644 index 0000000000000..bd14d30dcb274 --- /dev/null +++ b/docs/source/python/api/ipc.rst @@ -0,0 +1,59 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.ipc: + +Serialization and IPC +===================== + +Inter-Process Communication +--------------------------- + +.. autosummary:: + :toctree: ../generated/ + + ipc.open_file + ipc.open_stream + Message + MessageReader + RecordBatchFileReader + RecordBatchFileWriter + RecordBatchStreamReader + RecordBatchStreamWriter + read_message + read_record_batch + get_record_batch_size + read_tensor + write_tensor + get_tensor_size + +Serialization +------------- + +.. autosummary:: + :toctree: ../generated/ + + serialize + serialize_to + deserialize + deserialize_components + deserialize_from + read_serialized + SerializedPyObject + SerializationContext diff --git a/docs/source/python/api/memory.rst b/docs/source/python/api/memory.rst new file mode 100644 index 0000000000000..da9156fcad539 --- /dev/null +++ b/docs/source/python/api/memory.rst @@ -0,0 +1,68 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.memory: + +Buffers and Memory +================== + +In-Memory Buffers +----------------- + +Factory Functions +~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + allocate_buffer + py_buffer + foreign_buffer + +Classes +~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + Buffer + ResizableBuffer + +Miscellaneous +~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + compress + decompress + +.. _api.memory_pool: + +Memory Pools +------------ + +.. autosummary:: + :toctree: ../generated/ + + MemoryPool + default_memory_pool + total_allocated_bytes + set_memory_pool + log_memory_allocations diff --git a/docs/source/python/api/misc.rst b/docs/source/python/api/misc.rst new file mode 100644 index 0000000000000..c13b80620f154 --- /dev/null +++ b/docs/source/python/api/misc.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. 
to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +Miscellaneous +============= + +Multi-Threading +--------------- + +.. autosummary:: + :toctree: ../generated/ + + cpu_count + set_cpu_count + +Using with C extensions +----------------------- + +.. autosummary:: + :toctree: ../generated/ + + get_include + get_libraries + get_library_dirs diff --git a/docs/source/python/api/plasma.rst b/docs/source/python/api/plasma.rst new file mode 100644 index 0000000000000..8df9e4e21ac8b --- /dev/null +++ b/docs/source/python/api/plasma.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.plasma + +.. _api.plasma: + +Plasma In-Memory Object Store +============================= + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + ObjectID + PlasmaClient + PlasmaBuffer diff --git a/docs/source/python/api/tables.rst b/docs/source/python/api/tables.rst new file mode 100644 index 0000000000000..5a229d29fa60b --- /dev/null +++ b/docs/source/python/api/tables.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.table: + +Tables and Tensors +================== + +Factory Functions +----------------- + +.. autosummary:: + :toctree: ../generated/ + + column + chunked_array + concat_tables + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + ChunkedArray + Column + RecordBatch + Table + +.. _api.tensor: + +Tensors +------- + +.. 
autosummary:: + :toctree: ../generated/ + + Tensor diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 0d1c1bef87a1c..dabcdf1813059 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -60,7 +60,9 @@ def parse_git(root, **kwargs): binary, string, utf8, decimal128, list_, struct, union, dictionary, field, type_for_alias, - DataType, + DataType, DictionaryType, ListType, StructType, + UnionType, TimestampType, Time32Type, Time64Type, + FixedSizeBinaryType, Decimal128Type, Field, Schema, schema, diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx index cd5704947297b..c2d95a6f13652 100644 --- a/python/pyarrow/_cuda.pyx +++ b/python/pyarrow/_cuda.pyx @@ -23,21 +23,29 @@ cimport cpython as cp cdef class Context: - """ CUDA driver context. + """ + CUDA driver context. """ - def __cinit__(self, int device_number=0, uintptr_t handle=0): - """Construct the shared CUDA driver context for a particular device. + def __init__(self, *args, **kwargs): + """ + Create a CUDA driver context for a particular device. + + If a CUDA context handle is passed, it is wrapped, otherwise + a default CUDA context for the given device is requested. Parameters ---------- - device_number : int - Specify the gpu device for which the CUDA driver context is + device_number : int (default 0) + Specify the GPU device for which the CUDA driver context is requested. - handle : int - Specify handle for a shared context that has been created by - another library. + handle : int, optional + Specify CUDA handle for a shared context that has been created + by another library. """ + # This method is exposed because autodoc doesn't pick up __cinit__ + + def __cinit__(self, int device_number=0, uintptr_t handle=0): cdef CCudaDeviceManager* manager check_status(CCudaDeviceManager.GetInstance(&manager)) cdef int n = manager.num_devices() @@ -55,13 +63,14 @@ cdef class Context: @staticmethod def from_numba(context=None): - """Create Context instance from a numba CUDA context. + """ + Create a Context instance from a Numba CUDA context. Parameters ---------- context : {numba.cuda.cudadrv.driver.Context, None} - Specify numba CUDA context instance. When None, use the - current numba context. + A Numba CUDA context instance. + If None, the current Numba context is used. Returns ------- @@ -75,7 +84,8 @@ cdef class Context: handle=context.handle.value) def to_numba(self): - """Convert Context to numba CUDA context. + """ + Convert Context to a Numba CUDA context. Returns ------- @@ -238,7 +248,7 @@ cdef class Context: cdef class IpcMemHandle: - """A container for a CUDA IPC handle. + """A serializable container for a CUDA IPC handle. """ cdef void init(self, shared_ptr[CCudaIpcMemHandle]& h): self.handle = h @@ -285,14 +295,10 @@ cdef class IpcMemHandle: cdef class CudaBuffer(Buffer): """An Arrow buffer with data located in a GPU device. - To create a CudaBuffer instance, use - - .device_buffer(data=, offset=, - size=) - - The memory allocated in CudaBuffer instance is freed when the - instance is deleted. + To create a CudaBuffer instance, use Context.device_buffer(). + The memory allocated in a CudaBuffer is freed when the buffer object + is deleted. """ def __init__(self): @@ -529,7 +535,7 @@ cdef class CudaBuffer(Buffer): After calling this function, this device memory will not be freed when the CudaBuffer is destructed. 
- Results + Returns ------- ipc_handle : IpcMemHandle The exported IPC handle @@ -774,9 +780,9 @@ def serialize_record_batch(object batch, object ctx): Parameters ---------- batch : RecordBatch - Specify record batch to write + Record batch to write ctx : Context - Specify context to allocate device memory from + CUDA Context to allocate device memory from Returns ------- @@ -797,14 +803,14 @@ def read_message(object source, pool=None): Parameters ---------- source : {CudaBuffer, cuda.BufferReader} - Specify device buffer or reader of device buffer. - pool : {MemoryPool, None} - Specify pool to allocate CPU memory for the metadata + Device buffer or reader of device buffer. + pool : MemoryPool (optional) + Pool to allocate CPU memory for the metadata Returns ------- message : Message - the deserialized message, body still on device + The deserialized message, body still on device """ cdef: Message result = Message.__new__(Message) @@ -824,16 +830,16 @@ def read_record_batch(object buffer, object schema, pool=None): Parameters ---------- buffer : - Specify device buffer containing the complete IPC message + Device buffer containing the complete IPC message schema : Schema - Specify schema for the record batch - pool : {MemoryPool, None} - Specify pool to use for allocating space for the metadata + The schema for the record batch + pool : MemoryPool (optional) + Pool to allocate metadata from Returns ------- batch : RecordBatch - reconstructed record batch, with device pointers + Reconstructed record batch, with device pointers """ cdef shared_ptr[CSchema] schema_ = pyarrow_unwrap_schema(schema) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 54d0e92cd5561..41a3b970b3acf 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -392,6 +392,9 @@ cdef class _PandasConvertible: cdef class Array(_PandasConvertible): + """ + The base class for all Arrow arrays. + """ def __init__(self): raise TypeError("Do not call {}'s constructor directly, use one of " @@ -616,11 +619,18 @@ cdef class Array(_PandasConvertible): def isnull(self): raise NotImplemented - def __getitem__(self, key): - if PySlice_Check(key): - return _normalize_slice(self, key) + def __getitem__(self, index): + """ + Return the value at the given index. - return self.getitem(_normalize_index(key, self.length())) + Returns + ------- + value : Scalar + """ + if PySlice_Check(index): + return _normalize_slice(self, index) + + return self.getitem(_normalize_index(index, self.length())) cdef getitem(self, int64_t i): return box_scalar(self.type, self.sp_array, i) @@ -736,6 +746,9 @@ cdef class Array(_PandasConvertible): cdef class Tensor: + """ + An n-dimensional array, a.k.a. a Tensor. + """ def __init__(self): raise TypeError("Do not call Tensor's constructor directly, use one " @@ -842,98 +855,147 @@ cdef wrap_array_output(PyObject* output): cdef class NullArray(Array): - pass + """ + Concrete class for Arrow arrays of null data type. + """ cdef class BooleanArray(Array): - pass + """ + Concrete class for Arrow arrays of boolean data type. + """ cdef class NumericArray(Array): - pass + """ + A base class for Arrow numeric arrays. + """ cdef class IntegerArray(NumericArray): - pass + """ + A base class for Arrow integer arrays. + """ cdef class FloatingPointArray(NumericArray): - pass + """ + A base class for Arrow floating-point arrays. + """ cdef class Int8Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of int8 data type. 
+ """ cdef class UInt8Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of uint8 data type. + """ cdef class Int16Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of int16 data type. + """ cdef class UInt16Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of uint16 data type. + """ cdef class Int32Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of int32 data type. + """ cdef class UInt32Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of uint32 data type. + """ cdef class Int64Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of int64 data type. + """ cdef class UInt64Array(IntegerArray): - pass + """ + Concrete class for Arrow arrays of uint64 data type. + """ cdef class Date32Array(NumericArray): - pass + """ + Concrete class for Arrow arrays of date32 data type. + """ cdef class Date64Array(NumericArray): - pass + """ + Concrete class for Arrow arrays of date64 data type. + """ cdef class TimestampArray(NumericArray): - pass + """ + Concrete class for Arrow arrays of timestamp data type. + """ cdef class Time32Array(NumericArray): - pass + """ + Concrete class for Arrow arrays of time32 data type. + """ cdef class Time64Array(NumericArray): - pass + """ + Concrete class for Arrow arrays of time64 data type. + """ cdef class HalfFloatArray(FloatingPointArray): - pass + """ + Concrete class for Arrow arrays of float16 data type. + """ cdef class FloatArray(FloatingPointArray): - pass + """ + Concrete class for Arrow arrays of float32 data type. + """ cdef class DoubleArray(FloatingPointArray): - pass + """ + Concrete class for Arrow arrays of float64 data type. + """ cdef class FixedSizeBinaryArray(Array): - pass + """ + Concrete class for Arrow arrays of a fixed-size binary data type. + """ cdef class Decimal128Array(FixedSizeBinaryArray): - pass + """ + Concrete class for Arrow arrays of decimal128 data type. + """ cdef class ListArray(Array): + """ + Concrete class for Arrow arrays of a list data type. + """ @staticmethod def from_arrays(offsets, values, MemoryPool pool=None): @@ -975,6 +1037,9 @@ cdef class ListArray(Array): cdef class UnionArray(Array): + """ + Concrete class for Arrow arrays of a Union data type. + """ @staticmethod def from_dense(Array types, Array value_offsets, list children): @@ -1028,6 +1093,9 @@ cdef class UnionArray(Array): cdef class StringArray(Array): + """ + Concrete class for Arrow arrays of string (or utf8) data type. + """ @staticmethod def from_buffers(int length, Buffer value_offsets, Buffer data, @@ -1066,10 +1134,15 @@ cdef class StringArray(Array): cdef class BinaryArray(Array): - pass + """ + Concrete class for Arrow arrays of variable-sized binary data type. + """ cdef class DictionaryArray(Array): + """ + Concrete class for dictionary-encoded Arrow arrays. + """ def dictionary_encode(self): return self @@ -1163,6 +1236,9 @@ cdef class DictionaryArray(Array): cdef class StructArray(Array): + """ + Concrete class for Arrow arrays of a struct data type. + """ def field(self, index): """ diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 52122740b63ae..8edffbec6dea2 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -44,6 +44,16 @@ cdef extern from "Python.h": cdef class NativeFile: + """ + The base class for all Arrow streams. + + Streams are either readable, writable, or both. + They optionally support seeking. 
+ + While this class exposes methods to read or write data from Python, the + primary intent of using an Arrow stream is to pass it to other Arrow + facilities that will make use of it, such as Arrow IPC routines. + """ def __cinit__(self): self.own_file = False @@ -559,6 +569,16 @@ BufferedIOBase.register(NativeFile) cdef class PythonFile(NativeFile): + """ + A stream backed by a Python file object. + + This class allows using Python file objects with arbitrary Arrow + functions, including functions written in a language other than Python. + + As a downside, there is a non-zero redirection cost in translating + Arrow stream calls to Python method calls. Furthermore, Python's + Global Interpreter Lock may limit parallelism in some situations. + """ cdef: object handle @@ -628,7 +648,9 @@ cdef class PythonFile(NativeFile): cdef class MemoryMappedFile(NativeFile): """ - Supports 'r', 'r+w', 'w' modes + A stream that represents a memory-mapped file. + + Supports 'r', 'r+', 'w' modes. """ cdef: shared_ptr[CMemoryMappedFile] handle @@ -704,7 +726,9 @@ def memory_map(path, mode='r'): Parameters ---------- path : string - mode : {'r', 'w'}, default 'r' + mode : {'r', 'r+', 'w'}, default 'r' + Whether the file is opened for reading ('r'), writing ('w') + or both ('r+'). Returns ------- @@ -717,13 +741,14 @@ def memory_map(path, mode='r'): def create_memory_map(path, size): """ - Create memory map at indicated path of the given size, return open - writable file object + Create a file of the given size and memory-map it. Parameters ---------- path : string + The file path to create, on the local filesystem. size : int + The file size to create. Returns ------- @@ -734,7 +759,7 @@ def create_memory_map(path, size): cdef class OSFile(NativeFile): """ - Supports 'r', 'w' modes + A stream backed by a regular file descriptor. """ cdef: object path @@ -774,6 +799,9 @@ cdef class OSFile(NativeFile): cdef class FixedSizeBufferWriter(NativeFile): + """ + A stream writing to an Arrow buffer. + """ def __cinit__(self, Buffer buffer): self.output_stream.reset(new CFixedSizeBufferWriter(buffer.buffer)) @@ -800,6 +828,12 @@ cdef class FixedSizeBufferWriter(NativeFile): cdef class Buffer: + """ + The base class for all Arrow buffers. + + A buffer represents a contiguous memory area. Many buffers will own + their memory, though not all of them do. + """ def __cinit__(self): pass @@ -818,14 +852,23 @@ cdef class Buffer: @property def size(self): + """ + The buffer size in bytes. + """ return self.buffer.get().size() @property def address(self): + """ + The buffer's address, as an integer. + """ return self.buffer.get().data() @property def is_mutable(self): + """ + Whether the buffer is mutable. + """ return self.buffer.get().is_mutable() @property @@ -848,7 +891,9 @@ cdef class Buffer: def slice(self, offset=0, length=None): """ - Compute slice of this buffer + Slice this buffer. Memory is not copied. + + You can also use the Python slice notation ``buffer[start:stop]``. Parameters ---------- @@ -861,6 +906,7 @@ cdef class Buffer: Returns ------- sliced : Buffer + A logical view over this buffer. """ cdef shared_ptr[CBuffer] result @@ -876,7 +922,7 @@ cdef class Buffer: def equals(self, Buffer other): """ - Determine if two buffers contain exactly the same data + Determine if two buffers contain exactly the same data. Parameters ---------- @@ -904,6 +950,9 @@ cdef class Buffer: return py_buffer, (self.to_pybytes(),) def to_pybytes(self): + """ + Return this buffer as a Python bytes object. Memory is copied. 
+ """ return cp.PyBytes_FromStringAndSize( self.buffer.get().data(), self.buffer.get().size()) @@ -950,21 +999,25 @@ cdef class Buffer: cdef class ResizableBuffer(Buffer): + """ + A base class for buffers that can be resized. + """ cdef void init_rz(self, const shared_ptr[CResizableBuffer]& buffer): self.init( buffer) def resize(self, int64_t new_size, shrink_to_fit=False): """ - Resize buffer to indicated size + Resize buffer to indicated size. Parameters ---------- - new_size : int64_t + new_size : int New size of buffer (padding may be added internally) shrink_to_fit : boolean, default False - If new_size is less than the current size, shrink internal - capacity, otherwise leave at current capacity + If this is true, the buffer is shrunk when new_size is less + than the current size. + If this is false, the buffer is never shrunk. """ cdef c_bool c_shrink_to_fit = shrink_to_fit with nogil: @@ -982,15 +1035,17 @@ cdef shared_ptr[CResizableBuffer] _allocate_buffer(CMemoryPool* pool): def allocate_buffer(int64_t size, MemoryPool memory_pool=None, resizable=False): """ - Allocate mutable fixed-size buffer + Allocate a mutable buffer. Parameters ---------- size : int Number of bytes to allocate (plus internal padding) memory_pool : MemoryPool, optional - Uses default memory pool if not provided + The pool to allocate memory from. + If not given, the default memory pool is used. resizable : boolean, default False + If true, the returned buffer is resizable. Returns ------- @@ -1305,8 +1360,7 @@ def _detect_compression(path): def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): """ - Compress pyarrow.Buffer or Python object supporting the buffer (memoryview) - protocol + Compress data from buffer-like object. Parameters ---------- @@ -1367,7 +1421,7 @@ def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): def decompress(object buf, decompressed_size=None, codec='lz4', asbytes=False, memory_pool=None): """ - Decompress data from buffer-like object + Decompress data from buffer-like object. Parameters ---------- diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index 7fa6d79a370d7..047e70d17abcc 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -21,6 +21,12 @@ cdef class MemoryPool: + """ + Base class for memory allocation. + + Besides tracking its number of allocated bytes, a memory pool also + takes care of the required 64-byte alignment for Arrow data. + """ def __init__(self): raise TypeError("Do not call {}'s constructor directly, " @@ -68,8 +74,9 @@ cdef class LoggingMemoryPool(MemoryPool): cdef class ProxyMemoryPool(MemoryPool): """ - Derived MemoryPool class that tracks the number of bytes and - maximum memory allocated through its direct calls. + Memory pool implementation that tracks the number of bytes and + maximum memory allocated through its direct calls, while redirecting + to another memory pool. """ cdef: unique_ptr[CProxyMemoryPool] proxy_pool @@ -81,6 +88,9 @@ cdef class ProxyMemoryPool(MemoryPool): def default_memory_pool(): + """ + Return the process-global memory pool. + """ cdef: MemoryPool pool = MemoryPool.__new__(MemoryPool) pool.init(c_get_memory_pool()) diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index fd3f58072d452..e2c1481797df6 100644 --- a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -19,10 +19,17 @@ _NULL = NA = None +cdef class Scalar: + """ + The base class for all array elements. 
+ """ + + cdef class NullType(Scalar): """ - Null (NA) value singleton + Singleton for null array elements. """ + # TODO rename this NullValue? def __cinit__(self): global NA if NA is not None: @@ -44,6 +51,9 @@ _NULL = NA = NullType() cdef class ArrayValue(Scalar): + """ + The base class for non-null array elements. + """ def __init__(self): raise TypeError("Do not call {}'s constructor directly, use array " @@ -85,6 +95,9 @@ cdef class ArrayValue(Scalar): cdef class BooleanValue(ArrayValue): + """ + Concrete class for boolean array elements. + """ def as_py(self): """ @@ -95,6 +108,9 @@ cdef class BooleanValue(ArrayValue): cdef class Int8Value(ArrayValue): + """ + Concrete class for int8 array elements. + """ def as_py(self): """ @@ -105,6 +121,9 @@ cdef class Int8Value(ArrayValue): cdef class UInt8Value(ArrayValue): + """ + Concrete class for uint8 array elements. + """ def as_py(self): """ @@ -115,6 +134,9 @@ cdef class UInt8Value(ArrayValue): cdef class Int16Value(ArrayValue): + """ + Concrete class for int16 array elements. + """ def as_py(self): """ @@ -125,6 +147,9 @@ cdef class Int16Value(ArrayValue): cdef class UInt16Value(ArrayValue): + """ + Concrete class for uint16 array elements. + """ def as_py(self): """ @@ -135,6 +160,9 @@ cdef class UInt16Value(ArrayValue): cdef class Int32Value(ArrayValue): + """ + Concrete class for int32 array elements. + """ def as_py(self): """ @@ -145,6 +173,9 @@ cdef class Int32Value(ArrayValue): cdef class UInt32Value(ArrayValue): + """ + Concrete class for uint32 array elements. + """ def as_py(self): """ @@ -155,6 +186,9 @@ cdef class UInt32Value(ArrayValue): cdef class Int64Value(ArrayValue): + """ + Concrete class for int64 array elements. + """ def as_py(self): """ @@ -165,6 +199,9 @@ cdef class Int64Value(ArrayValue): cdef class UInt64Value(ArrayValue): + """ + Concrete class for uint64 array elements. + """ def as_py(self): """ @@ -175,6 +212,9 @@ cdef class UInt64Value(ArrayValue): cdef class Date32Value(ArrayValue): + """ + Concrete class for date32 array elements. + """ def as_py(self): """ @@ -188,6 +228,9 @@ cdef class Date32Value(ArrayValue): cdef class Date64Value(ArrayValue): + """ + Concrete class for date64 array elements. + """ def as_py(self): """ @@ -199,6 +242,9 @@ cdef class Date64Value(ArrayValue): cdef class Time32Value(ArrayValue): + """ + Concrete class for time32 array elements. + """ def as_py(self): """ @@ -217,6 +263,9 @@ cdef class Time32Value(ArrayValue): cdef class Time64Value(ArrayValue): + """ + Concrete class for time64 array elements. + """ def as_py(self): """ @@ -269,6 +318,9 @@ else: cdef class TimestampValue(ArrayValue): + """ + Concrete class for timestamp array elements. + """ @property def value(self): @@ -301,6 +353,9 @@ cdef class TimestampValue(ArrayValue): cdef class HalfFloatValue(ArrayValue): + """ + Concrete class for float16 array elements. + """ def as_py(self): """ @@ -311,6 +366,9 @@ cdef class HalfFloatValue(ArrayValue): cdef class FloatValue(ArrayValue): + """ + Concrete class for float32 array elements. + """ def as_py(self): """ @@ -321,6 +379,9 @@ cdef class FloatValue(ArrayValue): cdef class DoubleValue(ArrayValue): + """ + Concrete class for float64 array elements. + """ def as_py(self): """ @@ -331,6 +392,9 @@ cdef class DoubleValue(ArrayValue): cdef class DecimalValue(ArrayValue): + """ + Concrete class for decimal128 array elements. 
+ """ def as_py(self): """ @@ -343,6 +407,9 @@ cdef class DecimalValue(ArrayValue): cdef class StringValue(ArrayValue): + """ + Concrete class for string (utf8) array elements. + """ def as_py(self): """ @@ -353,6 +420,9 @@ cdef class StringValue(ArrayValue): cdef class BinaryValue(ArrayValue): + """ + Concrete class for variable-sized binary array elements. + """ def as_py(self): """ @@ -380,14 +450,26 @@ cdef class BinaryValue(ArrayValue): cdef class ListValue(ArrayValue): + """ + Concrete class for list array elements. + """ def __len__(self): + """ + Return the number of values. + """ return self.length() def __getitem__(self, i): + """ + Return the value at the given index. + """ return self.getitem(_normalize_index(i, self.length())) def __iter__(self): + """ + Iterate over this element's values. + """ for i in range(len(self)): yield self.getitem(i) raise StopIteration @@ -419,6 +501,9 @@ cdef class ListValue(ArrayValue): cdef class UnionValue(ArrayValue): + """ + Concrete class for union array elements. + """ cdef void _set_array(self, const shared_ptr[CArray]& sp_array): self.sp_array = sp_array @@ -436,11 +521,16 @@ cdef class UnionValue(ArrayValue): def as_py(self): """ Return this value as a Python object. + + The exact type depends on the underlying union member. """ return self.getitem(self.index).as_py() cdef class FixedSizeBinaryValue(ArrayValue): + """ + Concrete class for fixed-size binary array elements. + """ def as_py(self): """ @@ -459,12 +549,18 @@ cdef class FixedSizeBinaryValue(ArrayValue): cdef class StructValue(ArrayValue): + """ + Concrete class for struct array elements. + """ cdef void _set_array(self, const shared_ptr[CArray]& sp_array): self.sp_array = sp_array self.ap = sp_array.get() def __getitem__(self, key): + """ + Return the child value for the given field name. + """ cdef: CStructType* type int index @@ -496,17 +592,23 @@ cdef class StructValue(ArrayValue): cdef class DictionaryValue(ArrayValue): + """ + Concrete class for dictionary-encoded array elements. + """ def as_py(self): """ Return this value as a Python object. + + The exact type depends on the dictionary value type. """ return self.dictionary_value.as_py() @property def index_value(self): """ - Return this value's underlying index as a Int32Value. + Return this value's underlying index as a ArrayValue of the right + signed integer type. """ cdef CDictionaryArray* darr = (self.sp_array.get()) indices = pyarrow_wrap_array(darr.indices()) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 29b2a1ea3c9a0..7c6aec34282fe 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -88,7 +88,9 @@ ctypedef CFixedWidthType* _CFixedWidthTypePtr cdef class DataType: """ - Base type for Apache Arrow data type instances. Wraps C++ arrow::DataType + Base class of all Arrow data types. + + Each data type is an *instance* of this class. """ def __cinit__(self): pass @@ -162,7 +164,7 @@ cdef class DataType: def to_pandas_dtype(self): """ - Return the NumPy dtype that would be used for storing this + Return the equivalent NumPy / Pandas dtype. """ cdef Type type_id = self.type.id() if type_id in _pandas_type_map: @@ -172,6 +174,9 @@ cdef class DataType: cdef class DictionaryType(DataType): + """ + Concrete class for dictionary data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -182,18 +187,31 @@ cdef class DictionaryType(DataType): @property def ordered(self): + """ + Whether the dictionary is ordered, i.e. 
whether the ordering of values + in the dictionary is important. + """ return self.dict_type.ordered() @property def index_type(self): + """ + The data type of dictionary indices (a signed integer type). + """ return pyarrow_wrap_data_type(self.dict_type.index_type()) @property def dictionary(self): + """ + The dictionary array, mapping dictionary indices to values. + """ return pyarrow_wrap_array(self.dict_type.dictionary()) cdef class ListType(DataType): + """ + Concrete class for list data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -204,10 +222,16 @@ cdef class ListType(DataType): @property def value_type(self): + """ + The data type of list values. + """ return pyarrow_wrap_data_type(self.list_type.value_type()) cdef class StructType(DataType): + """ + Concrete class for struct data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -215,13 +239,13 @@ cdef class StructType(DataType): cdef Field field(self, int i): """ - Alias for child(i) + Return a child field by its index. """ return self.child(i) cdef Field field_by_name(self, name): """ - Access a child field by its name rather than the column index. + Return a child field by its name rather than its index. """ cdef shared_ptr[CField] field @@ -232,13 +256,22 @@ return pyarrow_wrap_field(field) def __len__(self): + """ + Like num_children(). + """ return self.type.num_children() def __iter__(self): + """ + Iterate over struct fields, in order. + """ for i in range(len(self)): yield self[i] def __getitem__(self, i): + """ + Return the struct field with the given index or name. + """ if isinstance(i, six.string_types): return self.field_by_name(i) elif isinstance(i, six.integer_types): @@ -251,20 +284,32 @@ @property def num_children(self): + """ + The number of struct fields. + """ return self.type.num_children() cdef class UnionType(DataType): + """ + Concrete class for union data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @property def num_children(self): + """ + The number of union members. + """ return self.type.num_children() @property def mode(self): + """ + The mode of the union ("dense" or "sparse"). + """ cdef CUnionType* type = self.sp_type.get() cdef int mode = type.mode() if mode == _UnionMode_DENSE: @@ -274,13 +319,22 @@ def __len__(self): + """ + Like num_children(). + """ return self.type.num_children() def __iter__(self): + """ + Iterate over union members, in order. + """ for i in range(len(self)): yield self[i] def __getitem__(self, i): + """ + Return a child member by its index. + """ return self.child(i) def __reduce__(self): @@ -288,6 +342,9 @@ cdef class TimestampType(DataType): + """ + Concrete class for timestamp data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @property def unit(self): + """ + The timestamp unit ('s', 'ms', 'us' or 'ns'). + """ return timeunit_to_string(self.ts_type.unit()) @property def tz(self): + """ + The timestamp time zone, if any, or None. 
+ """ if self.ts_type.timezone().size() > 0: return frombytes(self.ts_type.timezone()) else: @@ -306,7 +369,7 @@ cdef class TimestampType(DataType): def to_pandas_dtype(self): """ - Return the NumPy dtype that would be used for storing this + Return the equivalent NumPy / Pandas dtype. """ if self.tz is None: return _pandas_type_map[_Type_TIMESTAMP] @@ -319,6 +382,9 @@ cdef class TimestampType(DataType): cdef class Time32Type(DataType): + """ + Concrete class for time32 data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -326,10 +392,16 @@ cdef class Time32Type(DataType): @property def unit(self): + """ + The time unit ('s', 'ms', 'us' or 'ns'). + """ return timeunit_to_string(self.time_type.unit()) cdef class Time64Type(DataType): + """ + Concrete class for time64 data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -337,10 +409,16 @@ cdef class Time64Type(DataType): @property def unit(self): + """ + The time unit ('s', 'ms', 'us' or 'ns'). + """ return timeunit_to_string(self.time_type.unit()) cdef class FixedSizeBinaryType(DataType): + """ + Concrete class for fixed-size binary data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): DataType.init(self, type) @@ -352,10 +430,16 @@ cdef class FixedSizeBinaryType(DataType): @property def byte_width(self): + """ + The binary size in bytes. + """ return self.fixed_size_binary_type.byte_width() cdef class Decimal128Type(FixedSizeBinaryType): + """ + Concrete class for decimal128 data types. + """ cdef void init(self, const shared_ptr[CDataType]& type): FixedSizeBinaryType.init(self, type) @@ -366,17 +450,22 @@ cdef class Decimal128Type(FixedSizeBinaryType): @property def precision(self): + """ + The decimal precision, in number of decimal digits (an integer). + """ return self.decimal128_type.precision() @property def scale(self): + """ + The decimal scale (an integer). + """ return self.decimal128_type.scale() cdef class Field: """ - Represents a named field, with a data type, nullability, and optional - metadata + A named field, with a data type, nullability, and optional metadata. Notes ----- From 76618f66ee8ce75cbe09d1d1a8c313dad3d94127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 10 Jan 2019 22:35:53 +0100 Subject: [PATCH 082/203] [Release/Java] Disable Flight test case --- .../src/test/java/org/apache/arrow/flight/TestBackPressure.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java b/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java index 6b23a40f29348..71c90d3a00d47 100644 --- a/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java +++ b/java/flight/src/test/java/org/apache/arrow/flight/TestBackPressure.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import com.google.common.collect.ImmutableList; @@ -78,6 +79,7 @@ public void ensureIndependentSteams() throws Exception { /** * Make sure that a stream doesn't go faster than the consumer is consuming. */ + @Ignore @Test public void ensureWaitUntilProceed() throws Exception { // request some values. 
From d7a68335cca4dd996ed6c9d2967f01601f15d5e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?=
Date: Fri, 11 Jan 2019 13:59:20 +0100
Subject: [PATCH 083/203] ARROW-4229: [Packaging] Set crossbow target
 explicitly to enable building arbitrary arrow repo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change eliminates the need for:

```
# checkout the tag under a new branch name and push that branch to your fork's remote
#
# to launch a crossbow build this branch _must_ exist on your remote
git checkout -b zero-one-zero-rc0 apache-arrow-0.1.0
git push -u zero-one-zero-rc0
```

during the [release procedure](https://cwiki.apache.org/confluence/display/ARROW/Release+Management+Guide).

Usage:

```bash
python dev/tasks/crossbow.py submit \
    -r apache/arrow \
    -t apache-arrow-0.12.0 \
    -v 0.12.0 \
    -g conda -g wheel -g linux
```

Testing it...

Author: Krisztián Szűcs

Closes #3369 from kszucs/arbitrary-crossbow-repo and squashes the following commits:

c97354ed  allow passing crossbow repo and branch explicitly
---
 dev/tasks/crossbow.py | 46 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py
index 74facf4b7fa01..d700384e55988 100755
--- a/dev/tasks/crossbow.py
+++ b/dev/tasks/crossbow.py
@@ -398,9 +398,10 @@ def __init__(self, head, branch, remote, version, email=None):
         self.version = version
 
     @classmethod
-    def from_repo(cls, repo):
+    def from_repo(cls, repo, version=None):
         assert isinstance(repo, Repo)
-        version = get_version(repo.path, local_scheme=lambda v: '')
+        if version is None:
+            version = get_version(repo.path, local_scheme=lambda v: '')
         return cls(head=str(repo.head.target),
                    email=repo.email,
                    branch=repo.branch.branch_name,
@@ -587,17 +588,48 @@ def load_tasks_from_config(config_path, task_names, group_names):
              help='Task configuration yml. Defaults to tasks.yml')
 @click.option('--arrow-version', '-v', default=None,
              help='Set target version explicitly')
+@click.option('--arrow-repo', '-r', default=None,
+              help='Set GitHub repo name explicitly, e.g. apache/arrow or '
+                   'kszucs/arrow; this repository is going to be cloned on '
+                   'the CI services. Note that no validation happens locally, '
+                   'so --arrow-branch and --arrow-sha may need to be '
+                   'defined as well')
+@click.option('--arrow-branch', '-b', default='master',
+              help='Give the branch name explicitly, e.g. master, ARROW-1949. '
+                   'Only available if --arrow-repo is set.')
+@click.option('--arrow-sha', '-t', default='HEAD',
+              help='Set commit SHA or tag name explicitly, e.g. f67a515, '
+                   'apache-arrow-0.11.1. Only available if both --arrow-repo '
+                   'and --arrow-branch are set.')
 @click.option('--dry-run/--push', default=False,
              help='Just display the rendered CI configurations without '
                   'submitting them')
 @click.pass_context
-def submit(ctx, task, group, job_prefix, config_path, arrow_version, dry_run):
+def submit(ctx, task, group, job_prefix, config_path, arrow_version,
+           arrow_repo, arrow_branch, arrow_sha, dry_run):
     queue, arrow = ctx.obj['queue'], ctx.obj['arrow']
 
-    target = Target.from_repo(arrow)
-
-    # explicitly set arrow version
-    if arrow_version:
-        target.version = arrow_version
+    if arrow_repo is not None:
+        values = {'version': arrow_version,
+                  'branch': arrow_branch,
+                  'sha': arrow_sha}
+        for k, v in values.items():
+            if not v:
+                raise ValueError('Must pass --arrow-{} argument'.format(k))
+
+        # Set repo url, branch and sha explicitly - this aims to make the
+        # release procedure a bit simpler.
+        # Note that the target revision's crossbow templates must be
+        # compatible with the locally checked out version of crossbow (which
+        # is the case in the release procedure), because the templates still
+        # contain some business logic (dependency installation, deployments)
+        # which will be reduced to a single command in the future.
+        remote = 'https://github.com/{}'.format(arrow_repo)
+        target = Target(head=arrow_sha, branch=arrow_branch, remote=remote,
+                        version=arrow_version)
+    else:
+        # instantiate target from the locally checked out repository and branch
+        target = Target.from_repo(arrow, version=arrow_version)
 
     no_rc_version = re.sub(r'-rc\d+\Z', '', target.version)
 
     params = {

From 54b35b4c13a8904286eca80bb76d9f4e7b619a87 Mon Sep 17 00:00:00 2001
From: Kouhei Sutou
Date: Fri, 11 Jan 2019 14:03:38 +0100
Subject: [PATCH 084/203] ARROW-4233: [Packaging] Use Docker to build source
 archive

Author: Kouhei Sutou

Closes #3376 from kou/packaging-source-use-docker and squashes the following commits:

404efe87  Use Docker to build source archive
---
 dev/release/02-source.sh      | 48 ++++++++++----------------------
 dev/release/source/Dockerfile | 48 ++++++++++++++++++++++++++++++++
 dev/release/source/build.sh   | 52 +++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+), 33 deletions(-)
 create mode 100644 dev/release/source/Dockerfile
 create mode 100755 dev/release/source/build.sh

diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh
index e224584223b4c..85dee3302e917 100755
--- a/dev/release/02-source.sh
+++ b/dev/release/02-source.sh
@@ -45,46 +45,28 @@ echo "Using commit $release_hash"
 
 tarball=${tag}.tar.gz
 
-extract_dir=tmp-apache-arrow
-rm -rf ${extract_dir}
+archive_name=tmp-apache-arrow
 
 # be conservative and use the release hash, even though git produces the same
 # archive (identical hashes) using the scm tag
-git archive ${release_hash} --prefix ${extract_dir}/ | tar xf -
-
-# build Apache Arrow C++ before building Apache Arrow GLib because
-# Apache Arrow GLib requires Apache Arrow C++.
-mkdir -p ${extract_dir}/cpp/build
-cpp_install_dir=${PWD}/${extract_dir}/cpp/install
-cd ${extract_dir}/cpp/build
-cmake .. \
-  -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \
-  -DCMAKE_INSTALL_LIBDIR=${cpp_install_dir}/lib \
-  -DARROW_BUILD_TESTS=no \
-  -DARROW_PARQUET=yes
-make -j8
-make install
-cd -
-
-# build source archive for Apache Arrow GLib by "make dist".
-cd ${extract_dir}/c_glib -./autogen.sh -./configure \ - PKG_CONFIG_PATH=$cpp_install_dir/lib/pkgconfig \ - --enable-gtk-doc -LD_LIBRARY_PATH=$cpp_install_dir/lib:$LD_LIBRARY_PATH make -j8 -make dist -tar xzf *.tar.gz -rm *.tar.gz -cd - -rm -rf tmp-c_glib/ -mv ${extract_dir}/c_glib/apache-arrow-glib-* tmp-c_glib/ -rm -rf ${extract_dir} +git archive ${release_hash} --prefix ${archive_name}/ > ${archive_name}.tar.gz + +dist_c_glib_tar_gz=c_glib.tar.gz +docker_image_name=apache-arrow/release-source +DEBUG=yes docker build -t ${docker_image_name} ${SOURCE_DIR}/source +docker \ + run \ + --rm \ + --interactive \ + --volume "$PWD":/host \ + ${docker_image_name} \ + /build.sh ${archive_name} ${dist_c_glib_tar_gz} # replace c_glib/ by tar.gz generated by "make dist" rm -rf ${tag} git archive $release_hash --prefix ${tag}/ | tar xf - rm -rf ${tag}/c_glib -mv tmp-c_glib ${tag}/c_glib +tar xf ${dist_c_glib_tar_gz} -C ${tag} +rm -f ${dist_c_glib_tar_gz} # Create new tarball from modified source directory tar czhf ${tarball} ${tag} diff --git a/dev/release/source/Dockerfile b/dev/release/source/Dockerfile new file mode 100644 index 0000000000000..70ed8aa866dd0 --- /dev/null +++ b/dev/release/source/Dockerfile @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM ubuntu:18.04 + +ENV DEBIAN_FRONTEND noninteractive + +RUN \ + apt update && \ + apt install -y -V \ + autoconf-archive \ + bison \ + clang-6.0 \ + cmake \ + flex \ + g++ \ + gcc \ + gtk-doc-tools \ + libboost-filesystem-dev \ + libboost-regex-dev \ + libboost-system-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libprotobuf-dev \ + libprotoc-dev \ + libtool \ + lsb-release \ + make \ + pkg-config \ + protobuf-compiler && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* + +COPY build.sh /build.sh diff --git a/dev/release/source/build.sh b/dev/release/source/build.sh new file mode 100755 index 0000000000000..039d07591f2ef --- /dev/null +++ b/dev/release/source/build.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +archive_name=$1 +dist_c_glib_tar_gz=$2 + +tar xf /host/${archive_name}.tar.gz + +# build Apache Arrow C++ before building Apache Arrow GLib because +# Apache Arrow GLib requires Apache Arrow C++. +mkdir -p ${archive_name}/cpp/build +cpp_install_dir=${PWD}/${archive_name}/cpp/install +cd ${archive_name}/cpp/build +cmake .. \ + -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_PARQUET=yes +make -j8 +make install +cd - + +# build source archive for Apache Arrow GLib by "make dist". +cd ${archive_name}/c_glib +./autogen.sh +./configure \ + PKG_CONFIG_PATH=${cpp_install_dir}/lib/pkgconfig \ + --enable-gtk-doc +LD_LIBRARY_PATH=${cpp_install_dir}/lib make -j8 +make dist +tar xzf *.tar.gz +rm *.tar.gz +cd - +mv ${archive_name}/c_glib/apache-arrow-glib-* c_glib/ +tar czf /host/${dist_c_glib_tar_gz} c_glib From 38a628dff6fcd5f3c7e6b402f5ceb35cc8bd52c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 12 Jan 2019 06:03:45 +0900 Subject: [PATCH 085/203] ARROW-4238: [Packaging] Fix RC version conflict between crossbow and rake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This shouldn't affect the current release candidate, the binaries are already building this way. Can be merged after RC0 is finished (either way). Author: Krisztián Szűcs Closes #3380 from kszucs/ARROW-4238 and squashes the following commits: 0e865002 use no_rc_version everywhere --- dev/release/00-prepare.sh | 4 ++ dev/tasks/conda-recipes/appveyor.yml | 2 +- dev/tasks/conda-recipes/travis.linux.yml | 2 +- dev/tasks/conda-recipes/travis.osx.yml | 2 +- dev/tasks/crossbow.py | 4 +- dev/tasks/python-wheels/appveyor.yml | 2 +- dev/tasks/python-wheels/travis.linux.yml | 2 +- dev/tasks/python-wheels/travis.osx.yml | 2 +- dev/tasks/tasks.yml | 86 ++++++++++++------------ 9 files changed, 55 insertions(+), 51 deletions(-) diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 1c233a35c21ef..96bfd69115a05 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -107,6 +107,8 @@ update_versions() { } if [ "$#" -eq 2 ]; then + ############################## Pre-Tag Commits ############################## + version=$1 next_version=$2 next_version_snapshot=${next_version}-SNAPSHOT @@ -136,6 +138,8 @@ if [ "$#" -eq 2 ]; then mvn release:prepare -Dtag=${tag} -DreleaseVersion=${version} -DautoVersionSubmodules -DdevelopmentVersion=${next_version_snapshot} cd - + ############################## Post-Tag Commits ############################# + echo "Updating versions for ${next_version_snapshot}" update_versions "${version}" "${next_version}" "snapshot" git commit -m "[Release] Update versions for ${next_version_snapshot}" diff --git a/dev/tasks/conda-recipes/appveyor.yml b/dev/tasks/conda-recipes/appveyor.yml index cdc9d97537156..3d3ba43be7584 100644 --- a/dev/tasks/conda-recipes/appveyor.yml +++ b/dev/tasks/conda-recipes/appveyor.yml @@ -16,7 +16,7 @@ # under the License. 
environment: - ARROW_VERSION: {{ arrow.version }} + ARROW_VERSION: {{ arrow.no_rc_version }} # regardless of the python version we build against CONDA_INSTALL_LOCN: C:\Miniconda36-x64 diff --git a/dev/tasks/conda-recipes/travis.linux.yml b/dev/tasks/conda-recipes/travis.linux.yml index a3c2929b7e6db..f0c4c77adae06 100644 --- a/dev/tasks/conda-recipes/travis.linux.yml +++ b/dev/tasks/conda-recipes/travis.linux.yml @@ -25,7 +25,7 @@ if: tag IS blank env: global: - TRAVIS_TAG={{ task.tag }} - - ARROW_VERSION={{ arrow.version }} + - ARROW_VERSION={{ arrow.no_rc_version }} - PYTHONUNBUFFERED=1 install: diff --git a/dev/tasks/conda-recipes/travis.osx.yml b/dev/tasks/conda-recipes/travis.osx.yml index 6b3e561a3c5b0..23fd6e104ab4e 100644 --- a/dev/tasks/conda-recipes/travis.osx.yml +++ b/dev/tasks/conda-recipes/travis.osx.yml @@ -25,7 +25,7 @@ if: tag IS blank env: global: - TRAVIS_TAG={{ task.tag }} - - ARROW_VERSION={{ arrow.version }} + - ARROW_VERSION={{ arrow.no_rc_version }} - PYTHONUNBUFFERED=1 before_install: diff --git a/dev/tasks/crossbow.py b/dev/tasks/crossbow.py index d700384e55988..2d0c53089d056 100755 --- a/dev/tasks/crossbow.py +++ b/dev/tasks/crossbow.py @@ -396,6 +396,7 @@ def __init__(self, head, branch, remote, version, email=None): self.branch = branch self.remote = remote self.version = version + self.no_rc_version = re.sub(r'-rc\d+\Z', '', version) @classmethod def from_repo(cls, repo, version=None): @@ -631,10 +632,9 @@ def submit(ctx, task, group, job_prefix, config_path, arrow_version, # instantiate target from the locally checked out repository and branch target = Target.from_repo(arrow, version=arrow_version) - no_rc_version = re.sub(r'-rc\d+\Z', '', target.version) params = { 'version': target.version, - 'no_rc_version': no_rc_version, + 'no_rc_version': target.no_rc_version, } # task and group variables are lists, containing multiple values diff --git a/dev/tasks/python-wheels/appveyor.yml b/dev/tasks/python-wheels/appveyor.yml index c220f922bc45c..be6ad302e1a5c 100644 --- a/dev/tasks/python-wheels/appveyor.yml +++ b/dev/tasks/python-wheels/appveyor.yml @@ -24,7 +24,7 @@ environment: PYTHON: "{{ python_version }}" MSVC_DEFAULT_OPTIONS: ON ARROW_SRC: C:\apache-arrow - PYARROW_VERSION: {{ arrow.version }} + PYARROW_VERSION: {{ arrow.no_rc_version }} PYARROW_REF: {{ arrow.head }} init: diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml index 17888ccc9f1bb..b5cbc65bc7e7e 100644 --- a/dev/tasks/python-wheels/travis.linux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -40,7 +40,7 @@ script: # build wheel - pushd arrow/python/manylinux1 - docker run --shm-size=2g - -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.version }} + -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} -e PYTHON_VERSIONS="{{ python_version }},{{ unicode_width }}" -v $PWD:/io -v $PWD/../../:/arrow diff --git a/dev/tasks/python-wheels/travis.osx.yml b/dev/tasks/python-wheels/travis.osx.yml index c6bd010da4ebc..a98841335e728 100644 --- a/dev/tasks/python-wheels/travis.osx.yml +++ b/dev/tasks/python-wheels/travis.osx.yml @@ -26,7 +26,7 @@ env: - PLAT=x86_64 - TRAVIS_TAG={{ task.tag }} - MACOSX_DEPLOYMENT_TARGET="10.9" - - PYARROW_VERSION={{ arrow.version }} + - PYARROW_VERSION={{ arrow.no_rc_version }} - PYARROW_BUILD_VERBOSE=1 - MB_PYTHON_VERSION={{ python_version }} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 4b10b57fd0990..ce311e546d495 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -66,7 +66,7 @@ tasks: # 
artifacts: list of regex patterns, each needs to match a single github # release asset, version variable is replaced in the pattern # e.g.: - # - pyarrow-{version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2 + # - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0-linux-64.tar.bz2 ############################## Conda Linux ################################## @@ -76,8 +76,8 @@ tasks: params: variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython2.7.yaml artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 conda-linux-gcc-py36: platform: linux @@ -85,8 +85,8 @@ tasks: params: variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython3.6.yaml artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 conda-linux-gcc-py37: platform: linux @@ -94,8 +94,8 @@ tasks: params: variant_config_file: variants/linux_c_compilergcccxx_compilergxxpython3.7.yaml artifacts: - - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 conda-linux-toolchain-py27: platform: linux @@ -103,8 +103,8 @@ tasks: params: variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 conda-linux-toolchain-py36: platform: linux @@ -112,8 +112,8 @@ tasks: params: variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 conda-linux-toolchain-py37: platform: linux @@ -121,8 +121,8 @@ tasks: params: variant_config_file: variants/linux_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml artifacts: - - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Conda OSX #################################### @@ -132,8 +132,8 @@ tasks: params: variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 conda-osx-clang-py36: platform: osx @@ -141,8 +141,8 @@ tasks: params: variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 conda-osx-clang-py37: platform: osx @@ -150,8 +150,8 @@ tasks: params: variant_config_file: 
variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml artifacts: - - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 conda-osx-toolchain-py27: platform: osx @@ -159,8 +159,8 @@ tasks: params: variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython2.7.yaml artifacts: - - arrow-cpp-{version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py27(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 conda-osx-toolchain-py36: platform: osx @@ -168,8 +168,8 @@ tasks: params: variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.6.yaml artifacts: - - arrow-cpp-{version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 conda-osx-toolchain-py37: platform: osx @@ -177,8 +177,8 @@ tasks: params: variant_config_file: variants/osx_c_compilertoolchain_ccxx_compilertoolchain_cxxpython3.7.yaml artifacts: - - arrow-cpp-{version}-py37(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Conda Windows ################################ @@ -188,8 +188,8 @@ tasks: params: variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.6.yaml artifacts: - - arrow-cpp-{version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py36(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py36_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 conda-win-vs2015-py37: platform: win @@ -197,8 +197,8 @@ tasks: params: variant_config_file: variants\win_c_compilervs2015cxx_compilervs2015python3.7.yaml artifacts: - - arrow-cpp-{version}-py37_vc14(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{version}-py37(h[a-z0-9]+)_0.tar.bz2 + - arrow-cpp-{no_rc_version}-py37_vc14(h[a-z0-9]+)_0.tar.bz2 + - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 ############################## Wheel Linux ################################## @@ -210,7 +210,7 @@ tasks: unicode_width: 16 test_docker_images: [] artifacts: - - pyarrow-{version}-cp27-cp27m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp27-cp27m-manylinux1_x86_64.whl wheel-linux-cp27mu: platform: linux @@ -221,7 +221,7 @@ tasks: test_docker_images: - python:2.7-slim # debian ucs4 artifacts: - - pyarrow-{version}-cp27-cp27mu-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp27-cp27mu-manylinux1_x86_64.whl wheel-linux-cp35m: platform: linux @@ -232,7 +232,7 @@ tasks: test_docker_images: - python:3.5-slim artifacts: - - pyarrow-{version}-cp35-cp35m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp35-cp35m-manylinux1_x86_64.whl wheel-linux-cp36m: platform: linux @@ -243,7 +243,7 @@ tasks: test_docker_images: - python:3.6-slim artifacts: - - pyarrow-{version}-cp36-cp36m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp36-cp36m-manylinux1_x86_64.whl wheel-linux-cp37m: platform: linux @@ -254,7 +254,7 @@ tasks: test_docker_images: - python:3.7-slim artifacts: - - pyarrow-{version}-cp37-cp37m-manylinux1_x86_64.whl + - pyarrow-{no_rc_version}-cp37-cp37m-manylinux1_x86_64.whl ############################## Wheel OSX 
#################################### @@ -264,7 +264,7 @@ tasks: params: python_version: 2.7 artifacts: - - pyarrow-{version}-cp27-cp27m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp27-cp27m-macosx_10_6_intel.whl wheel-osx-cp35m: platform: osx @@ -272,7 +272,7 @@ tasks: params: python_version: 3.5 artifacts: - - pyarrow-{version}-cp35-cp35m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp35-cp35m-macosx_10_6_intel.whl wheel-osx-cp36m: platform: osx @@ -280,7 +280,7 @@ tasks: params: python_version: 3.6 artifacts: - - pyarrow-{version}-cp36-cp36m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp36-cp36m-macosx_10_6_intel.whl wheel-osx-cp37m: platform: osx @@ -288,7 +288,7 @@ tasks: params: python_version: 3.7 artifacts: - - pyarrow-{version}-cp37-cp37m-macosx_10_6_intel.whl + - pyarrow-{no_rc_version}-cp37-cp37m-macosx_10_6_intel.whl ############################## Wheel Windows ################################ @@ -298,7 +298,7 @@ tasks: params: python_version: 3.5 artifacts: - - pyarrow-{version}-cp35-cp35m-win_amd64.whl + - pyarrow-{no_rc_version}-cp35-cp35m-win_amd64.whl wheel-win-cp36m: platform: win @@ -306,7 +306,7 @@ tasks: params: python_version: 3.6 artifacts: - - pyarrow-{version}-cp36-cp36m-win_amd64.whl + - pyarrow-{no_rc_version}-cp36-cp36m-win_amd64.whl wheel-win-cp37m: platform: win @@ -314,7 +314,7 @@ tasks: params: python_version: 3.7 artifacts: - - pyarrow-{version}-cp37-cp37m-win_amd64.whl + - pyarrow-{no_rc_version}-cp37-cp37m-win_amd64.whl ############################## Linux PKGS #################################### @@ -597,10 +597,10 @@ tasks: platform: linux template: gandiva-jars/travis.linux.yml artifacts: - - arrow-gandiva-{version}-SNAPSHOT.jar + - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar gandiva-jar-osx: platform: osx template: gandiva-jars/travis.osx.yml artifacts: - - arrow-gandiva-{version}-SNAPSHOT.jar + - arrow-gandiva-{no_rc_version}-SNAPSHOT.jar From 06de47afcb7532a9646089ca23bd7d1e62eddc10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 12 Jan 2019 06:04:50 +0900 Subject: [PATCH 086/203] ARROW-4237: [Packaging] Fix CMAKE_INSTALL_LIBDIR in release verification script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Required to verify RC0 > commit msg should be: "lib instead of $ARROW_HOME/lib" :) Author: Krisztián Szűcs Closes #3381 from kszucs/ARROW-4237 and squashes the following commits: f831b0e3 lib instead of /lib --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 71324ec12f7c5..0e4609735ba53 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -160,7 +160,7 @@ test_and_install_cpp() { ARROW_CMAKE_OPTIONS=" -DCMAKE_INSTALL_PREFIX=$ARROW_HOME --DCMAKE_INSTALL_LIBDIR=$ARROW_HOME/lib +-DCMAKE_INSTALL_LIBDIR=lib -DARROW_PLASMA=ON -DARROW_ORC=ON -DARROW_PYTHON=ON From 9178ad8c3c9ea371c3b7edb3fcee3073f5082bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 12 Jan 2019 06:32:52 +0900 Subject: [PATCH 087/203] ARROW-4241: [Packaging] Disable crossbow conda OSX clang builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They are expected to fail. 
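As an aside on the `{no_rc_version}` substitution that the crossbow task definitions above rely on — a minimal sketch reusing the regex from the `Target` class in `dev/tasks/crossbow.py`; the helper name here is illustrative, not part of the codebase:

```python
import re

# Release candidates are tagged like 0.12.0-rc0, but the built artifacts
# carry the plain version, so tasks.yml matches artifact names against
# the version with any "-rcN" suffix stripped.
def no_rc_version(version):
    return re.sub(r'-rc\d+\Z', '', version)

assert no_rc_version('0.12.0') == '0.12.0'
assert no_rc_version('0.12.0-rc0') == '0.12.0'
assert no_rc_version('0.12.0-rc12') == '0.12.0'
```
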
Author: Krisztián Szűcs Closes #3383 from kszucs/disable_conda_clang and squashes the following commits: 42417bdb Disable conda OSX clang builds --- dev/tasks/tasks.yml | 58 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index ce311e546d495..e6764580966f0 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -25,9 +25,9 @@ groups: - conda-linux-toolchain-py27 - conda-linux-toolchain-py36 - conda-linux-toolchain-py37 - - conda-osx-clang-py27 - - conda-osx-clang-py36 - - conda-osx-clang-py37 + # - conda-osx-clang-py27 + # - conda-osx-clang-py36 + # - conda-osx-clang-py37 - conda-osx-toolchain-py27 - conda-osx-toolchain-py36 - conda-osx-toolchain-py37 @@ -126,32 +126,32 @@ tasks: ############################## Conda OSX #################################### - conda-osx-clang-py27: - platform: osx - template: conda-recipes/travis.osx.yml - params: - variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml - artifacts: - - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 - - conda-osx-clang-py36: - platform: osx - template: conda-recipes/travis.osx.yml - params: - variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml - artifacts: - - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 - - conda-osx-clang-py37: - platform: osx - template: conda-recipes/travis.osx.yml - params: - variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml - artifacts: - - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 - - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + # conda-osx-clang-py27: + # platform: osx + # template: conda-recipes/travis.osx.yml + # params: + # variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython2.7.yaml + # artifacts: + # - arrow-cpp-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + # - pyarrow-{no_rc_version}-py27(h[a-z0-9]+)_0.tar.bz2 + # + # conda-osx-clang-py36: + # platform: osx + # template: conda-recipes/travis.osx.yml + # params: + # variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.6.yaml + # artifacts: + # - arrow-cpp-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + # - pyarrow-{no_rc_version}-py36(h[a-z0-9]+)_0.tar.bz2 + # + # conda-osx-clang-py37: + # platform: osx + # template: conda-recipes/travis.osx.yml + # params: + # variant_config_file: variants/osx_c_compilerclangcxx_compilerclangxxpython3.7.yaml + # artifacts: + # - arrow-cpp-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 + # - pyarrow-{no_rc_version}-py37(h[a-z0-9]+)_0.tar.bz2 conda-osx-toolchain-py27: platform: osx From 0a553b7eb9dc65e53254abe31e7841b31ea132a9 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Fri, 11 Jan 2019 22:38:18 +0100 Subject: [PATCH 088/203] ARROW-4240: [Packaging] Add missing Plasma GLib and Gandiva GLib documents to souce archive Author: Kouhei Sutou Closes #3382 from kou/packaging-source-archive-include-all-built-documents and squashes the following commits: ef5bd01c Add missing Plasma GLib and Gandiva GLib documents to source archive --- dev/release/source/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/release/source/build.sh b/dev/release/source/build.sh index 039d07591f2ef..25775fdc3e813 100755 --- a/dev/release/source/build.sh +++ b/dev/release/source/build.sh @@ -32,6 +32,8 @@ cd ${archive_name}/cpp/build cmake .. 
\ -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \ -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_PLASMA=yes \ + -DARROW_GANDIVA=yes \ -DARROW_PARQUET=yes make -j8 make install From f7eb1f79619cb4f55e3b2cd46feae0f3dd0ef05b Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sat, 12 Jan 2019 09:18:35 +0100 Subject: [PATCH 089/203] ARROW-4239: [Packaging] Fix version update for the next version This also includes BSD sed support. Author: Kouhei Sutou Closes #3385 from kou/packaging-fix-version-update and squashes the following commits: add6fd73 Fix version update for the next version --- dev/release/00-prepare.sh | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index 96bfd69115a05..d57c51739b100 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -28,17 +28,17 @@ update_versions() { case ${type} in release) - version=${base_version} - r_version=${base_version} + local version=${base_version} + local r_version=${base_version} ;; snapshot) - version=${next_version}-SNAPSHOT - r_version=${base_version}.9000 + local version=${next_version}-SNAPSHOT + local r_version=${base_version}.9000 ;; esac cd "${SOURCE_DIR}/../../cpp" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^set\(ARROW_VERSION \".+\"\)/set(ARROW_VERSION \"${version}\")/" \ CMakeLists.txt rm -f CMakeLists.txt.bak @@ -46,10 +46,10 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../c_glib" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^m4_define\(\[arrow_glib_version\], .+\)/m4_define([arrow_glib_version], ${version})/" \ configure.ac - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^version = '.+'/version = '${version}'/" \ meson.build rm -f configure.ac.bak meson.build.bak @@ -58,7 +58,7 @@ update_versions() { # We can enable this when Arrow JS uses the same version. 
# cd "${SOURCE_DIR}/../../js" - # sed -i.bak -r -e \ + # sed -i.bak -E -e \ # "s/^ \"version\": \".+\"/ \"version\": \"${version}\"/" \ # package.json # rm -f package.json @@ -66,7 +66,7 @@ update_versions() { # cd - cd "${SOURCE_DIR}/../../matlab" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^set\(MLARROW_VERSION \".+\"\)/set(MLARROW_VERSION \"${version}\")/" \ CMakeLists.txt rm -f CMakeLists.txt.bak @@ -74,7 +74,7 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../python" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^default_version: '.+'/default_version = '${version}'/" \ setup.py rm -f setup.py.bak @@ -82,7 +82,7 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../r" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^Version: .+/Version: ${r_version}/" \ DESCRIPTION rm -f DESCRIPTION.bak @@ -90,7 +90,7 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../ruby" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^ VERSION = \".+\"/ VERSION = \"${version}\"/g" \ */*/*/version.rb rm -f */*/*/version.rb.bak @@ -98,7 +98,7 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../rust" - sed -i.bak -r -e \ + sed -i.bak -E -e \ "s/^version = \".+\"/version = \"${version}\"/g" \ arrow/Cargo.toml parquet/Cargo.toml rm -f arrow/Cargo.toml.bak parquet/Cargo.toml.bak @@ -145,8 +145,8 @@ if [ "$#" -eq 2 ]; then git commit -m "[Release] Update versions for ${next_version_snapshot}" echo "Updating .deb package names for ${next_version}" - deb_lib_suffix=$(echo $version | sed -r -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') - next_deb_lib_suffix=$(echo $next_version | sed -r -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') + deb_lib_suffix=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') + next_deb_lib_suffix=$(echo $next_version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') cd $SOURCE_DIR/../tasks/linux-packages/ for target in debian*/lib*${deb_lib_suffix}.install; do git mv \ @@ -154,17 +154,17 @@ if [ "$#" -eq 2 ]; then $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/") done deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet|plasma)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" debian*/control + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control rm -f debian*/control.bak git add debian*/control cd - cd $SOURCE_DIR/../tasks/ - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" tasks.yml + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml rm -f tasks.yml.bak git add tasks.yml cd - cd $SOURCE_DIR - sed -i.bak -r -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt + sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt rm -f rat_exclude_files.txt.bak git add rat_exclude_files.txt git commit -m "[Release] Update .deb package names for $next_version" From 3e97ca1c207cacfb5340940bc86f95107849cbcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Sat, 12 Jan 2019 09:52:10 +0100 Subject: [PATCH 090/203] ARROW-4243: [Python] Fix test failures with pandas 0.24.0rc1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Author: Krisztián Szűcs Author: Kouhei Sutou Closes #3387 from kou/python-pandas and squashes the following commits: 8c9cb641 fix python dockerfile 27d15a6d Fix test failures with pandas 0.24.0rc1 --- python/Dockerfile | 3 +-- python/pyarrow/pandas_compat.py | 8 ++++++-- python/pyarrow/serialization.py | 21 +++++++++++++++++++++ python/pyarrow/tests/test_convert_pandas.py | 6 
++---- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/python/Dockerfile b/python/Dockerfile index ecabc94493cf0..e20f266da216d 100644 --- a/python/Dockerfile +++ b/python/Dockerfile @@ -21,9 +21,8 @@ FROM arrow:cpp ARG PYTHON_VERSION=3.6 ADD ci/conda_env_python.yml /arrow/ci/ RUN conda install -c conda-forge \ - nomkl \ --file arrow/ci/conda_env_python.yml \ - python=$PYTHON_VERSION && \ + python=$PYTHON_VERSION nomkl && \ conda clean --all ENV ARROW_PYTHON=ON \ diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index a5d8621590f13..403f15dfc2cdb 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -33,7 +33,7 @@ def infer_dtype(column): try: - return pd.api.types.infer_dtype(column) + return pd.api.types.infer_dtype(column, skipna=False) except AttributeError: return pd.lib.infer_dtype(column) @@ -111,6 +111,9 @@ def get_logical_type_from_numpy(pandas_collection): except KeyError: if hasattr(pandas_collection.dtype, 'tz'): return 'datetimetz' + # See https://github.com/pandas-dev/pandas/issues/24739 + if str(pandas_collection.dtype) == 'datetime64[ns]': + return 'datetime64[ns]' result = infer_dtype(pandas_collection) if result == 'string': @@ -477,7 +480,8 @@ def dataframe_to_serialized_dict(frame): if isinstance(block, _int.DatetimeTZBlock): block_data['timezone'] = pa.lib.tzinfo_to_string(values.tz) - values = values.values + if hasattr(values, 'values'): + values = values.values elif isinstance(block, _int.CategoricalBlock): block_data.update(dictionary=values.categories, ordered=values.ordered) diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py index 22f7c0cb52ab8..6bbe1c7bc896c 100644 --- a/python/pyarrow/serialization.py +++ b/python/pyarrow/serialization.py @@ -174,6 +174,27 @@ def _deserialize_pandas_series(data): custom_serializer=_pickle_to_buffer, custom_deserializer=_load_pickle_from_buffer) + if hasattr(pd.core.arrays, 'interval'): + context.register_type( + pd.core.arrays.interval.IntervalArray, + 'pd.core.arrays.interval.IntervalArray', + custom_serializer=_pickle_to_buffer, + custom_deserializer=_load_pickle_from_buffer) + + if hasattr(pd.core.arrays, 'period'): + context.register_type( + pd.core.arrays.period.PeriodArray, + 'pd.core.arrays.period.PeriodArray', + custom_serializer=_pickle_to_buffer, + custom_deserializer=_load_pickle_from_buffer) + + if hasattr(pd.core.arrays, 'datetimes'): + context.register_type( + pd.core.arrays.datetimes.DatetimeArray, + 'pd.core.arrays.datetimes.DatetimeArray', + custom_serializer=_pickle_to_buffer, + custom_deserializer=_load_pickle_from_buffer) + context.register_type( pd.DataFrame, 'pd.DataFrame', custom_serializer=_serialize_pandas_dataframe, diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index cd7f4999ace3a..466d2e9562dd0 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -837,8 +837,7 @@ def test_timestamps_with_timezone(self): '2010-08-13T05:46:57.437'], dtype='datetime64[ms]') }) - df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern') - .to_frame()) + df['datetime64'] = df['datetime64'].dt.tz_localize('US/Eastern') _check_pandas_roundtrip(df) _check_series_roundtrip(df['datetime64']) @@ -852,8 +851,7 @@ def test_timestamps_with_timezone(self): '2010-08-13T05:46:57.437699912'], dtype='datetime64[ns]') }) - df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern') - .to_frame()) + 
df['datetime64'] = df['datetime64'].dt.tz_localize('US/Eastern') _check_pandas_roundtrip(df) From be663c14637b2bdfef935946b6e91b6317219332 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sun, 13 Jan 2019 11:14:51 +0100 Subject: [PATCH 091/203] ARROW-4247: [Release] Update verify script for 0.12.0 * C++: -DARROW_GPU -> -DARROW_CUDA * C++: Enable Gandiva * C++: default: -DARROW_BUILD_TESTS=ON -> OFF * Ruby: red-plasma, red-gandiva and red-parquet are added * Rust: The top-level Cargo.toml is a virtual manifest Author: Kouhei Sutou Closes #3389 from kou/release-update-verify-script and squashes the following commits: f019a3fc Update verify script for 0.12.0 --- dev/release/verify-release-candidate.sh | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0e4609735ba53..c8b9c54c82c4c 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -51,10 +51,10 @@ HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd) ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' -: ${ARROW_HAVE_GPU:=} -if [ -z "$ARROW_HAVE_GPU" ]; then +: ${ARROW_HAVE_CUDA:=} +if [ -z "$ARROW_HAVE_CUDA" ]; then if nvidia-smi --list-gpus 2>&1 > /dev/null; then - ARROW_HAVE_GPU=yes + ARROW_HAVE_CUDA=yes fi fi @@ -164,13 +164,15 @@ test_and_install_cpp() { -DARROW_PLASMA=ON -DARROW_ORC=ON -DARROW_PYTHON=ON +-DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_BOOST_USE_SHARED=ON -DCMAKE_BUILD_TYPE=release +-DARROW_BUILD_TESTS=ON -DARROW_BUILD_BENCHMARKS=ON " - if [ "$ARROW_HAVE_GPU" = "yes" ]; then - ARROW_CMAKE_OPTIONS="$ARROW_CMAKE_OPTIONS -DARROW_GPU=ON" + if [ "$ARROW_HAVE_CUDA" = "yes" ]; then + ARROW_CMAKE_OPTIONS="$ARROW_CMAKE_OPTIONS -DARROW_CUDA=ON" fi cmake $ARROW_CMAKE_OPTIONS .. @@ -238,17 +240,17 @@ test_js() { test_ruby() { pushd ruby - pushd red-arrow - bundle install --path vendor/bundle - bundle exec ruby test/run-test.rb - popd + local modules="red-arrow red-plasma red-gandiva red-parquet" + if [ "${ARROW_HAVE_CUDA}" = "yes" ]; then + modules="${modules} red-arrow-cuda" + fi - if [ "$ARROW_HAVE_GPU" = "yes" ]; then - pushd red-arrow-gpu + for module in ${modules}; do + pushd ${module} bundle install --path vendor/bundle bundle exec ruby test/run-test.rb popd - fi + done popd } @@ -274,9 +276,7 @@ test_rust() { cargo fmt --all -- --check # raises on any warnings - cargo rustc -- -D warnings - - cargo build + RUSTFLAGS="-D warnings" cargo build cargo test popd From 5598d2f42573ed19e7db4aae7adb02af2cd4ccd0 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Sun, 13 Jan 2019 13:35:22 -0600 Subject: [PATCH 092/203] ARROW-2828: [JS] Refactor Data, Vectors, Visitor, Typings, build, tests, dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's the big one; The Great ArrowJS Refactor of 2018. Thanks for bearing with me through yet another huge PR. [Check out this sweet gif](https://user-images.githubusercontent.com/178183/50551046-19a94d00-0c30-11e9-80ed-74b9290e8c49.gif) of all the new features in action. 
With streaming getting to a good place, we've already started working on demos/integrations with other projects like [uber/deck.gl](https://github.com/Pessimistress/deck.gl/tree/a5940e20cb1659a44cba7839082b0803a997a12f/test/apps/arrow) :tada: ### The JIRAs In addition to everything I detail below, this PR closes the following JIRAs: * [ARROW-2828](https://issues.apache.org/jira/browse/ARROW-2828): Refactor Vector Data classes * [ARROW-2839](https://issues.apache.org/jira/browse/ARROW-2839): Support whatwg/streams in IPC reader/writer * [ARROW-2235](https://issues.apache.org/jira/browse/ARROW-2235): Add tests for IPC messages split across multiple buffers * [ARROW-3337](https://issues.apache.org/jira/browse/ARROW-3337): IPC writer doesn't serialize the dictionary of nested Vectors * [ARROW-3689](https://issues.apache.org/jira/browse/ARROW-3689): Upgrade to TS 3.1 * [ARROW-3560](https://issues.apache.org/jira/browse/ARROW-3560): Remove @std/esm * [ARROW-3561](https://issues.apache.org/jira/browse/ARROW-3561): Update ts-jest * [ARROW-2778](https://issues.apache.org/jira/browse/ARROW-2778): Add Utf8Vector.from * [ARROW-2766](https://issues.apache.org/jira/browse/ARROW-2766): Add ability to construct a Table from a list of Arrays/TypedArrays ### The stats The gulp scripts have been updated to parallelize as much as possible. These are the numbers from my Intel Core i7-8700K CPU @ 3.70GHz × 12 running Ubuntu 18.04 and node v11.6.0: ```sh $ time npm run build [22:11:04] Finished 'build' after 39 s real 0m40.341s user 4m55.428s sys 0m5.559s ``` ```sh $ npm run test:coverage =============================== Coverage summary =============================== Statements : 90.45% ( 4321/4777 ) Branches : 76.7% ( 1570/2047 ) Functions : 84.62% ( 1106/1307 ) Lines : 91.5% ( 3777/4128 ) ================================================================================ Test Suites: 21 passed, 21 total Tests: 5644 passed, 5644 total Snapshots: 0 total Time: 16.023s ``` ### The fixes * `Vector#indexOf(value)` works for all DataTypes * `Vector#set(i, value)` now works for all DataTypes * Reading from node streams is now fully zero-copy * The IPC writers now serialize dictionaries of nested Vectors correctly (ARROW-3337) * DictionaryBatches marked as `isDelta` now correctly updates the dictionaries for all Vectors that point to that dictionary, even if they were created before the delta batch arrived * A few `arrow2csv` fixes: * Ignore `stdin` if it's a TTY * Now read all the Arrow formats from `stdin` * Always show the `help` text when we don't understand the input * Proper backpressure support to play nicely with other Unix utilities like `head` and `less` * [Fixes an unfiled bug](https://github.com/trxcllnt/arrow/commit/070ec9809a9f5822d62268252d0570366ec40883) we encountered last week where JS would throw an error creating RowProxies for a Table or Struct with duplicate column names ### The upgrades * New zero-copy Message/RecordBatchReaders! 
* [`RecordBatchReader.from()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/from-inference-tests.ts#L37) will peek at the underlying bytes, and return the correct implementation based on whether the data is an Arrow File, Stream, or JSON * [`RecordBatchFileReader`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/file-reader-tests.ts#L74) now supports random-access seek, enabling more efficient web-worker/multi-process workflows * [`RecordBatchStreamReader`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/streams-dom-tests.ts#L119) can now read multiple tables from the same underlying socket * `MessageReader` now [guarantees/enforces](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/src/ipc/message.ts#L126) message body byte alignment (this one even surfaced bugs in [node core](https://github.com/nodejs/node/issues/24817) and the [DOM streams polyfill](https://github.com/MattiasBuelens/web-streams-polyfill/issues/3)) * New RecordBatchWriters * Adds RecordBatchJSONWriter, RecordBatchFileWriter and RecordBatchStreamWriter * Adds static `RecordBatchWriter.writeAll()` method to easily write a Table or stream of RecordBatches * Both sync and async flushes based on the WritableSink * Full integration with platform I/O primitives * We can still synchronously read JSON, Buffers, `Iterable`, or `AsyncIterable` * In node, we can now read from any [`ReadableStream`](https://nodejs.org/docs/latest/api/stream.html#stream_class_stream_readable), [`fs.FileHandle`](https://nodejs.org/docs/latest/api/fs.html#fs_class_filehandle) * In the browser, we can read from any [`ReadableStream` or `ReadableByteStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream), or the [`Response`](https://developer.mozilla.org/en-US/docs/Web/API/Response) returned from the `fetch()` API. 
(Wrapping the [FileReader](https://developer.mozilla.org/en-US/docs/Web/API/FileReader) is still todo) * We also [accept Promises](https://github.com/Pessimistress/deck.gl/blob/a5940e20cb1659a44cba7839082b0803a997a12f/test/apps/arrow/loader.js#L20) of any of the above * New convenience methods for integrating with node or DOM streams * [`throughNode()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/streams-node-tests.ts#L54)/[`throughDOM()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/streams-dom-tests.ts#L50) * [`toReadableNodeStream()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/streams-node-tests.ts#L69)/[`toReadableDOMStream()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/reader/streams-dom-tests.ts#L65) * [`pipe()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/writer/streams-node-tests.ts#L91)/[`pipeTo()`/`pipeThrough()`](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/test/unit/ipc/writer/streams-dom-tests.ts#L92) * Generic type parameters inherited from `DataType` now flow recursively ```js const table = Table.from<{ str: Utf8, i32: Int32, bools: List }>(data); table.get(0); // will be of type { str: string, i32: number, bools: BoolVector } ``` * New simplified [`Data` class](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/src/data.ts) * New simplified, faster `Visitor` class with support for optional, more narrow [`visitT` implementations](https://github.com/trxcllnt/arrow/blob/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/src/visitor.ts#L181) * New specialized Visitor implementations to enable runtime reflection (e.g. dynamically lookup the Vector constructor for a given DataType) * New abstract `Chunked` base class for the applicative (concat) operation * public `chunkedInst.chunks` field is the list of inner chunks * New `Column` class extends `Chunked`, combines `Field` with the chunks (provides access to the field `name` from the Schema) * `RecordBatch#concat(...batchesOrTables)` now returns a Table * Table now extends `Chunked`, so it inherits: * `Table#slice(from, to)` * `Table#concat(...batchesOrTables)` * `Table#getChildAt(i)` exists, alias of `getColumnAt(i)` * `Table#getColumn[At]()` returns a Column ### The breaking changes * All the old IPC functions are gone, but the new APIs will live for much longer * `Table#batches` is now `Table#chunks`, which it inherits from `Chunked` (maybe controversial, open to aliasing) * `Table#batchesUnion` is now just... the Table instance itself (also maybe controversial, open to aliasing) * `DataType#TType` is now `DataType#typeId` -- it should have always been this, was a typo. Easy to alias if necessary. 
* The complicated View classes are now gone, logic centralized as specialized [`Visitors`](https://github.com/trxcllnt/arrow/tree/b58e29bc83675583238bbb94fba2f3ebf8f1e4aa/js/src/visitor) ### The tests * **Tests no longer rely on any C++ or Java generated integration files** * Integration tests have been moved into `bin/integration.js`, and they finish much quicker * The tsconfig files have been tweaked to speed up test run time and improve the async debugging experience * A streaming `RecordBatchJSONWriter` has been implemented so we can easily debug and validate written output * The JSON results are also tested against the corresponding binary representation, similar to the integration tests * A [suite of test-data helpers](https://github.com/trxcllnt/arrow/blob/d9970bb9a6a9d80bbe07b321dc6389bccf1b0835/js/test/generate-test-data.ts) have been added to auto-generate data for validation at runtime * They produce the underlying Arrow VectorData buffers, as well as the expected plain-JS-value representation [for verification](https://github.com/trxcllnt/arrow/blob/d9970bb9a6a9d80bbe07b321dc6389bccf1b0835/js/test/unit/generated-data-tests.ts#L23) * This allows us to test all possible type configuration combinations, e.g. [all types Dictionary-encode](https://github.com/trxcllnt/arrow/blob/d9970bb9a6a9d80bbe07b321dc6389bccf1b0835/js/test/data/tables.ts#L61), all types serialize when nested, etc. * A [suite of IO test helpers](https://github.com/trxcllnt/arrow/blob/d9970bb9a6a9d80bbe07b321dc6389bccf1b0835/js/test/unit/ipc/helpers.ts#L36) has been added * We use [`memfs`](https://www.npmjs.com/package/memfs) to mock the file system, which contributes to test performance improvements * This enables us to [easily test](https://github.com/trxcllnt/arrow/blob/d9970bb9a6a9d80bbe07b321dc6389bccf1b0835/js/test/unit/ipc/reader/file-reader-tests.ts#L38) all the flavors of io primitives across node and browser environments * A vscode debugging launch configuration has been added to ease the process of contributing more tests (and because I've been asked for mine so often) ### The build * Faster * Node 11+ (needs `Symbol.asyncIterator` enabled) * Closure-compiler upgrades and build enhancements mean we can auto-generate the externs file during compilation, rather than maintaining it by hand ### Misc * Added `arrow2csv` to `js/bin/arrow2csv`, so anybody with the JS project dependencies installed can easily view a CSV-ish thing (`cat foo.arrow | js/bin/arrow2csv.js`) ### Todos * Docs/Recipes/Examples * Highlight/write more tools (like `arrow2csv`) * Flesh out the RecordBatchWriters a bit more * Gather feedback on the new RecordBatchReader APIs Author: ptaylor Author: Paul Taylor Closes #3290 from trxcllnt/js-data-refactor and squashes the following commits: 2ef150f7e bind getByteWidth to the vector type 9acfaa367 handle the case where collapsed Uint8Arrays fully overlap 6a97ee09f perf: defer creating rowProxy on nested types, use Array instead of Object for creating Data instances 2cad76065 pipe directly to stdout to ensure backpressure is preserved f006a2681 ensure schema and field always have a metadata map 8dc5d2cbf fix Float64 Array typings 162c7d873 fix arrow2csv left-pad measurement for new bignum/decimal output 64dc01519 teach closure about Symbol.toPrimitive ca0db9e2a fix lint ec12cdd18 add a small BigNum mixin to make working with Int64 and Decimal values a bit easier 62578b93e fix bug where valueToString function would return undefined (JSON.striingify(undefined) === undefined) 4b58bde06 fix visitor 
method overload type signatures d16541335 don't print comma that includes system paths 708f1b4e7 move stride to data, fix chunked slicing, remove intermediate binding and getters in favor of direct property accesses 78ecc4cfd use the textencoders from the global instead of Buffer for perf testing 47f0677bf perf: use a closure instead of binding 380dbc7de add a single-chunk column type 6bcaad6ac fix lint f7d2b2ef2 add getters for the dictionary and indices of chunked dictionary vectors aaf42c8a9 Consolidated JS data handling refactor --- .travis.yml | 4 +- ci/travis_script_integration.sh | 2 +- ci/travis_script_js.sh | 5 +- integration/integration_test.py | 2 +- js/.gitignore | 8 +- js/.vscode/launch.json | 169 + js/README.md | 43 +- js/bin/arrow2csv.js | 27 + js/bin/file-to-stream.js | 27 +- js/bin/integration.js | 229 +- js/bin/json-to-arrow.js | 53 +- js/bin/print-buffer-alignment.js | 53 +- js/bin/stream-to-file.js | 27 +- js/examples/read_file.html | 2 +- js/gulp/argv.js | 27 +- js/gulp/arrow-task.js | 10 +- js/gulp/clean-task.js | 13 +- js/gulp/closure-task.js | 192 +- js/gulp/{build-task.js => compile-task.js} | 6 +- js/gulp/memoize-task.js | 10 +- js/gulp/minify-task.js | 101 +- js/gulp/package-task.js | 21 +- js/gulp/test-task.js | 47 +- js/gulp/typescript-task.js | 19 +- js/gulp/util.js | 59 +- js/gulpfile.js | 71 +- js/index.ts | 2 +- js/jest.config.js | 56 + js/jest.coverage.config.js | 30 + js/npm-release.sh | 6 +- js/package-lock.json | 8050 ++++++----------- js/package.json | 110 +- js/perf/index.js | 24 +- js/src/Arrow.dom.ts | 86 + js/src/Arrow.externs.js | 814 -- js/src/Arrow.node.ts | 29 + js/src/Arrow.ts | 374 +- js/src/bin/arrow2csv.ts | 224 +- js/src/column.ts | 100 + js/src/compute/dataframe.ts | 209 + js/src/{ => compute}/predicate.ts | 37 +- js/src/data.ts | 482 +- js/src/enum.ts | 95 + js/src/fb/Schema.ts | 2 +- js/src/interfaces.ts | 240 + js/src/io/adapters.ts | 386 + js/src/io/file.ts | 116 + js/src/io/interfaces.ts | 180 + js/src/io/stream.ts | 158 + js/src/ipc/magic.ts | 53 - js/src/ipc/message.ts | 249 + js/src/ipc/metadata.ts | 96 - js/src/ipc/metadata/file.ts | 163 + js/src/ipc/metadata/json.ts | 208 + js/src/ipc/metadata/message.ts | 593 ++ js/src/ipc/node/iterable.ts | 106 + js/src/ipc/node/reader.ts | 85 + js/src/ipc/node/writer.ts | 76 + js/src/ipc/reader.ts | 737 ++ js/src/ipc/reader/arrow.ts | 55 - js/src/ipc/reader/binary.ts | 432 - js/src/ipc/reader/json.ts | 304 - js/src/ipc/reader/node.ts | 78 - js/src/ipc/reader/vector.ts | 131 - js/src/ipc/whatwg/iterable.ts | 88 + js/src/ipc/whatwg/reader.ts | 52 + js/src/ipc/whatwg/writer.ts | 50 + js/src/ipc/writer.ts | 417 + js/src/ipc/writer/binary.ts | 725 -- js/src/recordbatch.ts | 126 +- js/src/schema.ts | 107 + js/src/table.ts | 430 +- js/src/type.ts | 671 +- js/src/util/bit.ts | 35 +- js/src/util/bn.ts | 171 + js/src/util/buffer.ts | 228 + js/src/util/compat.ts | 153 +- js/src/util/int.ts | 94 +- js/src/util/node.ts | 93 - js/src/util/pretty.ts | 33 +- js/src/util/utf8.ts | 47 + js/src/util/vector.ts | 134 + js/src/vector.ts | 481 +- js/src/vector/base.ts | 109 + .../{ipc/writer/arrow.ts => vector/binary.ts} | 26 +- js/src/vector/bool.ts | 32 + js/src/vector/chunked.ts | 314 +- js/src/vector/date.ts | 43 + js/src/vector/decimal.ts | 21 + js/src/vector/dictionary.ts | 65 +- js/src/vector/fixedsizebinary.ts | 22 + js/src/vector/fixedsizelist.ts | 22 + js/src/vector/flat.ts | 290 - js/src/vector/float.ts | 37 + js/src/vector/index.ts | 183 + js/src/vector/int.ts | 53 + js/src/vector/interval.ts | 23 + 
js/src/vector/list.ts | 131 +- js/src/vector/map.ts | 32 + js/src/vector/nested.ts | 247 - js/src/vector/null.ts | 21 + js/src/vector/row.ts | 100 + js/src/vector/struct.ts | 32 + js/src/vector/time.ts | 25 + js/src/vector/timestamp.ts | 25 + js/src/vector/union.ts | 29 + js/src/vector/utf8.ts | 37 + js/src/vector/validity.ts | 75 - js/src/vector/view.ts | 9 - js/src/visitor.ts | 326 +- js/src/visitor/bytewidth.ts | 65 + js/src/visitor/get.ts | 315 + js/src/visitor/indexof.ts | 181 + js/src/visitor/iterator.ts | 170 + js/src/visitor/jsontypeassembler.ts | 88 + js/src/visitor/jsonvectorassembler.ts | 181 + js/src/visitor/set.ts | 326 + js/src/visitor/toarray.ts | 151 + js/src/visitor/typeassembler.ts | 154 + js/src/visitor/typector.ts | 79 + js/src/visitor/vectorassembler.ts | 230 + js/src/visitor/vectorctor.ts | 96 + js/src/visitor/vectorloader.ts | 132 + js/test/Arrow.ts | 36 +- js/test/data/tables.ts | 85 + js/test/generate-test-data.ts | 657 ++ js/test/inference/column.ts | 70 + js/test/inference/nested.ts | 46 + js/test/inference/visitor/get.ts | 39 + js/test/integration/test-config.ts | 52 - js/test/integration/validate-tests.ts | 213 - js/test/jest-extensions.ts | 156 +- js/test/tsconfig.coverage.json | 6 + js/test/tsconfig.json | 11 +- js/test/unit/generated-data-tests.ts | 238 + js/test/unit/int-tests.ts | 2 +- js/test/unit/ipc/helpers.ts | 206 + js/test/unit/ipc/message-reader-tests.ts | 116 + js/test/unit/ipc/reader/file-reader-tests.ts | 123 + .../unit/ipc/reader/from-inference-tests.ts | 152 + js/test/unit/ipc/reader/json-reader-tests.ts | 43 + .../unit/ipc/reader/stream-reader-tests.ts | 65 + js/test/unit/ipc/reader/streams-dom-tests.ts | 189 + js/test/unit/ipc/reader/streams-node-tests.ts | 188 + js/test/unit/ipc/validate.ts | 74 + js/test/unit/ipc/writer/file-writer-tests.ts | 46 + js/test/unit/ipc/writer/json-writer-tests.ts | 49 + .../unit/ipc/writer/stream-writer-tests.ts | 71 + js/test/unit/ipc/writer/streams-dom-tests.ts | 283 + js/test/unit/ipc/writer/streams-node-tests.ts | 277 + js/test/unit/table-tests.ts | 176 +- js/test/unit/vector-tests.ts | 433 - js/test/unit/vector/bool-vector-tests.ts | 102 + .../unit/{ => vector}/date-vector-tests.ts | 24 +- js/test/unit/vector/float16-vector-tests.ts | 73 + js/test/unit/vector/numeric-vector-tests.ts | 190 + js/test/unit/vector/vector-tests.ts | 127 + js/test/unit/visitor-tests.ts | 168 + js/test/unit/writer-tests.ts | 62 - js/tsconfig/tsconfig.base.json | 2 +- js/tsconfig/tsconfig.bin.cjs.json | 2 +- js/tsconfig/tsconfig.es5.cls.json | 4 +- 162 files changed, 17451 insertions(+), 12440 deletions(-) create mode 100644 js/.vscode/launch.json create mode 100755 js/bin/arrow2csv.js rename js/gulp/{build-task.js => compile-task.js} (90%) create mode 100644 js/jest.config.js create mode 100644 js/jest.coverage.config.js create mode 100644 js/src/Arrow.dom.ts delete mode 100644 js/src/Arrow.externs.js create mode 100644 js/src/Arrow.node.ts create mode 100644 js/src/column.ts create mode 100644 js/src/compute/dataframe.ts rename js/src/{ => compute}/predicate.ts (94%) create mode 100644 js/src/enum.ts create mode 100644 js/src/interfaces.ts create mode 100644 js/src/io/adapters.ts create mode 100644 js/src/io/file.ts create mode 100644 js/src/io/interfaces.ts create mode 100644 js/src/io/stream.ts delete mode 100644 js/src/ipc/magic.ts create mode 100644 js/src/ipc/message.ts delete mode 100644 js/src/ipc/metadata.ts create mode 100644 js/src/ipc/metadata/file.ts create mode 100644 js/src/ipc/metadata/json.ts create mode 100644 
js/src/ipc/metadata/message.ts create mode 100644 js/src/ipc/node/iterable.ts create mode 100644 js/src/ipc/node/reader.ts create mode 100644 js/src/ipc/node/writer.ts create mode 100644 js/src/ipc/reader.ts delete mode 100644 js/src/ipc/reader/arrow.ts delete mode 100644 js/src/ipc/reader/binary.ts delete mode 100644 js/src/ipc/reader/json.ts delete mode 100644 js/src/ipc/reader/node.ts delete mode 100644 js/src/ipc/reader/vector.ts create mode 100644 js/src/ipc/whatwg/iterable.ts create mode 100644 js/src/ipc/whatwg/reader.ts create mode 100644 js/src/ipc/whatwg/writer.ts create mode 100644 js/src/ipc/writer.ts delete mode 100644 js/src/ipc/writer/binary.ts create mode 100644 js/src/schema.ts create mode 100644 js/src/util/bn.ts create mode 100644 js/src/util/buffer.ts delete mode 100644 js/src/util/node.ts create mode 100644 js/src/util/utf8.ts create mode 100644 js/src/util/vector.ts create mode 100644 js/src/vector/base.ts rename js/src/{ipc/writer/arrow.ts => vector/binary.ts} (52%) create mode 100644 js/src/vector/bool.ts create mode 100644 js/src/vector/date.ts create mode 100644 js/src/vector/decimal.ts create mode 100644 js/src/vector/fixedsizebinary.ts create mode 100644 js/src/vector/fixedsizelist.ts delete mode 100644 js/src/vector/flat.ts create mode 100644 js/src/vector/float.ts create mode 100644 js/src/vector/index.ts create mode 100644 js/src/vector/int.ts create mode 100644 js/src/vector/interval.ts create mode 100644 js/src/vector/map.ts delete mode 100644 js/src/vector/nested.ts create mode 100644 js/src/vector/null.ts create mode 100644 js/src/vector/row.ts create mode 100644 js/src/vector/struct.ts create mode 100644 js/src/vector/time.ts create mode 100644 js/src/vector/timestamp.ts create mode 100644 js/src/vector/union.ts create mode 100644 js/src/vector/utf8.ts delete mode 100644 js/src/vector/validity.ts delete mode 100644 js/src/vector/view.ts create mode 100644 js/src/visitor/bytewidth.ts create mode 100644 js/src/visitor/get.ts create mode 100644 js/src/visitor/indexof.ts create mode 100644 js/src/visitor/iterator.ts create mode 100644 js/src/visitor/jsontypeassembler.ts create mode 100644 js/src/visitor/jsonvectorassembler.ts create mode 100644 js/src/visitor/set.ts create mode 100644 js/src/visitor/toarray.ts create mode 100644 js/src/visitor/typeassembler.ts create mode 100644 js/src/visitor/typector.ts create mode 100644 js/src/visitor/vectorassembler.ts create mode 100644 js/src/visitor/vectorctor.ts create mode 100644 js/src/visitor/vectorloader.ts create mode 100644 js/test/data/tables.ts create mode 100644 js/test/generate-test-data.ts create mode 100644 js/test/inference/column.ts create mode 100644 js/test/inference/nested.ts create mode 100644 js/test/inference/visitor/get.ts delete mode 100644 js/test/integration/test-config.ts delete mode 100644 js/test/integration/validate-tests.ts create mode 100644 js/test/tsconfig.coverage.json create mode 100644 js/test/unit/generated-data-tests.ts create mode 100644 js/test/unit/ipc/helpers.ts create mode 100644 js/test/unit/ipc/message-reader-tests.ts create mode 100644 js/test/unit/ipc/reader/file-reader-tests.ts create mode 100644 js/test/unit/ipc/reader/from-inference-tests.ts create mode 100644 js/test/unit/ipc/reader/json-reader-tests.ts create mode 100644 js/test/unit/ipc/reader/stream-reader-tests.ts create mode 100644 js/test/unit/ipc/reader/streams-dom-tests.ts create mode 100644 js/test/unit/ipc/reader/streams-node-tests.ts create mode 100644 js/test/unit/ipc/validate.ts create mode 100644 
js/test/unit/ipc/writer/file-writer-tests.ts create mode 100644 js/test/unit/ipc/writer/json-writer-tests.ts create mode 100644 js/test/unit/ipc/writer/stream-writer-tests.ts create mode 100644 js/test/unit/ipc/writer/streams-dom-tests.ts create mode 100644 js/test/unit/ipc/writer/streams-node-tests.ts delete mode 100644 js/test/unit/vector-tests.ts create mode 100644 js/test/unit/vector/bool-vector-tests.ts rename js/test/unit/{ => vector}/date-vector-tests.ts (77%) create mode 100644 js/test/unit/vector/float16-vector-tests.ts create mode 100644 js/test/unit/vector/numeric-vector-tests.ts create mode 100644 js/test/unit/vector/vector-tests.ts create mode 100644 js/test/unit/visitor-tests.ts delete mode 100644 js/test/unit/writer-tests.ts diff --git a/.travis.yml b/.travis.yml index 8532cc7f3b662..c57c473c041f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -230,7 +230,7 @@ matrix: - if [ $ARROW_CI_INTEGRATION_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh - - nvm install 10.1 + - nvm install 11.6 - $TRAVIS_BUILD_DIR/ci/travis_before_script_js.sh - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh script: @@ -240,7 +240,7 @@ matrix: language: node_js os: linux node_js: - - '10.1' + - '11.6' before_script: - if [ $ARROW_CI_JS_AFFECTED != "1" ]; then exit; fi - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh diff --git a/ci/travis_script_integration.sh b/ci/travis_script_integration.sh index 9c2786282b08b..342db58b5dfd3 100755 --- a/ci/travis_script_integration.sh +++ b/ci/travis_script_integration.sh @@ -36,7 +36,7 @@ pushd $ARROW_JS_DIR # lint and compile JS source npm run lint -npm run build +npm run build -- -t apache-arrow popd diff --git a/ci/travis_script_js.sh b/ci/travis_script_js.sh index 1871b4265cd01..34b07115e70b1 100755 --- a/ci/travis_script_js.sh +++ b/ci/travis_script_js.sh @@ -23,9 +23,10 @@ source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh pushd $ARROW_JS_DIR -npm run lint +npm run lint:ci npm run build -# run the non-snapshot unit tests npm test +npm run test:coverage +bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports" popd diff --git a/integration/integration_test.py b/integration/integration_test.py index 7101af2516ad9..c0191c372915c 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -1122,7 +1122,7 @@ def _run(self, exe_cmd, arrow_path=None, json_path=None, if json_path is not None: cmd.extend(['-j', json_path]) - cmd.extend(['--mode', command, '-t', 'es5', '-m', 'umd']) + cmd.extend(['--mode', command]) if self.debug: print(' '.join(cmd)) diff --git a/js/.gitignore b/js/.gitignore index 3437e39da6c0a..5e412f8ee8a57 100644 --- a/js/.gitignore +++ b/js/.gitignore @@ -23,7 +23,8 @@ npm-debug.log* yarn-debug.log* yarn-error.log* -.vscode +.vscode/** +!.vscode/launch.json # Runtime data pids @@ -78,10 +79,13 @@ yarn.lock .env # compilation targets +doc dist targets # test data files -test/data/ +test/data/**/*.json +test/data/**/*.arrow + # jest snapshots (too big) test/__snapshots__/ diff --git a/js/.vscode/launch.json b/js/.vscode/launch.json new file mode 100644 index 0000000000000..ba5609e0c10e8 --- /dev/null +++ b/js/.vscode/launch.json @@ -0,0 +1,169 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Debug Gulp Build", + "program": "${workspaceFolder}/node_modules/gulp/bin/gulp.js", + "args": [ + "build", + // Specify we want to debug the "src" target, which won't clean or build -- essentially a "dry-run" of the gulp build + "--target", "src" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Unit Tests", + "cwd": "${workspaceRoot}", + "program": "${workspaceFolder}/node_modules/.bin/jest", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "env": { + "NODE_NO_WARNINGS": "1", + "READABLE_STREAM": "disable", + "TEST_DOM_STREAMS": "true", + "TEST_NODE_STREAMS": "true", + // Modify these environment variables to run tests on a specific compilation target + module format combo + "TEST_TS_SOURCE": "true", + // "TEST_TS_SOURCE": "false", + // "TEST_TARGET": "es5", + // "TEST_MODULE": "umd" + }, + "args": [ + // "-i", + "test/unit/", + + // Uncomment any of these to run individual test suites + // "test/unit/int-tests.ts", + // "test/unit/table-tests.ts", + // "test/unit/generated-data-tests.ts", + + // "test/unit/vector/vector-tests.ts", + // "test/unit/vector/bool-vector-tests.ts", + // "test/unit/vector/date-vector-tests.ts", + // "test/unit/vector/float16-vector-tests.ts", + // "test/unit/vector/numeric-vector-tests.ts", + + // "test/unit/visitor-tests.ts", + + // "test/unit/ipc/message-reader-tests.ts", + // "test/unit/ipc/reader/file-reader-tests.ts", + // "test/unit/ipc/reader/json-reader-tests.ts", + // "test/unit/ipc/reader/from-inference-tests.ts", + // "test/unit/ipc/reader/stream-reader-tests.ts", + // "test/unit/ipc/reader/streams-dom-tests.ts", + // "test/unit/ipc/reader/streams-node-tests.ts", + // "test/unit/ipc/writer/file-writer-tests.ts", + // "test/unit/ipc/writer/json-writer-tests.ts", + // "test/unit/ipc/writer/stream-writer-tests.ts", + // "test/unit/ipc/writer/streams-dom-tests.ts", + // "test/unit/ipc/writer/streams-node-tests.ts", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug Integration Tests", + "cwd": "${workspaceRoot}", + "program": "${workspaceFolder}/bin/integration.js", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "env": { + "NODE_NO_WARNINGS": "1", + "READABLE_STREAM": "disable" + }, + "args": [ + "--mode", "VALIDATE" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/arrow2csv", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "console": "integratedTerminal", + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/src/bin/arrow2csv.ts", + "-f", "./test/data/cpp/stream/simple.arrow" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/file-to-stream", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/file-to-stream.js", + "./test/data/cpp/file/struct_example.arrow", + "./struct_example-stream-out.arrow", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/stream-to-file", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + 
"${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/stream-to-file.js", + "./test/data/cpp/stream/struct_example.arrow", + "./struct_example-file-out.arrow", + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/json-to-arrow", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/json-to-arrow.js", + "-j", "./test/data/json/struct_example.json", + "-a", "./struct_example-stream-out.arrow", + "-f", "stream" + ] + }, + { + "type": "node", + "request": "launch", + "name": "Debug bin/print-buffer-alignment", + "env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" }, + "runtimeArgs": ["-r", "ts-node/register"], + "skipFiles": [ + "/**/*.js", + "${workspaceFolder}/node_modules/**/*.js" + ], + "args": [ + "${workspaceFolder}/bin/print-buffer-alignment.js", + "./test/data/cpp/stream/struct_example.arrow" + ] + } + ] +} diff --git a/js/README.md b/js/README.md index 15d7ed03f65a4..0af4fecabccc9 100644 --- a/js/README.md +++ b/js/README.md @@ -49,7 +49,7 @@ Check out our [API documentation][7] to learn more about how to use Apache Arrow ### Get a table from an Arrow file on disk (in IPC format) -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -70,7 +70,7 @@ null, null, null ### Create a Table when the Arrow file is split across buffers -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -93,12 +93,24 @@ console.log(table.toString()); ### Create a Table from JavaScript arrays -```es6 +```js +import { + Table, + FloatVector, + DateVector +} from 'apache-arrow'; + const LENGTH = 2000; -const rainAmounts = Float32Array.from({length: LENGTH}, () => Number((Math.random() * 20).toFixed(1))); -const rainDates = Array.from({length: LENGTH}, (_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)); -const rainfall = arrow.Table.fromVectors( +const rainAmounts = Float32Array.from( + { length: LENGTH }, + () => Number((Math.random() * 20).toFixed(1))); + +const rainDates = Array.from( + { length: LENGTH }, + (_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i)); + +const rainfall = Table.fromVectors( [FloatVector.from(rainAmounts), DateVector.from(rainDates)], ['precipitation', 'date'] ); @@ -106,20 +118,17 @@ const rainfall = arrow.Table.fromVectors( ### Load data with `fetch` -```es6 +```js import { Table } from "apache-arrow"; -fetch(require("simple.arrow")).then(response => { - response.arrayBuffer().then(buffer => { - const table = Table.from(new Uint8Array(buffer)); - console.log(table.toString()); - }); -}); +const table = await Table.from(fetch(("/simple.arrow"))); +console.log(table.toString()); + ``` ### Columns look like JS Arrays -```es6 +```js import { readFileSync } from 'fs'; import { Table } from 'apache-arrow'; @@ -131,7 +140,7 @@ const table = Table.from([ const column = table.getColumn('origin_lat'); // Copy the data into a TypedArray -const typed = column.slice(); +const typed = column.toArray(); assert(typed instanceof Float32Array); for (let i = -1, n = column.length; ++i < n;) { @@ -141,7 +150,7 @@ for (let i = -1, n = column.length; ++i < n;) { ### Usage with MapD Core -```es6 +```js import MapD from 'rxjs-mapd'; import { Table } from 'apache-arrow'; @@ -164,7 +173,7 @@ MapD.open(host, port) ) .map(([schema, records]) => // Create Arrow Table from results - 
Table.from(schema, records)) + Table.from([schema, records])) .map((table) => // Stringify the table to CSV with row numbers table.toString({ index: true })) diff --git a/js/bin/arrow2csv.js b/js/bin/arrow2csv.js new file mode 100755 index 0000000000000..afd59736bf521 --- /dev/null +++ b/js/bin/arrow2csv.js @@ -0,0 +1,27 @@ +#! /usr/bin/env node + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const Path = require(`path`); +const here = Path.resolve(__dirname, '../'); +const tsnode = require.resolve(`ts-node/register`); +const arrow2csv = Path.join(here, `src/bin/arrow2csv.ts`); + +require('child_process').spawn(`node`, [ + `-r`, tsnode, arrow2csv, ...process.argv.slice(2) +], { cwd: here, env: process.env, stdio: `inherit` }); diff --git a/js/bin/file-to-stream.js b/js/bin/file-to-stream.js index fa4e5d17bbd3a..090cd0b0eda77 100755 --- a/js/bin/file-to-stream.js +++ b/js/bin/file-to-stream.js @@ -17,21 +17,24 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const encoding = 'binary'; -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const { util: { PipeIterator } } = require(`../index${ext}`); -const { Table, serializeStream, fromReadableStream } = require(`../index${ext}`); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { RecordBatchReader, RecordBatchStreamWriter } = require(`../index${extension}`); (async () => { - // Todo (ptaylor): implement `serializeStreamAsync` that accepts an - // AsyncIterable, rather than aggregating into a Table first - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - const out = process.argv.length < 4 - ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); - new PipeIterator(serializeStream(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + + const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const writable = process.argv.length < 4 ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + + const fileToStream = readable + .pipe(RecordBatchReader.throughNode()) + .pipe(RecordBatchStreamWriter.throughNode()) + .pipe(writable); + + await eos(fileToStream); })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/integration.js b/js/bin/integration.js index 6c064deac258d..c6f6cd7a24ed5 100755 --- a/js/bin/integration.js +++ b/js/bin/integration.js @@ -17,61 +17,55 @@ // specific language governing permissions and limitations // under the License. 
+// @ts-nocheck + const fs = require('fs'); -const glob = require('glob'); -const path = require('path'); -const child_process = require(`child_process`); +const Path = require('path'); +const { promisify } = require('util'); +const glob = promisify(require('glob')); +const { zip } = require('ix/iterable/zip'); +const { parse: bignumJSONParse } = require('json-bignum'); const argv = require(`command-line-args`)(cliOpts(), { partial: true }); -const gulpPath = require.resolve(path.join(`..`, `node_modules/gulp/bin/gulp.js`)); - -let jsonPaths = [...(argv.json || [])]; -let arrowPaths = [...(argv.arrow || [])]; +const { + Table, + RecordBatchReader, + util: { createElementComparator } +} = require('../targets/apache-arrow/Arrow.es5.min'); -if (!argv.mode) { - return print_usage(); +const exists = async (p) => { + try { + return !!(await fs.promises.stat(p)); + } catch (e) { return false; } } -let mode = argv.mode.toUpperCase(); -if (mode === 'VALIDATE' && !jsonPaths.length) { - jsonPaths = glob.sync(path.resolve(__dirname, `../test/data/json/`, `*.json`)); - if (!arrowPaths.length) { - [jsonPaths, arrowPaths] = jsonPaths.reduce(([jsonPaths, arrowPaths], jsonPath) => { - const { name } = path.parse(jsonPath); - for (const source of ['cpp', 'java']) { - for (const format of ['file', 'stream']) { - const arrowPath = path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); - if (fs.existsSync(arrowPath)) { - jsonPaths.push(jsonPath); - arrowPaths.push(arrowPath); - } - } - } - return [jsonPaths, arrowPaths]; - }, [[], []]); - console.log(`jsonPaths: [\n\t${jsonPaths.join('\n\t')}\n]`); - console.log(`arrowPaths: [\n\t${arrowPaths.join('\n\t')}\n]`); +(async () => { + + if (!argv.mode) { return print_usage(); } + + let mode = argv.mode.toUpperCase(); + let jsonPaths = [...(argv.json || [])]; + let arrowPaths = [...(argv.arrow || [])]; + + if (mode === 'VALIDATE' && !jsonPaths.length) { + [jsonPaths, arrowPaths] = await loadLocalJSONAndArrowPathsForDebugging(jsonPaths, arrowPaths); } -} else if (!jsonPaths.length) { - return print_usage(); -} -switch (mode) { - case 'VALIDATE': - const args = [`test`, `-i`].concat(argv._unknown || []); - jsonPaths.forEach((p, i) => { - args.push('-j', p, '-a', arrowPaths[i]); - }); - process.exitCode = child_process.spawnSync( - gulpPath, args, - { - cwd: path.resolve(__dirname, '..'), - stdio: ['ignore', 'inherit', 'inherit'] + if (!jsonPaths.length) { return print_usage(); } + + switch (mode) { + case 'VALIDATE': + for (let [jsonPath, arrowPath] of zip(jsonPaths, arrowPaths)) { + await validate(jsonPath, arrowPath); } - ).status || process.exitCode || 0; - break; - default: - print_usage(); -} + break; + default: + return print_usage(); + } +})() +.then((x) => +x || 0, (e) => { + e && process.stderr.write(`${e && e.stack || e}\n`); + return process.exitCode || 1; +}).then((code) => process.exit(code)); function cliOpts() { return [ @@ -118,5 +112,144 @@ function print_usage() { ] }, ])); - process.exit(1); + return 1; +} + +async function validate(jsonPath, arrowPath) { + + const files = await Promise.all([ + fs.promises.readFile(arrowPath), + fs.promises.readFile(jsonPath, 'utf8'), + ]); + + const arrowData = files[0]; + const jsonData = bignumJSONParse(files[1]); + + validateReaderIntegration(jsonData, arrowData); + validateTableFromBuffersIntegration(jsonData, arrowData); + validateTableToBuffersIntegration('json', 'file')(jsonData, arrowData); + validateTableToBuffersIntegration('json', 'file')(jsonData, arrowData); + 
validateTableToBuffersIntegration('binary', 'file')(jsonData, arrowData); + validateTableToBuffersIntegration('binary', 'file')(jsonData, arrowData); +} + +function validateReaderIntegration(jsonData, arrowBuffer) { + const msg = `json and arrow record batches report the same values`; + try { + const jsonReader = RecordBatchReader.from(jsonData); + const binaryReader = RecordBatchReader.from(arrowBuffer); + for (const [jsonRecordBatch, binaryRecordBatch] of zip(jsonReader, binaryReader)) { + compareTableIsh(jsonRecordBatch, binaryRecordBatch); + } + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); +} + +function validateTableFromBuffersIntegration(jsonData, arrowBuffer) { + const msg = `json and arrow tables report the same values`; + try { + const jsonTable = Table.from(jsonData); + const binaryTable = Table.from(arrowBuffer); + compareTableIsh(jsonTable, binaryTable); + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); +} + +function validateTableToBuffersIntegration(srcFormat, arrowFormat) { + const refFormat = srcFormat === `json` ? `binary` : `json`; + return function testTableToBuffersIntegration(jsonData, arrowBuffer) { + const msg = `serialized ${srcFormat} ${arrowFormat} reports the same values as the ${refFormat} ${arrowFormat}`; + try { + const refTable = Table.from(refFormat === `json` ? jsonData : arrowBuffer); + const srcTable = Table.from(srcFormat === `json` ? jsonData : arrowBuffer); + const dstTable = Table.from(srcTable.serialize(`binary`, arrowFormat === `stream`)); + compareTableIsh(dstTable, refTable); + } catch (e) { throw new Error(`${msg}: fail \n ${e && e.stack || e}`); } + process.stdout.write(`${msg}: pass\n`); + }; +} + +function compareTableIsh(actual, expected) { + if (actual.length !== expected.length) { + throw new Error(`length: ${actual.length} !== ${expected.length}`); + } + if (actual.numCols !== expected.numCols) { + throw new Error(`numCols: ${actual.numCols} !== ${expected.numCols}`); + } + (() => { + const getChildAtFn = expected instanceof Table ? 'getColumnAt' : 'getChildAt'; + for (let i = -1, n = actual.numCols; ++i < n;) { + const v1 = actual[getChildAtFn](i); + const v2 = expected[getChildAtFn](i); + compareVectors(v1, v2); + } + })(); +} + +function compareVectors(actual, expected) { + + if ((actual == null && expected != null) || (expected == null && actual != null)) { + throw new Error(`${actual == null ? `actual` : `expected`} is null, was expecting ${actual == null ? 
expected : actual} to be that also`); + } + + let props = ['type', 'length', 'nullCount']; + + (() => { + for (let i = -1, n = props.length; ++i < n;) { + const prop = props[i]; + if (`${actual[prop]}` !== `${expected[prop]}`) { + throw new Error(`${prop}: ${actual[prop]} !== ${expected[prop]}`); + } + } + })(); + + (() => { + for (let i = -1, n = actual.length; ++i < n;) { + let x1 = actual.get(i), x2 = expected.get(i); + if (!createElementComparator(x2)(x1)) { + throw new Error(`${i}: ${x1} !== ${x2}`); + } + } + })(); + + (() => { + let i = -1; + for (let [x1, x2] of zip(actual, expected)) { + ++i; + if (!createElementComparator(x2)(x1)) { + throw new Error(`${i}: ${x1} !== ${x2}`); + } + } + })(); +} + +async function loadLocalJSONAndArrowPathsForDebugging(jsonPaths, arrowPaths) { + + const sourceJSONPaths = await glob(Path.resolve(__dirname, `../test/data/json/`, `*.json`)); + + if (!arrowPaths.length) { + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'cpp', 'file'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'java', 'file'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'cpp', 'stream'); + await loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, 'java', 'stream'); + } + + for (let [jsonPath, arrowPath] of zip(jsonPaths, arrowPaths)) { + console.log(`jsonPath: ${jsonPath}`); + console.log(`arrowPath: ${arrowPath}`); + } + + return [jsonPaths, arrowPaths]; + + async function loadJSONAndArrowPaths(sourceJSONPaths, jsonPaths, arrowPaths, source, format) { + for (const jsonPath of sourceJSONPaths) { + const { name } = Path.parse(jsonPath); + const arrowPath = Path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); + if (await exists(arrowPath)) { + jsonPaths.push(jsonPath); + arrowPaths.push(arrowPath); + } + } + return [jsonPaths, arrowPaths]; + } } diff --git a/js/bin/json-to-arrow.js b/js/bin/json-to-arrow.js index f28b4145ffaed..7a98d56d1a5e2 100755 --- a/js/bin/json-to-arrow.js +++ b/js/bin/json-to-arrow.js @@ -17,37 +17,46 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); -const glob = require('glob'); -const path = require('path'); -const { promisify } = require('util'); +const Path = require('path'); const { parse } = require('json-bignum'); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; const argv = require(`command-line-args`)(cliOpts(), { partial: true }); +const { RecordBatchReader, RecordBatchFileWriter, RecordBatchStreamWriter } = require(`../index${extension}`); -const ext = process.env.ARROW_JS_DEBUG === 'src' ? 
'.ts' : ''; -const { Table } = require(`../index${ext}`); - -const encoding = 'binary'; -const stream = argv.format === 'stream'; const jsonPaths = [...(argv.json || [])]; const arrowPaths = [...(argv.arrow || [])]; -if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) { - return print_usage(); -} +(async () => { -const readFile = callResolved(promisify(fs.readFile)); -const writeFile = callResolved(promisify(fs.writeFile)); + if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) { + return print_usage(); + } -(async () => await Promise.all(jsonPaths.map(async (jPath, i) => { - const aPath = arrowPaths[i]; - const arrowTable = Table.from(parse('' + (await readFile(jPath)))); - await writeFile(aPath, arrowTable.serialize(encoding, stream), encoding); -})))().catch((e) => { console.error(e); process.exit(1); }); + await Promise.all(jsonPaths.map(async (path, i) => { + + const RecordBatchWriter = argv.format !== 'stream' + ? RecordBatchFileWriter + : RecordBatchStreamWriter; -function callResolved(fn) { - return async (path_, ...xs) => await fn(path.resolve(path_), ...xs); -} + const reader = RecordBatchReader.from(parse( + await fs.promises.readFile(Path.resolve(path), 'utf8'))); + + const jsonToArrow = reader + .pipe(RecordBatchWriter.throughNode()) + .pipe(fs.createWriteStream(arrowPaths[i])); + + await eos(jsonToArrow); + + })); +})() +.then((x) => +x || 0, (e) => { + e && process.stderr.write(`${e}`); + return process.exitCode || 1; +}).then((code = 0) => process.exit(code)); function cliOpts() { return [ @@ -95,5 +104,5 @@ function print_usage() { ] }, ])); - process.exit(1); + return 1; } diff --git a/js/bin/print-buffer-alignment.js b/js/bin/print-buffer-alignment.js index a4cd9bb2351e7..8d422aad60d74 100755 --- a/js/bin/print-buffer-alignment.js +++ b/js/bin/print-buffer-alignment.js @@ -17,34 +17,41 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const base = process.env.ARROW_JS_DEBUG === 'src' ? '../src' : '../targets/apache-arrow'; -const { Message } = require(`${base}/ipc/metadata${ext}`); -const { readBuffersAsync } = require(`${base}/ipc/reader/binary${ext}`); -const { Table, VectorVisitor, fromReadableStream } = require(`../index${ext}`); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { AsyncMessageReader } = require(`../index${extension}`); (async () => { - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - - let recordBatchIndex = 0; - let dictionaryBatchIndex = 0; - - for await (let { message, loader } of readBuffersAsync(fromReadableStream(in_))) { - - if (Message.isRecordBatch(message)) { - console.log(`record batch ${++recordBatchIndex}, offset ${loader.messageOffset}`); - } else if (Message.isDictionaryBatch(message)) { - message = message.data; - console.log(`dictionary batch ${++dictionaryBatchIndex}, offset ${loader.messageOffset}`); - } else { continue; } - - message.buffers.forEach(({offset, length}, i) => { - console.log(`\tbuffer ${i+1}: { offset: ${offset}, length: ${length} }`); + + const readable = process.argv.length < 3 ? 
process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const reader = new AsyncMessageReader(readable); + + let recordBatchIndex = 0, dictionaryBatchIndex = 0; + + for await (let message of reader) { + + let bufferRegions = []; + + if (message.isSchema()) { + continue; + } else if (message.isRecordBatch()) { + bufferRegions = message.header().buffers; + const body = await reader.readMessageBody(message.bodyLength); + console.log(`record batch ${++recordBatchIndex}, byteOffset ${body.byteOffset}`); + } else if (message.isDictionaryBatch()) { + bufferRegions = message.header().data.buffers; + const body = await reader.readMessageBody(message.bodyLength); + console.log(`dictionary batch ${++dictionaryBatchIndex}, byteOffset ${body.byteOffset}`); + } + + bufferRegions.forEach(({ offset, length }, i) => { + console.log(`\tbuffer ${i + 1}: { offset: ${offset}, length: ${length} }`); }); } + await reader.return(); + })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/bin/stream-to-file.js b/js/bin/stream-to-file.js index f33646ac61a41..015a5eace74d8 100755 --- a/js/bin/stream-to-file.js +++ b/js/bin/stream-to-file.js @@ -17,21 +17,24 @@ // specific language governing permissions and limitations // under the License. +// @ts-check + const fs = require('fs'); const path = require('path'); - -const encoding = 'binary'; -const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; -const { util: { PipeIterator } } = require(`../index${ext}`); -const { Table, serializeFile, fromReadableStream } = require(`../index${ext}`); +const eos = require('util').promisify(require('stream').finished); +const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : ''; +const { RecordBatchReader, RecordBatchFileWriter } = require(`../index${extension}`); (async () => { - // Todo (ptaylor): implement `serializeFileAsync` that accepts an - // AsyncIterable, rather than aggregating into a Table first - const in_ = process.argv.length < 3 - ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); - const out = process.argv.length < 4 - ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); - new PipeIterator(serializeFile(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out); + + const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2])); + const writable = process.argv.length < 4 ? 
process.stdout : fs.createWriteStream(path.resolve(process.argv[3])); + + const streamToFile = readable + .pipe(RecordBatchReader.throughNode()) + .pipe(RecordBatchFileWriter.throughNode()) + .pipe(writable); + + await eos(streamToFile); })().catch((e) => { console.error(e); process.exit(1); }); diff --git a/js/examples/read_file.html b/js/examples/read_file.html index 3e082d9dc412f..ec96d0e4755e2 100644 --- a/js/examples/read_file.html +++ b/js/examples/read_file.html @@ -86,6 +86,6 @@ - + diff --git a/js/gulp/argv.js b/js/gulp/argv.js index 7dceb0f74c587..3a028f813f936 100644 --- a/js/gulp/argv.js +++ b/js/gulp/argv.js @@ -21,16 +21,12 @@ const path = require('path'); const argv = require(`command-line-args`)([ { name: `all`, type: Boolean }, - { name: 'update', alias: 'u', type: Boolean }, - { name: 'verbose', alias: 'v', type: Boolean }, + { name: 'verbose', alias: `v`, type: Boolean }, { name: `target`, type: String, defaultValue: `` }, { name: `module`, type: String, defaultValue: `` }, { name: `coverage`, type: Boolean, defaultValue: false }, - { name: `integration`, alias: `i`, type: Boolean, defaultValue: false }, { name: `targets`, alias: `t`, type: String, multiple: true, defaultValue: [] }, { name: `modules`, alias: `m`, type: String, multiple: true, defaultValue: [] }, - { name: `json_files`, alias: `j`, type: String, multiple: true, defaultValue: [] }, - { name: `arrow_files`, alias: `a`, type: String, multiple: true, defaultValue: [] }, ], { partial: true }); const { targets, modules } = argv; @@ -44,25 +40,4 @@ if (argv.target === `src`) { (argv.all || !modules.length) && modules.push(`all`); } -if (argv.coverage && (!argv.json_files || !argv.json_files.length)) { - - let [jsonPaths, arrowPaths] = glob - .sync(path.resolve(__dirname, `../test/data/json/`, `*.json`)) - .reduce((paths, jsonPath) => { - const { name } = path.parse(jsonPath); - const [jsonPaths, arrowPaths] = paths; - ['cpp', 'java'].forEach((source) => ['file', 'stream'].forEach((format) => { - const arrowPath = path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`); - if (fs.existsSync(arrowPath)) { - jsonPaths.push(jsonPath); - arrowPaths.push(arrowPath); - } - })); - return paths; - }, [[], []]); - - argv.json_files = jsonPaths; - argv.arrow_files = arrowPaths; -} - module.exports = { argv, targets, modules }; diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js index 95fc1eed0f84e..e119c540dc351 100644 --- a/js/gulp/arrow-task.js +++ b/js/gulp/arrow-task.js @@ -16,24 +16,22 @@ // under the License. 
const { - mainExport, gCCLanguageNames, targetDir, observableFromStreams } = require('./util'); const del = require('del'); const gulp = require('gulp'); -const path = require('path'); const { promisify } = require('util'); const gulpRename = require(`gulp-rename`); const { memoizeTask } = require('./memoize-task'); const exec = promisify(require('child_process').exec); const { Observable, ReplaySubject } = require('rxjs'); -const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, format) { +const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target) { const out = targetDir(target); const dtsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.ts`; const cjsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.js`; - const esmGlob = `${targetDir(`es2015`, `esm`)}/**/*.js`; + const esmGlob = `${targetDir(`esnext`, `esm`)}/**/*.js`; const es5UmdGlob = `${targetDir(`es5`, `umd`)}/*.js`; const es5UmdMaps = `${targetDir(`es5`, `umd`)}/*.map`; const es2015UmdGlob = `${targetDir(`es2015`, `umd`)}/*.js`; @@ -46,7 +44,7 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, forma observableFromStreams(gulp.src(esmGlob), ch_ext(`.mjs`), gulp.dest(out)), // copy es2015 esm files and rename to `.mjs` observableFromStreams(gulp.src(es5UmdGlob), append(`.es5.min`), gulp.dest(out)), // copy es5 umd files and add `.min` observableFromStreams(gulp.src(es5UmdMaps), gulp.dest(out)), // copy es5 umd sourcemap files, but don't rename - observableFromStreams(gulp.src(es2015UmdGlob), append(`.es2015.min`), gulp.dest(out)), // copy es2015 umd files and add `.es6.min` + observableFromStreams(gulp.src(es2015UmdGlob), append(`.es2015.min`), gulp.dest(out)), // copy es2015 umd files and add `.es2015.min` observableFromStreams(gulp.src(es2015UmdMaps), gulp.dest(out)), // copy es2015 umd sourcemap files, but don't rename ).publish(new ReplaySubject()).refCount(); }))({}); @@ -61,4 +59,4 @@ const arrowTSTask = ((cache) => memoizeTask(cache, async function copyTS(target, module.exports = arrowTask; module.exports.arrowTask = arrowTask; -module.exports.arrowTSTask = arrowTSTask; \ No newline at end of file +module.exports.arrowTSTask = arrowTSTask; diff --git a/js/gulp/clean-task.js b/js/gulp/clean-task.js index d6c90f4637c8b..551aeb41af739 100644 --- a/js/gulp/clean-task.js +++ b/js/gulp/clean-task.js @@ -16,16 +16,15 @@ // under the License. 
const del = require('del'); +const { Observable } = require('rxjs'); const { targetDir } = require('./util'); -const { memoizeTask } = require('./memoize-task'); -const { Observable, ReplaySubject } = require('rxjs'); +const memoizeTask = require('./memoize-task'); const cleanTask = ((cache) => memoizeTask(cache, function clean(target, format) { - return Observable - .from(del(`${targetDir(target, format)}/**`)) - .catch((e) => Observable.empty()) - .multicast(new ReplaySubject()).refCount(); + const dir = targetDir(target, format); + return Observable.from(del(dir)) + .catch((e) => Observable.empty()); }))({}); module.exports = cleanTask; -module.exports.cleanTask = cleanTask; \ No newline at end of file +module.exports.cleanTask = cleanTask; diff --git a/js/gulp/closure-task.js b/js/gulp/closure-task.js index 547e760a7fa8a..ef629982ae39f 100644 --- a/js/gulp/closure-task.js +++ b/js/gulp/closure-task.js @@ -18,52 +18,83 @@ const { targetDir, mainExport, + esmRequire, gCCLanguageNames, - UMDSourceTargets, - observableFromStreams + publicModulePaths, + observableFromStreams, + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); +const fs = require('fs'); const gulp = require('gulp'); const path = require('path'); const sourcemaps = require('gulp-sourcemaps'); const { memoizeTask } = require('./memoize-task'); const { compileBinFiles } = require('./typescript-task'); -const { Observable, ReplaySubject } = require('rxjs'); +const mkdirp = require('util').promisify(require('mkdirp')); const closureCompiler = require('google-closure-compiler').gulp(); -const closureTask = ((cache) => memoizeTask(cache, function closure(target, format) { +const closureTask = ((cache) => memoizeTask(cache, async function closure(target, format) { + + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const src = targetDir(target, `cls`); + const srcAbsolute = path.resolve(src); const out = targetDir(target, format); - const entry = path.join(src, mainExport); - const externs = path.join(`src/Arrow.externs.js`); - return observableFromStreams( - gulp.src([ -/* external libs first --> */ `node_modules/tslib/package.json`, - `node_modules/tslib/tslib.es6.js`, - `node_modules/flatbuffers/package.json`, - `node_modules/flatbuffers/js/flatbuffers.mjs`, - `node_modules/text-encoding-utf-8/package.json`, - `node_modules/text-encoding-utf-8/src/encoding.js`, -/* then sources globs --> */ `${src}/**/*.js`, - ], { base: `./` }), - sourcemaps.init(), - closureCompiler(createClosureArgs(entry, externs)), - // rename the sourcemaps from *.js.map files to *.min.js.map - sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }), - gulp.dest(out) - ) - .merge(compileBinFiles(target, format)) - .takeLast(1) - .publish(new ReplaySubject()).refCount(); + const externs = path.join(`${out}/${mainExport}.externs.js`); + const entry_point = path.join(`${src}/${mainExport}.dom.cls.js`); + + const exportedImports = publicModulePaths(srcAbsolute).reduce((entries, publicModulePath) => [ + ...entries, { + publicModulePath, + exports_: getPublicExportedNames(esmRequire(publicModulePath, { warnings: false })) + } + ], []); + + await mkdirp(out); + + await Promise.all([ + fs.promises.writeFile(externs, generateExternsFile(exportedImports)), + fs.promises.writeFile(entry_point, generateUMDExportAssignnent(srcAbsolute, exportedImports)) + ]); + + return await Promise.all([ + 
runClosureCompileAsObservable().toPromise(), + compileBinFiles(target, format).toPromise() + ]); + + function runClosureCompileAsObservable() { + return observableFromStreams( + gulp.src([ + /* external libs first */ + `node_modules/flatbuffers/package.json`, + `node_modules/flatbuffers/js/flatbuffers.mjs`, + `node_modules/text-encoding-utf-8/package.json`, + `node_modules/text-encoding-utf-8/src/encoding.js`, + `${src}/**/*.js` /* <-- then source globs */ + ], { base: `./` }), + sourcemaps.init(), + closureCompiler(createClosureArgs(entry_point, externs)), + // rename the sourcemaps from *.js.map files to *.min.js.map + sourcemaps.write(`.`, { mapFile: (mapPath) => mapPath.replace(`.js.map`, `.${target}.min.js.map`) }), + gulp.dest(out) + ); + } }))({}); -const createClosureArgs = (entry, externs) => ({ +module.exports = closureTask; +module.exports.closureTask = closureTask; + +const createClosureArgs = (entry_point, externs) => ({ externs, + entry_point, third_party: true, warning_level: `QUIET`, dependency_mode: `STRICT`, rewrite_polyfills: false, - entry_point: `${entry}.js`, module_resolution: `NODE`, // formatting: `PRETTY_PRINT`, // debug: true, @@ -72,10 +103,99 @@ const createClosureArgs = (entry, externs) => ({ package_json_entry_names: `module,jsnext:main,main`, assume_function_wrapper: true, js_output_file: `${mainExport}.js`, - language_in: gCCLanguageNames[`es2015`], + language_in: gCCLanguageNames[`esnext`], language_out: gCCLanguageNames[`es5`], - output_wrapper: -`// Licensed to the Apache Software Foundation (ASF) under one + output_wrapper:`${apacheHeader()} +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : + typeof define === 'function' && define.amd ? define(['Arrow'], factory) : + (factory(global.Arrow = global.Arrow || {})); +}(this, (function (exports) {%output%}.bind(this))));` +}); + +function generateUMDExportAssignnent(src, exportedImports) { + return [ + ...exportedImports.map(({ publicModulePath }, i) => { + const p = publicModulePath.slice(src.length + 1); + return (`import * as exports${i} from './${p}';`); + }).filter(Boolean), + 'Object.assign(arguments[0], exports0);' + ].join('\n'); +} + +function generateExternsFile(exportedImports) { + return [ + externsHeader(), + ...exportedImports.reduce((externBodies, { exports_ }) => [ + ...externBodies, ...exports_.map(externBody) + ], []).filter(Boolean) + ].join('\n'); +} + +function externBody({ exportName, staticNames, instanceNames }) { + return [ + `var ${exportName} = function() {};`, + staticNames.map((staticName) => (isNaN(+staticName) + ? `/** @type {?} */\n${exportName}.${staticName} = function() {};` + : `/** @type {?} */\n${exportName}[${staticName}] = function() {};` + )).join('\n'), + instanceNames.map((instanceName) => (isNaN(+instanceName) + ? 
`/** @type {?} */\n${exportName}.prototype.${instanceName};` + : `/** @type {?} */\n${exportName}.prototype[${instanceName}];` + )).join('\n') + ].filter(Boolean).join('\n'); +} + +function externsHeader() { + return (`${apacheHeader()} +// @ts-nocheck +/* tslint:disable */ +/** + * @fileoverview Closure Compiler externs for Arrow + * @externs + * @suppress {duplicate,checkTypes} + */ +/** @type {symbol} */ +Symbol.iterator; +/** @type {symbol} */ +Symbol.toPrimitive; +/** @type {symbol} */ +Symbol.asyncIterator; +`); +} + +function getPublicExportedNames(entryModule) { + const fn = function() {}; + const isStaticOrProtoName = (x) => ( + !(x in fn) && + (x !== `default`) && + (x !== `undefined`) && + (x !== `__esModule`) && + (x !== `constructor`) && + !(x.startsWith('_')) + ); + return Object + .getOwnPropertyNames(entryModule) + .filter((name) => name !== 'default') + .filter((name) => ( + typeof entryModule[name] === `object` || + typeof entryModule[name] === `function` + )) + .map((name) => [name, entryModule[name]]) + .reduce((reserved, [name, value]) => { + + const staticNames = value && + typeof value === 'object' ? Object.getOwnPropertyNames(value).filter(isStaticOrProtoName) : + typeof value === 'function' ? Object.getOwnPropertyNames(value).filter(isStaticOrProtoName) : []; + + const instanceNames = (typeof value === `function` && Object.getOwnPropertyNames(value.prototype || {}) || []).filter(isStaticOrProtoName); + + return [...reserved, { exportName: name, staticNames, instanceNames }]; + }, []); +} + +function apacheHeader() { + return `// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file @@ -90,13 +210,5 @@ const createClosureArgs = (entry, externs) => ({ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations -// under the License. -(function (global, factory) { - typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : - typeof define === 'function' && define.amd ? define(['exports'], factory) : - (factory(global.Arrow = global.Arrow || {})); -}(this, (function (exports) {%output%}.bind(this))));` -}); - -module.exports = closureTask; -module.exports.closureTask = closureTask; +// under the License.` +} diff --git a/js/gulp/build-task.js b/js/gulp/compile-task.js similarity index 90% rename from js/gulp/build-task.js rename to js/gulp/compile-task.js index 9f3402cdd3508..60e2ebbe36a93 100644 --- a/js/gulp/build-task.js +++ b/js/gulp/compile-task.js @@ -24,7 +24,7 @@ const closureTask = require('./closure-task'); const typescriptTask = require('./typescript-task'); const { arrowTask, arrowTSTask } = require('./arrow-task'); -const buildTask = ((cache) => memoizeTask(cache, function build(target, format, ...args) { +const compileTask = ((cache) => memoizeTask(cache, function compile(target, format, ...args) { return target === `src` ? Observable.empty() : target === npmPkgName ? arrowTask(target, format, ...args)() : target === `ts` ? 
arrowTSTask(target, format, ...args)() @@ -33,5 +33,5 @@ const buildTask = ((cache) => memoizeTask(cache, function build(target, format, : typescriptTask(target, format, ...args)(); }))({}); -module.exports = buildTask; -module.exports.buildTask = buildTask; +module.exports = compileTask; +module.exports.compileTask = compileTask; diff --git a/js/gulp/memoize-task.js b/js/gulp/memoize-task.js index 0b0fc843c451a..408ee3b8839db 100644 --- a/js/gulp/memoize-task.js +++ b/js/gulp/memoize-task.js @@ -17,6 +17,13 @@ const { taskName } = require('./util'); +const createTask = ((taskFn) => ((target, format, ...args) => { + // Give the memoized fn a displayName so gulp's output is easier to follow. + const fn = () => taskFn(target, format, ...args); + fn.displayName = `${taskFn.name || ``}:${taskName(target, format, ...args)}:task`; + return fn; +})); + const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { // Give the memoized fn a displayName so gulp's output is easier to follow. const fn = () => ( @@ -27,4 +34,5 @@ const memoizeTask = ((cache, taskFn) => ((target, format, ...args) => { })); module.exports = memoizeTask; -module.exports.memoizeTask = memoizeTask; \ No newline at end of file +module.exports.createTask = createTask; +module.exports.memoizeTask = memoizeTask; diff --git a/js/gulp/minify-task.js b/js/gulp/minify-task.js index 82145aa90861a..81cb5e5f3f536 100644 --- a/js/gulp/minify-task.js +++ b/js/gulp/minify-task.js @@ -18,10 +18,10 @@ const { targetDir, mainExport, - ESKeywords, UMDSourceTargets, terserLanguageNames, - observableFromStreams + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); const path = require('path'); @@ -30,41 +30,24 @@ const { memoizeTask } = require('./memoize-task'); const { compileBinFiles } = require('./typescript-task'); const { Observable, ReplaySubject } = require('rxjs'); const TerserPlugin = require(`terser-webpack-plugin`); -const esmRequire = require(`@std/esm`)(module, { - mode: `js`, - warnings: false, - cjs: { - /* A boolean for storing ES modules in require.cache. */ - cache: true, - /* A boolean for respecting require.extensions in ESM. */ - extensions: true, - /* A boolean for __esModule interoperability. */ - interop: true, - /* A boolean for importing named exports of CJS modules. */ - namedExports: true, - /* A boolean for following CJS path rules in ESM. */ - paths: true, - /* A boolean for __dirname, __filename, and require in ESM. 
*/ - vars: true, - } -}); const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJS(target, format) { + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const sourceTarget = UMDSourceTargets[target]; - const PublicNames = reservePublicNames(sourceTarget, `cls`); const out = targetDir(target, format), src = targetDir(sourceTarget, `cls`); const targetConfig = { ...commonConfig, output: { ...commonConfig.output, path: path.resolve(`./${out}`) } }; - const webpackConfigs = [ - [mainExport, PublicNames] - ].map(([entry, reserved]) => ({ + const webpackConfigs = [mainExport].map((entry) => ({ ...targetConfig, name: entry, - entry: { [entry]: path.resolve(`${src}/${entry}.js`) }, + entry: { [entry]: path.resolve(`${src}/${entry}.dom.js`) }, plugins: [ ...(targetConfig.plugins || []), new webpack.SourceMapDevToolPlugin({ @@ -73,20 +56,23 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ resourcePath .replace(/\s/, `_`) .replace(/\.\/node_modules\//, ``) - }), - new TerserPlugin({ - sourceMap: true, - terserOptions: { - ecma: terserLanguageNames[target], - compress: { unsafe: true }, - output: { comments: false, beautify: false }, - mangle: { eval: true, - properties: { reserved, keep_quoted: true } - }, - safari10: true // <-- works around safari10 bugs, see the "safari10" option here: https://github.com/terser-js/terser#minify-options - }, }) - ] + ], + optimization: { + minimize: true, + minimizer: [ + new TerserPlugin({ + sourceMap: true, + terserOptions: { + ecma: terserLanguageNames[target], + output: { comments: false, beautify: false }, + compress: { unsafe: true }, + mangle: true, + safari10: true // <-- works around safari10 bugs, see the "safari10" option here: https://github.com/terser-js/terser#minify-options + }, + }) + ] + } })); const compilers = webpack(webpackConfigs); @@ -102,42 +88,3 @@ const minifyTask = ((cache, commonConfig) => memoizeTask(cache, function minifyJ module.exports = minifyTask; module.exports.minifyTask = minifyTask; - -const reservePublicNames = ((ESKeywords) => function reservePublicNames(target, format) { - const src = targetDir(target, format); - const publicModulePaths = [ - `../${src}/data.js`, - `../${src}/type.js`, - `../${src}/table.js`, - `../${src}/vector.js`, - `../${src}/util/int.js`, - `../${src}/predicate.js`, - `../${src}/recordbatch.js`, - `../${src}/${mainExport}.js`, - ]; - return publicModulePaths.reduce((keywords, publicModulePath) => [ - ...keywords, ...reserveExportedNames(esmRequire(publicModulePath, { warnings: false })) - ], [...ESKeywords]); -})(ESKeywords); - -// Reflect on the Arrow modules to come up with a list of keys to save from -// Terser's -// mangler. Assume all the non-inherited static and prototype members of the Arrow -// module and its direct exports are public, and should be preserved through minification. 
-const reserveExportedNames = (entryModule) => ( - Object - .getOwnPropertyNames(entryModule) - .filter((name) => ( - typeof entryModule[name] === `object` || - typeof entryModule[name] === `function` - )) - .map((name) => [name, entryModule[name]]) - .reduce((reserved, [name, value]) => { - const fn = function() {}; - const ownKeys = value && typeof value === 'object' && Object.getOwnPropertyNames(value) || []; - const protoKeys = typeof value === `function` && Object.getOwnPropertyNames(value.prototype || {}) || []; - const publicNames = [...ownKeys, ...protoKeys].filter((x) => x !== `default` && x !== `undefined` && !(x in fn)); - return [...reserved, name, ...publicNames]; - }, [] - ) -); diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js index 8c0f8fb0e4767..2a67c812206ce 100644 --- a/js/gulp/package-task.js +++ b/js/gulp/package-task.js @@ -46,17 +46,19 @@ const createMainPackageJson = (target, format) => (orig) => ({ ...createTypeScriptPackageJson(target, format)(orig), bin: orig.bin, name: npmPkgName, - main: mainExport, - types: `${mainExport}.d.ts`, - module: `${mainExport}.mjs`, + main: `${mainExport}.node`, + browser: `${mainExport}.dom`, + types: `${mainExport}.node.d.ts`, unpkg: `${mainExport}.es5.min.js`, - [`@std/esm`]: { mode: `all`, warnings: false, sourceMap: true } + [`esm`]: { mode: `all`, sourceMap: true } }); const createTypeScriptPackageJson = (target, format) => (orig) => ({ ...createScopedPackageJSON(target, format)(orig), - main: `${mainExport}.ts`, types: `${mainExport}.ts`, bin: undefined, + main: `${mainExport}.node.ts`, + types: `${mainExport}.node.ts`, + browser: `${mainExport}.dom.ts`, dependencies: { '@types/flatbuffers': '*', '@types/node': '*', @@ -70,8 +72,10 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => (xs, key) => ({ ...xs, [key]: xs[key] || orig[key] }), { name: `${npmOrgName}/${packageName(target, format)}`, - version: undefined, main: `${mainExport}.js`, types: `${mainExport}.d.ts`, - unpkg: undefined, module: undefined, [`@std/esm`]: undefined + browser: format === 'umd' ? undefined : `${mainExport}.dom`, + main: format === 'umd' ? `${mainExport}` : `${mainExport}.node`, + types: format === 'umd' ? undefined : `${mainExport}.node.d.ts`, + version: undefined, unpkg: undefined, module: undefined, [`esm`]: undefined, } ) ) @@ -80,6 +84,5 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) => const conditionallyAddStandardESMEntry = (target, format) => (packageJSON) => ( format !== `esm` && format !== `cls` ? 
packageJSON - : { ...packageJSON, [`@std/esm`]: { mode: `js`, warnings: false, sourceMap: true } } + : { ...packageJSON, [`esm`]: { mode: `auto`, sourceMap: true } } ); - \ No newline at end of file diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js index b0e34f8c94426..c7ad7d513c652 100644 --- a/js/gulp/test-task.js +++ b/js/gulp/test-task.js @@ -20,44 +20,47 @@ const path = require('path'); const { argv } = require('./argv'); const { promisify } = require('util'); const glob = promisify(require('glob')); -const stat = promisify(require('fs').stat); const mkdirp = promisify(require('mkdirp')); const rimraf = promisify(require('rimraf')); const child_process = require(`child_process`); const { memoizeTask } = require('./memoize-task'); const readFile = promisify(require('fs').readFile); +const asyncDone = promisify(require('async-done')); const exec = promisify(require('child_process').exec); const parseXML = promisify(require('xml2js').parseString); const jestArgv = []; -argv.update && jestArgv.push(`-u`); argv.verbose && jestArgv.push(`--verbose`); -argv.coverage && jestArgv.push(`--coverage`); +argv.coverage + ? jestArgv.push(`-c`, `jest.coverage.config.js`, `--coverage`) + : jestArgv.push(`-c`, `jest.config.js`, `-i`) -const debugArgv = [`--runInBand`, `--env`, `node-debug`]; -const jest = require.resolve(path.join(`..`, `node_modules`, `.bin`, `jest`)); +const jest = path.join(path.parse(require.resolve(`jest`)).dir, `../bin/jest.js`); const testOptions = { - env: { ...process.env }, stdio: [`ignore`, `inherit`, `inherit`], + env: { + ...process.env, + // hide fs.promises/stream[Symbol.asyncIterator] warnings + NODE_NO_WARNINGS: `1`, + // prevent the user-land `readable-stream` module from + // patching node's streams -- they're better now + READABLE_STREAM: `disable` + }, }; -const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format, debug = false) { +const testTask = ((cache, execArgv, testOptions) => memoizeTask(cache, function test(target, format) { const opts = { ...testOptions }; - const args = !debug ? [...execArgv] : [...debugArgv, ...execArgv]; - if (!argv.coverage) { - args.push(`test/${argv.integration ? `integration/*` : `unit/*`}`); - } - opts.env = { ...opts.env, + const args = [...execArgv, `test/unit/`]; + opts.env = { + ...opts.env, TEST_TARGET: target, TEST_MODULE: format, - TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || (opts.env.TEST_TS_SOURCE === 'true'), - JSON_PATHS: JSON.stringify(Array.isArray(argv.json_files) ? argv.json_files : [argv.json_files]), - ARROW_PATHS: JSON.stringify(Array.isArray(argv.arrow_files) ? argv.arrow_files : [argv.arrow_files]), + TEST_DOM_STREAMS: (target ==='src' || format === 'umd').toString(), + TEST_NODE_STREAMS: (target ==='src' || format !== 'umd').toString(), + TEST_TS_SOURCE: !!argv.coverage || (target === 'src') || (opts.env.TEST_TS_SOURCE === 'true') }; - return !debug ? 
- child_process.spawn(jest, args, opts) : - child_process.exec(`node --inspect-brk ${jest} ${args.join(` `)}`, opts); -}))({}, jestArgv, testOptions); + return asyncDone(() => child_process.spawn(`node`, args, opts)); +}))({}, [jest, ...jestArgv], testOptions); module.exports = testTask; module.exports.testTask = testTask; @@ -69,9 +72,9 @@ const ARROW_HOME = process.env.ARROW_HOME || path.resolve('../'); const ARROW_JAVA_DIR = process.env.ARROW_JAVA_DIR || path.join(ARROW_HOME, 'java'); const CPP_EXE_PATH = process.env.ARROW_CPP_EXE_PATH || path.join(ARROW_HOME, 'cpp/build/debug'); const ARROW_INTEGRATION_DIR = process.env.ARROW_INTEGRATION_DIR || path.join(ARROW_HOME, 'integration'); -const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'json-integration-test'); -const CPP_STREAM_TO_FILE = path.join(CPP_EXE_PATH, 'stream-to-file'); -const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'file-to-stream'); +const CPP_JSON_TO_ARROW = path.join(CPP_EXE_PATH, 'arrow-json-integration-test'); +const CPP_STREAM_TO_FILE = path.join(CPP_EXE_PATH, 'arrow-stream-to-file'); +const CPP_FILE_TO_STREAM = path.join(CPP_EXE_PATH, 'arrow-file-to-stream'); const testFilesDir = path.join(ARROW_HOME, 'js/test/data'); const snapshotsDir = path.join(ARROW_HOME, 'js/test/__snapshots__'); diff --git a/js/gulp/typescript-task.js b/js/gulp/typescript-task.js index beffab8a08ce0..fe694cac860b3 100644 --- a/js/gulp/typescript-task.js +++ b/js/gulp/typescript-task.js @@ -16,19 +16,26 @@ // under the License. const { - targetDir, tsconfigName, observableFromStreams + targetDir, + tsconfigName, + observableFromStreams, + shouldRunInChildProcess, + spawnGulpCommandInChildProcess, } = require('./util'); -const del = require('del'); const gulp = require('gulp'); const path = require('path'); const ts = require(`gulp-typescript`); -const gulpRename = require(`gulp-rename`); const sourcemaps = require('gulp-sourcemaps'); const { memoizeTask } = require('./memoize-task'); const { Observable, ReplaySubject } = require('rxjs'); const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target, format) { + + if (shouldRunInChildProcess(target, format)) { + return spawnGulpCommandInChildProcess('compile', target, format); + } + const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName(target, format)}.json`); return compileTypescript(out, tsconfigPath) @@ -39,11 +46,11 @@ const typescriptTask = ((cache) => memoizeTask(cache, function typescript(target function compileBinFiles(target, format) { const out = targetDir(target, format); const tsconfigPath = path.join(`tsconfig`, `tsconfig.${tsconfigName('bin', 'cjs')}.json`); - return compileTypescript(path.join(out, 'bin'), tsconfigPath); + return compileTypescript(path.join(out, 'bin'), tsconfigPath, { target }); } -function compileTypescript(out, tsconfigPath) { - const tsProject = ts.createProject(tsconfigPath, { typescript: require(`typescript`) }); +function compileTypescript(out, tsconfigPath, tsconfigOverrides) { + const tsProject = ts.createProject(tsconfigPath, { typescript: require(`typescript`), ...tsconfigOverrides }); const { stream: { js, dts } } = observableFromStreams( tsProject.src(), sourcemaps.init(), tsProject(ts.reporter.defaultReporter()) diff --git a/js/gulp/util.js b/js/gulp/util.js index 12d21b0e16be2..bd87684a1dc3d 100644 --- a/js/gulp/util.js +++ b/js/gulp/util.js @@ -17,8 +17,11 @@ const fs = require('fs'); const path = require(`path`); -const pump = require(`pump`); +const pump = 
require(`stream`).pipeline; +const child_process = require(`child_process`); +const { targets, modules } = require('./argv'); const { Observable, ReplaySubject } = require('rxjs'); +const asyncDone = require('util').promisify(require('async-done')); const mainExport = `Arrow`; const npmPkgName = `apache-arrow`; @@ -29,7 +32,7 @@ const knownTargets = [`es5`, `es2015`, `esnext`]; const knownModules = [`cjs`, `esm`, `cls`, `umd`]; const tasksToSkipPerTargetOrFormat = { src: { clean: true, build: true }, - cls: { test: true, integration: true } + cls: { test: true, package: true } }; const packageJSONFields = [ `version`, `license`, `description`, @@ -66,7 +69,7 @@ const UMDSourceTargets = { es2015: `es2015`, es2016: `es2015`, es2017: `es2015`, - esnext: `es2015` + esnext: `esnext` }; const terserLanguageNames = { @@ -109,12 +112,27 @@ function targetDir(target, format) { return path.join(releasesRootDir, ...(!format ? [target] : [target, format])); } -function logAndDie(e) { - if (e) { - process.exit(1); - } +function shouldRunInChildProcess(target, format) { + // If we're building more than one module/target, then yes run this task in a child process + if (targets.length > 1 || modules.length > 1) { return true; } + // If the target we're building *isn't* the target the gulp command was configured to run, then yes run that in a child process + if (targets[0] !== target || modules[0] !== format) { return true; } + // Otherwise no need -- either gulp was run for just one target, or we've been spawned as the child of a multi-target parent gulp + return false; +} + +const gulp = path.join(path.parse(require.resolve(`gulp`)).dir, `bin/gulp.js`); +function spawnGulpCommandInChildProcess(command, target, format) { + const args = [gulp, command, '-t', target, '-m', format, `--silent`]; + const opts = { + stdio: [`ignore`, `inherit`, `inherit`], + env: { ...process.env, NODE_NO_WARNINGS: `1` } + }; + return asyncDone(() => child_process.spawn(`node`, args, opts)) + .catch((e) => { throw { message: `${command}:${taskName(target, format)}` }; }); } +const logAndDie = (e) => { if (e) { process.exit(1); } }; function observableFromStreams(...streams) { if (streams.length <= 0) { return Observable.empty(); } const pumped = streams.length <= 1 ? streams[0] : pump(...streams, logAndDie); @@ -164,12 +182,37 @@ function* combinations(_targets, _modules) { } } +const publicModulePaths = (dir) => [ + `${dir}/${mainExport}.dom.js`, + `${dir}/util/int.js`, + `${dir}/compute/predicate.js`, +]; + +const esmRequire = require(`esm`)(module, { + mode: `auto`, + cjs: { + /* A boolean for storing ES modules in require.cache. */ + cache: true, + /* A boolean for respecting require.extensions in ESM. */ + extensions: true, + /* A boolean for __esModule interoperability. */ + interop: true, + /* A boolean for importing named exports of CJS modules. */ + namedExports: true, + /* A boolean for following CJS path rules in ESM. */ + paths: true, + /* A boolean for __dirname, __filename, and require in ESM. 
*/ + vars: true, + } +}); + module.exports = { mainExport, npmPkgName, npmOrgName, metadataFiles, packageJSONFields, knownTargets, knownModules, tasksToSkipPerTargetOrFormat, - ESKeywords, gCCLanguageNames, UMDSourceTargets, terserLanguageNames, + gCCLanguageNames, UMDSourceTargets, terserLanguageNames, taskName, packageName, tsconfigName, targetDir, combinations, observableFromStreams, + ESKeywords, publicModulePaths, esmRequire, shouldRunInChildProcess, spawnGulpCommandInChildProcess }; diff --git a/js/gulpfile.js b/js/gulpfile.js index 78aaa17ddb8b4..37c1d187995d2 100644 --- a/js/gulpfile.js +++ b/js/gulpfile.js @@ -17,17 +17,15 @@ const del = require('del'); const gulp = require('gulp'); -const path = require('path'); const { Observable } = require('rxjs'); -const buildTask = require('./gulp/build-task'); const cleanTask = require('./gulp/clean-task'); +const compileTask = require('./gulp/compile-task'); const packageTask = require('./gulp/package-task'); const { targets, modules } = require('./gulp/argv'); const { testTask, createTestData, cleanTestData } = require('./gulp/test-task'); const { - targetDir, taskName, combinations, - knownTargets, + targetDir, knownTargets, npmPkgName, UMDSourceTargets, tasksToSkipPerTargetOrFormat } = require('./gulp/util'); @@ -36,63 +34,60 @@ for (const [target, format] of combinations([`all`], [`all`])) { const task = taskName(target, format); gulp.task(`clean:${task}`, cleanTask(target, format)); gulp.task( `test:${task}`, testTask(target, format)); - gulp.task(`debug:${task}`, testTask(target, format, true)); - gulp.task(`build:${task}`, gulp.series(`clean:${task}`, - buildTask(target, format), - packageTask(target, format))); + gulp.task(`compile:${task}`, compileTask(target, format)); + gulp.task(`package:${task}`, packageTask(target, format)); + gulp.task(`build:${task}`, gulp.series( + `clean:${task}`, `compile:${task}`, `package:${task}` + )); } // The UMD bundles build temporary es5/6/next targets via TS, // then run the TS source through either closure-compiler or // a minifier, so we special case that here. 
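// Concretely, for the `es5` target (UMDSourceTargets.es5 === 'es5', task names
// joined as `<target>:<format>`), the rewritten loop below expands to roughly
// this sketch:
//
//   gulp.task('build:es5:umd', gulp.series(
//       'build:es5:cls',                          // compile the TS source once
//       'clean:es5:umd',
//       'compile:es5:umd',                        // bundle + minify from the cls output
//       'package:es5:umd',
//       function remove_closure_tmp_files() {
//           return del(targetDir('es5', 'cls')); // drop the temporary cls tree
//       }
//   ));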
-knownTargets.forEach((target) => - gulp.task(`build:${target}:umd`, - gulp.series( - gulp.parallel( - cleanTask(target, `umd`), - cleanTask(UMDSourceTargets[target], `cls`) - ), - buildTask(UMDSourceTargets[target], `cls`), - buildTask(target, `umd`), packageTask(target, `umd`) - ) - ) -); +knownTargets.forEach((target) => { + const umd = taskName(target, `umd`); + const cls = taskName(UMDSourceTargets[target], `cls`); + gulp.task(`build:${umd}`, gulp.series( + `build:${cls}`, + `clean:${umd}`, `compile:${umd}`, `package:${umd}`, + function remove_closure_tmp_files() { + return del(targetDir(target, `cls`)) + } + )); +}); // The main "apache-arrow" module builds the es5/umd, es2015/cjs, // es2015/esm, and es2015/umd targets, then copies and renames the // compiled output into the apache-arrow folder gulp.task(`build:${npmPkgName}`, gulp.series( - cleanTask(npmPkgName), gulp.parallel( `build:${taskName(`es5`, `umd`)}`, `build:${taskName(`es2015`, `cjs`)}`, `build:${taskName(`es2015`, `esm`)}`, `build:${taskName(`es2015`, `umd`)}` ), - buildTask(npmPkgName), packageTask(npmPkgName) + `clean:${npmPkgName}`, + `compile:${npmPkgName}`, + `package:${npmPkgName}` ) ); - -function gulpConcurrent(tasks) { - return () => Observable.bindCallback((tasks, cb) => gulp.parallel(tasks)(cb))(tasks); -} - -const buildConcurrent = (tasks) => () => - gulpConcurrent(tasks)() - .concat(Observable - .defer(() => Observable - .merge(...knownTargets.map((target) => - del(`${targetDir(target, `cls`)}/**`))))); - +// And finally the global composite tasks gulp.task(`clean:testdata`, cleanTestData); gulp.task(`create:testdata`, createTestData); -gulp.task(`test`, gulp.series(getTasks(`test`))); -gulp.task(`debug`, gulp.series(getTasks(`debug`))); +gulp.task(`test`, gulpConcurrent(getTasks(`test`))); gulp.task(`clean`, gulp.parallel(getTasks(`clean`))); -gulp.task(`build`, buildConcurrent(getTasks(`build`))); -gulp.task(`default`, gulp.series(`build`, `test`)); +gulp.task(`build`, gulpConcurrent(getTasks(`build`))); +gulp.task(`compile`, gulpConcurrent(getTasks(`compile`))); +gulp.task(`package`, gulpConcurrent(getTasks(`package`))); +gulp.task(`default`, gulp.series(`clean`, `build`, `test`)); + +function gulpConcurrent(tasks) { + const numCPUs = Math.max(1, require('os').cpus().length * 0.75) | 0; + return () => Observable.from(tasks.map((task) => gulp.series(task))) + .flatMap((task) => Observable.bindNodeCallback(task)(), numCPUs); +} function getTasks(name) { const tasks = []; diff --git a/js/index.ts b/js/index.ts index 51b8676abbd9d..cfd64bbbe9730 100644 --- a/js/index.ts +++ b/js/index.ts @@ -15,4 +15,4 @@ // specific language governing permissions and limitations // under the License. -export * from './src/Arrow'; \ No newline at end of file +export * from './src/Arrow.node'; \ No newline at end of file diff --git a/js/jest.config.js b/js/jest.config.js new file mode 100644 index 0000000000000..55028d09f969e --- /dev/null +++ b/js/jest.config.js @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +module.exports = { + "verbose": false, + "reporters": [ + "jest-silent-reporter" + ], + "testEnvironment": "node", + "globals": { + "ts-jest": { + "diagnostics": false, + "tsConfig": "test/tsconfig.json" + } + }, + "roots": [ + "/test/" + ], + "moduleFileExtensions": [ + "js", + "ts", + "tsx" + ], + "coverageReporters": [ + "lcov" + ], + "coveragePathIgnorePatterns": [ + "fb\\/(File|Message|Schema|Tensor)\\.(js|ts)$", + "test\\/.*\\.(ts|tsx|js)$", + "/node_modules/" + ], + "transform": { + "^.+\\.jsx?$": "ts-jest", + "^.+\\.tsx?$": "ts-jest" + }, + "transformIgnorePatterns": [ + "/node_modules/(?!web-stream-tools).+\\.js$" + ], + "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$", + "preset": "ts-jest", + "testMatch": null +}; diff --git a/js/jest.coverage.config.js b/js/jest.coverage.config.js new file mode 100644 index 0000000000000..72ddd3c9345a0 --- /dev/null +++ b/js/jest.coverage.config.js @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
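// This module deliberately spreads the base ./jest.config.js and lets later keys
// win: `reporters: undefined` restores Jest's default reporter, the coverage
// reporters widen to lcov + json, and ts-jest is pointed at
// test/tsconfig.coverage.json. gulp/test-task.js selects it by passing
// `-c jest.coverage.config.js --coverage`. To inspect the merged result, a quick
// sketch from the js/ directory:
//
//   node -e "console.log(require('./jest.coverage.config'))"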
+ +module.exports = { + ...require('./jest.config'), + "reporters": undefined, + "coverageReporters": [ + "lcov", "json" + ], + "globals": { + "ts-jest": { + "diagnostics": false, + "tsConfig": "test/tsconfig.coverage.json" + } + } +}; diff --git a/js/npm-release.sh b/js/npm-release.sh index 3ef24d3e6f828..a52e25ed7884a 100755 --- a/js/npm-release.sh +++ b/js/npm-release.sh @@ -20,11 +20,7 @@ set -e # validate the targets pass all tests before publishing npm install -# npx run-s clean:all lint create:testdata build -# npm run test -- -t ts -u --integration -# npm run test -- --integration -npx run-s clean:all lint build -npm run test +npx gulp # publish the JS target modules to npm npx lerna exec -- npm publish diff --git a/js/package-lock.json b/js/package-lock.json index ef38db9a7468d..3b31a6dff2085 100644 --- a/js/package-lock.json +++ b/js/package-lock.json @@ -64,57 +64,56 @@ } }, "@lerna/add": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/add/-/add-3.5.0.tgz", - "integrity": "sha512-hoOqtal/ChEEtt9rxR/6xmyvTN7581XF4kWHoWPV9NbfZN9e8uTR8z4mCcJq2DiZhRuY7aA5FEROEbl12soowQ==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/add/-/add-3.7.2.tgz", + "integrity": "sha512-/kCuyytOEmYcqpbU8MhHc2/3bPJjEx+qq7SOdb0cCDG+QcJ/oSsDCZ3xVHxhyLRYAoRlKBch3DiBmY4BeIm0Ag==", "dev": true, "requires": { - "@lerna/bootstrap": "^3.5.0", - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/bootstrap": "^3.7.2", + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", + "@lerna/npm-conf": "^3.7.0", + "@lerna/validation-error": "^3.6.0", "dedent": "^0.7.0", - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", "p-map": "^1.2.0", - "pacote": "^9.1.0", "semver": "^5.5.0" } }, "@lerna/batch-packages": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@lerna/batch-packages/-/batch-packages-3.1.2.tgz", - "integrity": "sha512-HAkpptrYeUVlBYbLScXgeCgk6BsNVXxDd53HVWgzzTWpXV4MHpbpeKrByyt7viXlNhW0w73jJbipb/QlFsHIhQ==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/batch-packages/-/batch-packages-3.6.0.tgz", + "integrity": "sha512-khG15B+EFLH3Oms6A6WsMAy54DrnKIhEAm6CCATN2BKnBkNgitYjLN2vKBzlR2LfQpTkgub67QKIJkMFQcK1Sg==", "dev": true, "requires": { - "@lerna/package-graph": "^3.1.2", - "@lerna/validation-error": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/package-graph": "^3.6.0", + "@lerna/validation-error": "^3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/bootstrap": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/bootstrap/-/bootstrap-3.5.0.tgz", - "integrity": "sha512-+z4kVVJFO5EGfC2ob/4C9LetqWwDtbhZgTRllr1+zOi/2clbD+WKcVI0ku+/ckzKjz783SOc83swX7RrmiLwMQ==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/bootstrap/-/bootstrap-3.7.2.tgz", + "integrity": "sha512-yVjr450UivC7gbIh3GZowJ6bzPy/xC75bduq2Zm+jdIksjM/8SA3HRXWNothaSyZWudV+WY+cy6MvwrtFe8Kbg==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", + "@lerna/batch-packages": "^3.6.0", + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", "@lerna/has-npm-version": "^3.3.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/npm-install": "^3.3.0", - "@lerna/rimraf-dir": "^3.3.0", - "@lerna/run-lifecycle": "^3.4.1", + "@lerna/npm-install": "^3.6.0", + "@lerna/package-graph": "^3.6.0", + "@lerna/pulse-till-done": "^3.7.1", + "@lerna/rimraf-dir": "^3.6.0", + 
"@lerna/run-lifecycle": "^3.7.1", "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/symlink-binary": "^3.3.0", - "@lerna/symlink-dependencies": "^3.3.0", - "@lerna/validation-error": "^3.0.0", + "@lerna/symlink-binary": "^3.7.2", + "@lerna/symlink-dependencies": "^3.7.2", + "@lerna/validation-error": "^3.6.0", "dedent": "^0.7.0", "get-port": "^3.2.0", + "libnpm": "^2.0.1", "multimatch": "^2.1.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0", @@ -124,26 +123,26 @@ } }, "@lerna/changed": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/changed/-/changed-3.5.0.tgz", - "integrity": "sha512-p9o7/hXwFAoet7UPeHIzIPonYxLHZe9bcNcjxKztZYAne5/OgmZiF4X1UPL2S12wtkT77WQy4Oz8NjRTczcapg==", + "version": "3.8.0", + "resolved": "https://registry.npmjs.org/@lerna/changed/-/changed-3.8.0.tgz", + "integrity": "sha512-IeOxB+nwGFpAuEgUi9FeP19hj6Abp1aNCeMjS9/KpOxrSGt3ejKlSKY83lwqDPbb6OnthQTRBlodWZpSiSPWqg==", "dev": true, "requires": { - "@lerna/collect-updates": "^3.5.0", - "@lerna/command": "^3.5.0", - "@lerna/listable": "^3.0.0", - "@lerna/output": "^3.0.0", - "@lerna/version": "^3.5.0" + "@lerna/collect-updates": "^3.6.0", + "@lerna/command": "^3.7.2", + "@lerna/listable": "^3.6.0", + "@lerna/output": "^3.6.0", + "@lerna/version": "^3.8.0" } }, "@lerna/check-working-tree": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/check-working-tree/-/check-working-tree-3.5.0.tgz", - "integrity": "sha512-aWeIputHddeZgf7/wA1e5yuv6q9S5si2y7fzO2Ah7m3KyDyl8XHP1M0VSSDzZeiloYCryAYQAoRgcrdH65Vhow==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/check-working-tree/-/check-working-tree-3.6.0.tgz", + "integrity": "sha512-Ioy1t2aVasAwhY1Oi5kfpwbW9RDupxxVVu2t2c1EeBYYCu3jIt1A5ad34gidgsKyiG3HeBEVziI4Uaihnb96ZQ==", "dev": true, "requires": { - "@lerna/describe-ref": "^3.5.0", - "@lerna/validation-error": "^3.0.0" + "@lerna/describe-ref": "^3.6.0", + "@lerna/validation-error": "^3.6.0" } }, "@lerna/child-process": { @@ -193,33 +192,44 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/clean": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/clean/-/clean-3.5.0.tgz", - "integrity": "sha512-bHUFF6Wv7ms81Tmwe56xk296oqU74Sg9NSkUCDG4kZLpYZx347Aw+89ZPTlaSmUwqCgEXKYLr65ZVVvKmflpcA==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/clean/-/clean-3.7.2.tgz", + "integrity": "sha512-BhuPnAWQa2av6hSE8imbOhenUnveSp0VDO1X0jzC1EX+K6sBCubbowM13kYi+N0qUd2kdeatBNwmafzkBZ3LcQ==", "dev": true, "requires": { - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", - "@lerna/prompt": "^3.3.1", - "@lerna/rimraf-dir": "^3.3.0", + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", + "@lerna/prompt": "^3.6.0", + "@lerna/pulse-till-done": "^3.7.1", + "@lerna/rimraf-dir": "^3.6.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0", "p-waterfall": "^1.0.0" } }, "@lerna/cli": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/@lerna/cli/-/cli-3.2.0.tgz", - "integrity": "sha512-JdbLyTxHqxUlrkI+Ke+ltXbtyA+MPu9zR6kg/n8Fl6uaez/2fZWtReXzYi8MgLxfUFa7+1OHWJv4eAMZlByJ+Q==", + "version": "3.6.0", + "resolved": 
"https://registry.npmjs.org/@lerna/cli/-/cli-3.6.0.tgz", + "integrity": "sha512-FGCx7XOLpqmU5eFOlo0Lt0hRZraxSUTEWM0bce0p+HNpOxBc91o6d2tenW1azPYFP9HzsMQey1NBtU0ofJJeog==", "dev": true, "requires": { "@lerna/global-options": "^3.1.3", "dedent": "^0.7.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "yargs": "^12.0.1" }, "dependencies": { @@ -260,13 +270,13 @@ } }, "execa": { - "version": "0.10.0", - "resolved": "https://registry.npmjs.org/execa/-/execa-0.10.0.tgz", - "integrity": "sha512-7XOMnz8Ynx1gGo/3hyV9loYNPWM94jG3+3T3Y8tsfSstFmETmENCMU/A/zj8Lyaj1lkgEepKepvd6240tBRvlw==", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/execa/-/execa-1.0.0.tgz", + "integrity": "sha512-adbxcyWV46qiHyvSp50TKt05tB4tK3HcmF7/nxfAdhnox83seTDbwnaqKO4sXRy7roHAIFqJP/Rw/AuEbX61LA==", "dev": true, "requires": { "cross-spawn": "^6.0.0", - "get-stream": "^3.0.0", + "get-stream": "^4.0.0", "is-stream": "^1.1.0", "npm-run-path": "^2.0.0", "p-finally": "^1.0.0", @@ -283,6 +293,15 @@ "locate-path": "^3.0.0" } }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", + "dev": true, + "requires": { + "pump": "^3.0.0" + } + }, "invert-kv": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/invert-kv/-/invert-kv-2.0.0.tgz", @@ -326,20 +345,20 @@ } }, "os-locale": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-3.0.1.tgz", - "integrity": "sha512-7g5e7dmXPtzcP4bgsZ8ixDVqA7oWYuEz4lOSujeWyliPai4gfVDiFIcwBg3aGCPnmSGfzOKTK3ccPn0CKv3DBw==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-3.1.0.tgz", + "integrity": "sha512-Z8l3R4wYWM40/52Z+S265okfFj8Kt2cC2MKY+xNi3kFs+XGI7WXu/I309QQQYbRW4ijiZ+yxs9pqEhJh0DqW3Q==", "dev": true, "requires": { - "execa": "^0.10.0", + "execa": "^1.0.0", "lcid": "^2.0.0", "mem": "^4.0.0" } }, "p-limit": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.0.0.tgz", - "integrity": "sha512-fl5s52lI5ahKCernzzIyAP0QAZbGIovtVHGwpcu1Jr/EpzLVDI2myISHwGqK7m8uQFugVWSrbxH7XnhGtvEc+A==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { "p-try": "^2.0.0" @@ -360,6 +379,22 @@ "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", "dev": true }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -379,6 +414,12 @@ "ansi-regex": "^3.0.0" } }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { "version": "12.0.5", "resolved": 
"https://registry.npmjs.org/yargs/-/yargs-12.0.5.tgz", @@ -412,34 +453,34 @@ } }, "@lerna/collect-updates": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/collect-updates/-/collect-updates-3.5.0.tgz", - "integrity": "sha512-rFCng14K8vHyrDJSAacj6ABKKT/TxZdpL9uPEtZN7DsoJKlKPzqFeRvRGA2+ed/I6mEm4ltauEjEpKG5O6xqtw==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/collect-updates/-/collect-updates-3.6.0.tgz", + "integrity": "sha512-knliEz3phY51SGnwDhhYqx6SJN6y9qh/gZrZgQ7ogqz1UgA/MyJb27gszjsyyG6jUQshimBpjsG7OMwjt8+n9A==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/describe-ref": "^3.5.0", + "@lerna/describe-ref": "^3.6.0", + "libnpm": "^2.0.1", "minimatch": "^3.0.4", - "npmlog": "^4.1.2", "slash": "^1.0.0" } }, "@lerna/command": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/command/-/command-3.5.0.tgz", - "integrity": "sha512-C/0e7qPbuKZ9vEqzRePksoKDJk4TOWzsU5qaPP/ikqc6vClJbKucsIehk3za6glSjlgLCJpzBTF2lFjHfb+JNw==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/command/-/command-3.7.2.tgz", + "integrity": "sha512-WtBnlvQfzKmnc2i3g+GLazx7pUXwbzASiXHy4j1CoC0w90H42LUqhwJICro4VhnE8xi38BNhcH/+xFNiHX5ERA==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/package-graph": "^3.1.2", - "@lerna/project": "^3.5.0", - "@lerna/validation-error": "^3.0.0", - "@lerna/write-log-file": "^3.0.0", + "@lerna/package-graph": "^3.6.0", + "@lerna/project": "^3.7.2", + "@lerna/validation-error": "^3.6.0", + "@lerna/write-log-file": "^3.6.0", "dedent": "^0.7.0", "execa": "^1.0.0", "is-ci": "^1.0.10", - "lodash": "^4.17.5", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1", + "lodash": "^4.17.5" }, "dependencies": { "cross-spawn": { @@ -478,23 +519,32 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/conventional-commits": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/conventional-commits/-/conventional-commits-3.5.0.tgz", - "integrity": "sha512-roKPILPYnDWiCDxOeBQ0cObJ2FbDgzJSToxr1ZwIqvJU5hGQ4RmooCf8GHcCW9maBJz7ETeestv8M2mBUgBPbg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/conventional-commits/-/conventional-commits-3.6.0.tgz", + "integrity": "sha512-KkY3wd7w/tj76EEIhTMYZlSBk/5WkT2NA9Gr/EuSwKV70PYyVA55l1OGlikBUAnuqIjwyfw9x3y+OcbYI4aNEg==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", + "@lerna/validation-error": "^3.6.0", "conventional-changelog-angular": "^5.0.2", "conventional-changelog-core": "^3.1.5", "conventional-recommended-bump": "^4.0.4", "fs-extra": "^7.0.0", "get-stream": "^4.0.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "semver": "^5.5.0" }, "dependencies": { @@ -506,25 +556,36 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, "@lerna/create": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/create/-/create-3.5.0.tgz", - "integrity": 
"sha512-ek4flHRmpMegZp9tP3RmuDhmMb9+/Hhy9B5eaZc5X5KWqDvFKJtn56sw+M9hNjiYehiimCwhaLWgE2WSikPvcQ==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/create/-/create-3.7.2.tgz", + "integrity": "sha512-eE6i4mVi5CefQ8Mw4WhkX9GcgiDllfEYfMq3LDMCtBH4pdzXO9oNG2p1J7bbwKgCFqhmKB4nr5FTFhijOIMRRw==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.5.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/command": "^3.7.2", + "@lerna/npm-conf": "^3.7.0", + "@lerna/validation-error": "^3.6.0", "camelcase": "^4.1.0", "dedent": "^0.7.0", "fs-extra": "^7.0.0", "globby": "^8.0.1", "init-package-json": "^1.10.3", - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", + "p-reduce": "^1.0.0", "pify": "^3.0.0", "semver": "^5.5.0", "slash": "^1.0.0", @@ -541,7 +602,7 @@ }, "globby": { "version": "8.0.1", - "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.1.tgz", + "resolved": "http://registry.npmjs.org/globby/-/globby-8.0.1.tgz", "integrity": "sha512-oMrYrJERnKBLXNLVTqhm3vPEdJ/b2ZE28xN4YARiix1NOIOBPEpOUnm844K1iu/BkphCaf2WNFwMszv8Soi1pw==", "dev": true, "requires": { @@ -568,81 +629,115 @@ } }, "@lerna/create-symlink": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/create-symlink/-/create-symlink-3.3.0.tgz", - "integrity": "sha512-0lb88Nnq1c/GG+fwybuReOnw3+ah4dB81PuWwWwuqUNPE0n50qUf/M/7FfSb5JEh/93fcdbZI0La8t3iysNW1w==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/create-symlink/-/create-symlink-3.6.0.tgz", + "integrity": "sha512-YG3lTb6zylvmGqKU+QYA3ylSnoLn+FyLH5XZmUsD0i85R884+EyJJeHx/zUk+yrL2ZwHS4RBUgJfC24fqzgPoA==", "dev": true, "requires": { "cmd-shim": "^2.0.2", "fs-extra": "^7.0.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/describe-ref": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/describe-ref/-/describe-ref-3.5.0.tgz", - "integrity": "sha512-XvecK2PSwUv4z+otib5moWJMI+h3mtAg8nFlfo4KbivVtD/sI11jfKsr3S75HuAwhVAa8tAijoAxmuBJSsTE1g==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/describe-ref/-/describe-ref-3.6.0.tgz", + "integrity": "sha512-hVZJ2hYVbrrNiEG+dEg/Op4pYAbROkDZdiIUabAJffr0T/frcN+5es2HfmOC//4+78Cs1M9iTyQRoyC1RXS2BQ==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/diff": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/diff/-/diff-3.5.0.tgz", - "integrity": "sha512-iyZ0ZRPqH5Y5XEhOYoKS8H/8UXC/gZ/idlToMFHhUn1oTSd8v9HVU1c2xq1ge0u36ZH/fx/YydUk0A/KSv+p3Q==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/diff/-/diff-3.7.2.tgz", + "integrity": "sha512-BVcceQHxwr0hIO4hZ8Udeb1Afn2opDiMXSh3dEyV7kcbYlgc66AxsviVPr4txGP/p8uRlzBUDzgHShVMplMGcg==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.5.0", - "@lerna/validation-error": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/command": "^3.7.2", + "@lerna/validation-error": "^3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/exec": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/exec/-/exec-3.5.0.tgz", - "integrity": "sha512-H5jeIueDiuNsxeuGKaP7HqTcenvMsFfBFeWr0W6knHv9NrOF8il34dBqYgApZEDSQ7+2fA3ghwWbF+jUGTSh/A==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/exec/-/exec-3.7.2.tgz", + "integrity": "sha512-oEm3EbSxXeMguqC+ekXaBlRmo/aaJc2BcWPHrd+5+9evHhHo/7oOu/xXmbhJYCgZytGkJ6BrX3F9XhWnC+14wg==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", + 
"@lerna/batch-packages": "^3.6.0", "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/validation-error": "^3.0.0" + "@lerna/validation-error": "^3.6.0" } }, "@lerna/filter-options": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/filter-options/-/filter-options-3.5.0.tgz", - "integrity": "sha512-7pEQy1i5ynYOYjcSeo+Qaps4+Ais55RRdnT6/SLLBgyyHAMziflFLX5TnoyEaaXoU90iKfQ5z/ioEp6dFAXSMg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/filter-options/-/filter-options-3.6.0.tgz", + "integrity": "sha512-6iUMZuvvXPL5EAF7Zo9azaZ6FxOq6tGbiSX8fUXgCdN+jlRjorvkzR+E0HS4bEGTWmV446lnLwdQLZuySfLcbQ==", "dev": true, "requires": { - "@lerna/collect-updates": "^3.5.0", - "@lerna/filter-packages": "^3.0.0", + "@lerna/collect-updates": "^3.6.0", + "@lerna/filter-packages": "^3.6.0", "dedent": "^0.7.0" } }, "@lerna/filter-packages": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/filter-packages/-/filter-packages-3.0.0.tgz", - "integrity": "sha512-zwbY1J4uRjWRZ/FgYbtVkq7I3Nduwsg2V2HwLKSzwV2vPglfGqgovYOVkND6/xqe2BHwDX4IyA2+e7OJmLaLSA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/filter-packages/-/filter-packages-3.6.0.tgz", + "integrity": "sha512-O/nIENV3LOqp/TiUIw3Ir6L/wUGFDeYBdJsJTQDlTAyHZsgYA1OIn9FvlW8nqBu1bNLzoBVHXh3c5azx1kE+Hg==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", - "multimatch": "^2.1.0", - "npmlog": "^4.1.2" + "@lerna/validation-error": "^3.6.0", + "libnpm": "^2.0.1", + "multimatch": "^2.1.0" } }, "@lerna/get-npm-exec-opts": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-3.0.0.tgz", - "integrity": "sha512-arcYUm+4xS8J3Palhl+5rRJXnZnFHsLFKHBxznkPIxjwGQeAEw7df38uHdVjEQ+HNeFmHnBgSqfbxl1VIw5DHg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/get-npm-exec-opts/-/get-npm-exec-opts-3.6.0.tgz", + "integrity": "sha512-ruH6KuLlt75aCObXfUIdVJqmfVq7sgWGq5mXa05vc1MEqxTIiU23YiJdWzofQOOUOACaZkzZ4K4Nu7wXEg4Xgg==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, + "@lerna/get-packed": { + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@lerna/get-packed/-/get-packed-3.7.0.tgz", + "integrity": "sha512-yuFtjsUZIHjeIvIYQ/QuytC+FQcHwo3peB+yGBST2uWCLUCR5rx6knoQcPzbxdFDCuUb5IFccFGd3B1fHFg3RQ==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "fs-extra": "^7.0.0", + "ssri": "^6.0.1", + "tar": "^4.4.8" + }, + "dependencies": { + "tar": { + "version": "4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", + "dev": true, + "requires": { + "chownr": "^1.1.1", + "fs-minipass": "^1.2.5", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", + "mkdirp": "^0.5.0", + "safe-buffer": "^5.1.2", + "yallist": "^3.0.2" + } + }, + "yallist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true + } } }, "@lerna/global-options": { @@ -662,84 +757,86 @@ } }, "@lerna/import": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/import/-/import-3.5.0.tgz", - "integrity": 
"sha512-vgI6lMEzd1ODgi75cmAlfPYylaK37WY3E2fwKyO/lj6UKSGj46dVSK0KwTRHx33tu4PLvPzFi5C6nbY57o5ykQ==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/import/-/import-3.7.2.tgz", + "integrity": "sha512-TGTYjhzDGLEqc9imWOi/fvIbZdmVxfV71OFB6AS98N9KQE68bbpttehQqCUIPATReVuzPUzxEiF3tMnKd7iEqg==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.5.0", - "@lerna/prompt": "^3.3.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/command": "^3.7.2", + "@lerna/prompt": "^3.6.0", + "@lerna/pulse-till-done": "^3.7.1", + "@lerna/validation-error": "^3.6.0", "dedent": "^0.7.0", "fs-extra": "^7.0.0", "p-map-series": "^1.0.0" } }, "@lerna/init": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/init/-/init-3.5.0.tgz", - "integrity": "sha512-V21/UWj34Mph+9NxIGH1kYcuJAp+uFjfG8Ku2nMy62OGL3553+YQ+Izr+R6egY8y/99UMCDpi5gkQni5eGv3MA==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/init/-/init-3.7.2.tgz", + "integrity": "sha512-840Az0GtyepX7/WH3QvOQDZJCEGFf4IykjjFuCLF+23+Od8Wxn3QCsp4Yn/+HKi/w7bSpsCHJ6xQG208dygfdw==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/command": "^3.5.0", + "@lerna/command": "^3.7.2", "fs-extra": "^7.0.0", "p-map": "^1.2.0", "write-json-file": "^2.3.0" } }, "@lerna/link": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/link/-/link-3.5.0.tgz", - "integrity": "sha512-KSu1mhxwNRmguqMqUTJd4c7QIk9/xmxJxbmMkA71OaJd4fwondob6DyI/B17NIWutdLbvSWQ7pRlFOPxjQVoUw==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/link/-/link-3.7.2.tgz", + "integrity": "sha512-iwxftHVPknb+RXtD7257/FR4DYiCxJRxqo6z/YGlojWjehYRfbK7tJe4xzRzxepIXAE8+ooQFqQ73m0/ozk6kQ==", "dev": true, "requires": { - "@lerna/command": "^3.5.0", - "@lerna/package-graph": "^3.1.2", - "@lerna/symlink-dependencies": "^3.3.0", + "@lerna/command": "^3.7.2", + "@lerna/package-graph": "^3.6.0", + "@lerna/symlink-dependencies": "^3.7.2", "p-map": "^1.2.0", "slash": "^1.0.0" } }, "@lerna/list": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/list/-/list-3.5.0.tgz", - "integrity": "sha512-T+NZBQ/l6FmZklgrtFuN7luMs3AC/BoS52APOPrM7ZmxW4nenvov0xMwQW1783w/t365YDkDlYd5gM0nX3D1Hg==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/list/-/list-3.7.2.tgz", + "integrity": "sha512-yup9KivG31APzr+C96up83m1llqs62spsLuKkinwVUhL5mobhDscT6QwIWTJPRJ8Bbmi++SdXGLfGFkYmgujzQ==", "dev": true, "requires": { - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", - "@lerna/listable": "^3.0.0", - "@lerna/output": "^3.0.0" + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", + "@lerna/listable": "^3.6.0", + "@lerna/output": "^3.6.0" } }, "@lerna/listable": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/listable/-/listable-3.0.0.tgz", - "integrity": "sha512-HX/9hyx1HLg2kpiKXIUc1EimlkK1T58aKQ7ovO7rQdTx9ForpefoMzyLnHE1n4XrUtEszcSWJIICJ/F898M6Ag==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/listable/-/listable-3.6.0.tgz", + "integrity": "sha512-fz63+zlqrJ9KQxIiv0r7qtufM4DEinSayAuO8YJuooz+1ctIP7RvMEQNvYI/E9tDlUo9Q0de68b5HbKrpmA5rQ==", "dev": true, "requires": { + "@lerna/batch-packages": "^3.6.0", "chalk": "^2.3.1", "columnify": "^1.5.4" } }, "@lerna/log-packed": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@lerna/log-packed/-/log-packed-3.0.4.tgz", - "integrity": 
"sha512-vVQHgMagE2wnbxhNY9nFkdu+Cx2TsyWalkJfkxbNzmo6gOCrDsxCBDj9vTEV8Q+4aWx0C0Bsc0sB2Eb8y/+ofA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/log-packed/-/log-packed-3.6.0.tgz", + "integrity": "sha512-T/J41zMkzpWB5nbiTRS5PmYTFn74mJXe6RQA2qhkdLi0UqnTp97Pux1loz3jsJf2yJtiQUnyMM7KuKIAge0Vlw==", "dev": true, "requires": { "byte-size": "^4.0.3", "columnify": "^1.5.4", "has-unicode": "^2.0.1", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/npm-conf": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/npm-conf/-/npm-conf-3.4.1.tgz", - "integrity": "sha512-i9G6DnbCqiAqxKx2rSXej/n14qxlV/XOebL6QZonxJKzNTB+Q2wglnhTXmfZXTPJfoqimLaY4NfAEtbOXRWOXQ==", + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@lerna/npm-conf/-/npm-conf-3.7.0.tgz", + "integrity": "sha512-+WSMDfPKcKzMfqq283ydz9RRpOU6p9wfx0wy4hVSUY/6YUpsyuk8SShjcRtY8zTM5AOrxvFBuuV90H4YpZ5+Ng==", "dev": true, "requires": { "config-chain": "^1.1.11", @@ -747,120 +844,175 @@ } }, "@lerna/npm-dist-tag": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-dist-tag/-/npm-dist-tag-3.3.0.tgz", - "integrity": "sha512-EtZJXzh3w5tqXEev+EBBPrWKWWn0WgJfxm4FihfS9VgyaAW8udIVZHGkIQ3f+tBtupcAzA9Q8cQNUkGF2efwmA==", + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/@lerna/npm-dist-tag/-/npm-dist-tag-3.7.1.tgz", + "integrity": "sha512-caUfA1L6wFl/nvIkk4q7qbFHZSnF2P8zf3Xk7vJMolRybYbj+WT1gYb5C446qPIF75p7JtFu3C/AJzwzdbljCw==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "npmlog": "^4.1.2" + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1" } }, "@lerna/npm-install": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-install/-/npm-install-3.3.0.tgz", - "integrity": "sha512-WoVvKdS8ltROTGSNQwo6NDq0YKnjwhvTG4li1okcN/eHKOS3tL9bxbgPx7No0wOq5DKBpdeS9KhAfee6LFAZ5g==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/npm-install/-/npm-install-3.6.0.tgz", + "integrity": "sha512-RKV31VdrBZKjmKfq25JG4mIHJ8NAOsLKq/aYSaBs8zP+uwXH7RU39saVfv9ReKiAzhKE2ghOG2JeMdIHtYnPNA==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", + "@lerna/get-npm-exec-opts": "^3.6.0", "fs-extra": "^7.0.0", - "npm-package-arg": "^6.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "signal-exit": "^3.0.2", "write-pkg": "^3.1.0" } }, "@lerna/npm-publish": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@lerna/npm-publish/-/npm-publish-3.3.1.tgz", - "integrity": "sha512-bVTlWIcBL6Zpyzqvr9C7rxXYcoPw+l7IPz5eqQDNREj1R39Wj18OWB2KTJq8l7LIX7Wf4C2A1uT5hJaEf9BuvA==", + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/@lerna/npm-publish/-/npm-publish-3.7.1.tgz", + "integrity": "sha512-3Tv4UWD+1Wz1Eqc7/8eEvAHL5c2pTx+rOKYMEc6P5Z1glN1+TfIfPckPAX0H2xg44yTCh1KGJSSBpJQl68QqIQ==", "dev": true, "requires": { - "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "@lerna/has-npm-version": "^3.3.0", - "@lerna/log-packed": "^3.0.4", + "@lerna/run-lifecycle": "^3.7.1", + "figgy-pudding": "^3.5.1", "fs-extra": "^7.0.0", - "npmlog": "^4.1.2", - "p-map": "^1.2.0" + "libnpm": "^2.0.1" } }, "@lerna/npm-run-script": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/npm-run-script/-/npm-run-script-3.3.0.tgz", - "integrity": "sha512-YqDguWZzp4jIomaE4aWMUP7MIAJAFvRAf6ziQLpqwoQskfWLqK5mW0CcszT1oLjhfb3cY3MMfSTFaqwbdKmICg==", + "version": "3.6.0", + "resolved": 
"https://registry.npmjs.org/@lerna/npm-run-script/-/npm-run-script-3.6.0.tgz", + "integrity": "sha512-6DRNFma30ex9r1a8mMDXziSRHf1/mo//hnvW1Zc1ctBh+7PU4I8n3A2ht/+742vtoTQH93Iqs3QSJl2KOLSsYg==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "npmlog": "^4.1.2" + "@lerna/get-npm-exec-opts": "^3.6.0", + "libnpm": "^2.0.1" } }, "@lerna/output": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/output/-/output-3.0.0.tgz", - "integrity": "sha512-EFxnSbO0zDEVKkTKpoCUAFcZjc3gn3DwPlyTDxbeqPU7neCfxP4rA4+0a6pcOfTlRS5kLBRMx79F2TRCaMM3DA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/output/-/output-3.6.0.tgz", + "integrity": "sha512-9sjQouf6p7VQtVCRnzoTGlZyURd48i3ha3WBHC/UBJnHZFuXMqWVPKNuvnMf2kRXDyoQD+2mNywpmEJg5jOnRg==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, + "@lerna/pack-directory": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/pack-directory/-/pack-directory-3.7.2.tgz", + "integrity": "sha512-yAZNSdAsBD26as+Il1l5R0fQaI6vTJqyNeK181V2vf34+KC0NX9TVaM+/Ht28QpK+3SaD2tvVP1T7OP2w0g2qg==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "@lerna/get-packed": "^3.7.0", + "@lerna/package": "^3.7.2", + "@lerna/run-lifecycle": "^3.7.1", + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1", + "npm-packlist": "^1.1.12", + "tar": "^4.4.8", + "temp-write": "^3.4.0" + }, + "dependencies": { + "tar": { + "version": "4.4.8", + "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", + "integrity": "sha512-LzHF64s5chPQQS0IYBn9IN5h3i98c12bo4NCO7e0sGM2llXQ3p2FGC5sdENN4cTW48O915Sh+x+EXx7XW96xYQ==", + "dev": true, + "requires": { + "chownr": "^1.1.1", + "fs-minipass": "^1.2.5", + "minipass": "^2.3.4", + "minizlib": "^1.1.1", + "mkdirp": "^0.5.0", + "safe-buffer": "^5.1.2", + "yallist": "^3.0.2" + } + }, + "yallist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true + } } }, "@lerna/package": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/package/-/package-3.0.0.tgz", - "integrity": "sha512-djzEJxzn212wS8d9znBnlXkeRlPL7GqeAYBykAmsuq51YGvaQK67Umh5ejdO0uxexF/4r7yRwgrlRHpQs8Rfqg==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/package/-/package-3.7.2.tgz", + "integrity": "sha512-8A5hN2CekM1a0Ix4VUO/g+REo+MsnXb8lnQ0bGjr1YGWzSL5NxYJ0Z9+0pwTfDpvRDYlFYO0rMVwBUW44b4dUw==", "dev": true, "requires": { - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", + "load-json-file": "^4.0.0", "write-pkg": "^3.1.0" + }, + "dependencies": { + "load-json-file": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", + "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "parse-json": "^4.0.0", + "pify": "^3.0.0", + "strip-bom": "^3.0.0" + } + }, + "parse-json": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", + "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", + "dev": true, + "requires": { + "error-ex": "^1.3.1", + "json-parse-better-errors": "^1.0.1" + } + }, + "strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", + "dev": true + } } }, "@lerna/package-graph": { - "version": "3.1.2", - 
"resolved": "https://registry.npmjs.org/@lerna/package-graph/-/package-graph-3.1.2.tgz", - "integrity": "sha512-9wIWb49I1IJmyjPdEVZQ13IAi9biGfH/OZHOC04U2zXGA0GLiY+B3CAx6FQvqkZ8xEGfqzmXnv3LvZ0bQfc1aQ==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/package-graph/-/package-graph-3.6.0.tgz", + "integrity": "sha512-Xtldh3DTiC3cPDrs6OY5URiuRXGPMIN6uFKcx59rOu3TkqYRt346jRyX+hm85996Y/pboo3+JuQlonvuEP/9QQ==", "dev": true, "requires": { - "@lerna/validation-error": "^3.0.0", - "npm-package-arg": "^6.0.0", + "@lerna/validation-error": "^3.6.0", + "libnpm": "^2.0.1", "semver": "^5.5.0" } }, "@lerna/project": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/project/-/project-3.5.0.tgz", - "integrity": "sha512-uFDzqwrD7a/tTohQoo0voTsRy2cgl9D1ZOU2pHZzHzow9S1M8E0x5q3hJI2HlwsZry9IUugmDUGO6UddTjwm3Q==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/project/-/project-3.7.2.tgz", + "integrity": "sha512-YNJw61G4YrnwW0P1NAR/bd/kfDdK+WPI5YH10AHsG1TXBFV9hBusjB7MROmobYbln7zNWJJ3PQmXtWv134aaRQ==", "dev": true, "requires": { - "@lerna/package": "^3.0.0", - "@lerna/validation-error": "^3.0.0", + "@lerna/package": "^3.7.2", + "@lerna/validation-error": "^3.6.0", "cosmiconfig": "^5.0.2", "dedent": "^0.7.0", "dot-prop": "^4.2.0", "glob-parent": "^3.1.0", "globby": "^8.0.1", + "libnpm": "^2.0.1", "load-json-file": "^4.0.0", - "npmlog": "^4.1.2", "p-map": "^1.2.0", "resolve-from": "^4.0.0", "write-json-file": "^2.3.0" }, "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, "globby": { "version": "8.0.1", - "resolved": "https://registry.npmjs.org/globby/-/globby-8.0.1.tgz", + "resolved": "http://registry.npmjs.org/globby/-/globby-8.0.1.tgz", "integrity": "sha512-oMrYrJERnKBLXNLVTqhm3vPEdJ/b2ZE28xN4YARiix1NOIOBPEpOUnm844K1iu/BkphCaf2WNFwMszv8Soi1pw==", "dev": true, "requires": { @@ -873,21 +1025,6 @@ "slash": "^1.0.0" } }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -925,42 +1062,43 @@ } }, "@lerna/prompt": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/@lerna/prompt/-/prompt-3.3.1.tgz", - "integrity": "sha512-eJhofrUCUaItMIH6et8kI7YqHfhjWqGZoTsE+40NRCfAraOMWx+pDzfRfeoAl3qeRAH2HhNj1bkYn70FbUOxuQ==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/prompt/-/prompt-3.6.0.tgz", + "integrity": "sha512-nyAjPMolJ/ZRAAVcXrUH89C4n1SiWvLh4xWNvWYKLcf3PI5yges35sDFP/HYrM4+cEbkNFuJCRq6CxaET4PRsg==", "dev": true, "requires": { "inquirer": "^6.2.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/publish": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@lerna/publish/-/publish-3.5.1.tgz", - "integrity": "sha512-ltw2YdWWzev9cZRAzons5ywZh9NJARPX67meeA95oMDVMrhD4Y9VHQNJ3T8ueec/W78/4sKlMSr3ecWyPNp5bg==", + "version": "3.8.0", + "resolved": 
"https://registry.npmjs.org/@lerna/publish/-/publish-3.8.0.tgz", + "integrity": "sha512-EJDF6oPySIHQRre9KMMqtltrPReuBT7Po72W6OQxCUmCjqDyUd6884lhqFHOgbtOl1axrVVaSOpxCU1m+SLNgA==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/check-working-tree": "^3.5.0", + "@lerna/batch-packages": "^3.6.0", + "@lerna/check-working-tree": "^3.6.0", "@lerna/child-process": "^3.3.0", - "@lerna/collect-updates": "^3.5.0", - "@lerna/command": "^3.5.0", - "@lerna/describe-ref": "^3.5.0", - "@lerna/get-npm-exec-opts": "^3.0.0", - "@lerna/npm-conf": "^3.4.1", - "@lerna/npm-dist-tag": "^3.3.0", - "@lerna/npm-publish": "^3.3.1", - "@lerna/output": "^3.0.0", - "@lerna/prompt": "^3.3.1", - "@lerna/run-lifecycle": "^3.4.1", + "@lerna/collect-updates": "^3.6.0", + "@lerna/command": "^3.7.2", + "@lerna/describe-ref": "^3.6.0", + "@lerna/log-packed": "^3.6.0", + "@lerna/npm-conf": "^3.7.0", + "@lerna/npm-dist-tag": "^3.7.1", + "@lerna/npm-publish": "^3.7.1", + "@lerna/output": "^3.6.0", + "@lerna/pack-directory": "^3.7.2", + "@lerna/prompt": "^3.6.0", + "@lerna/pulse-till-done": "^3.7.1", + "@lerna/run-lifecycle": "^3.7.1", "@lerna/run-parallel-batches": "^3.0.0", - "@lerna/validation-error": "^3.0.0", - "@lerna/version": "^3.5.0", + "@lerna/validation-error": "^3.6.0", + "@lerna/version": "^3.8.0", + "figgy-pudding": "^3.5.1", "fs-extra": "^7.0.0", - "libnpmaccess": "^3.0.0", - "npm-package-arg": "^6.0.0", + "libnpm": "^2.0.1", "npm-registry-fetch": "^3.8.0", - "npmlog": "^4.1.2", "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-pipe": "^1.2.0", @@ -968,55 +1106,72 @@ "semver": "^5.5.0" } }, + "@lerna/pulse-till-done": { + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/@lerna/pulse-till-done/-/pulse-till-done-3.7.1.tgz", + "integrity": "sha512-MzpesZeW3Mc+CiAq4zUt9qTXI9uEBBKrubYHE36voQTSkHvu/Rox6YOvfUr+U7P6k8frFPeCgGpfMDTLhiqe6w==", + "dev": true, + "requires": { + "libnpm": "^2.0.1" + } + }, "@lerna/resolve-symlink": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/resolve-symlink/-/resolve-symlink-3.3.0.tgz", - "integrity": "sha512-KmoPDcFJ2aOK2inYHbrsiO9SodedUj0L1JDvDgirVNIjMUaQe2Q6Vi4Gh+VCJcyB27JtfHioV9R2NxU72Pk2hg==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/resolve-symlink/-/resolve-symlink-3.6.0.tgz", + "integrity": "sha512-TVOAEqHJSQVhNDMFCwEUZPaOETqHDQV1TQWQfC8ZlOqyaUQ7veZUbg0yfG7RPNzlSpvF0ZaGFeR0YhYDAW03GA==", "dev": true, "requires": { "fs-extra": "^7.0.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "read-cmd-shim": "^1.0.1" } }, "@lerna/rimraf-dir": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/rimraf-dir/-/rimraf-dir-3.3.0.tgz", - "integrity": "sha512-vSqOcZ4kZduiSprbt+y40qziyN3VKYh+ygiCdnbBbsaxpdKB6CfrSMUtrLhVFrqUfBHIZRzHIzgjTdtQex1KLw==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/rimraf-dir/-/rimraf-dir-3.6.0.tgz", + "integrity": "sha512-2CfyWP1lqxDET+SfwGlLUfgqGF4vz9TYDrmb7Zi//g7IFCo899uU2vWOrEcdWTgbKE3Qgwwfk9c008w5MWUhog==", "dev": true, "requires": { "@lerna/child-process": "^3.3.0", - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "path-exists": "^3.0.0", "rimraf": "^2.6.2" + }, + "dependencies": { + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } } }, "@lerna/run": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/run/-/run-3.5.0.tgz", - "integrity": 
"sha512-BnPD52tj794xG2Xsc4FvgksyFX2CLmSR28TZw/xASEuy14NuQYMZkvbaj61SEhyOEsq7pLhHE5PpfbIv2AIFJw==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/run/-/run-3.7.2.tgz", + "integrity": "sha512-FwBjcrtYSFyvY2YXJ8GoI9VNv2UElUbVra5+iTF1DgQh37RmK0ZCODkfXp6PYyUszHkgCRuJqhK0+yMWRJo61w==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/command": "^3.5.0", - "@lerna/filter-options": "^3.5.0", - "@lerna/npm-run-script": "^3.3.0", - "@lerna/output": "^3.0.0", + "@lerna/batch-packages": "^3.6.0", + "@lerna/command": "^3.7.2", + "@lerna/filter-options": "^3.6.0", + "@lerna/npm-run-script": "^3.6.0", + "@lerna/output": "^3.6.0", "@lerna/run-parallel-batches": "^3.0.0", "@lerna/timer": "^3.5.0", - "@lerna/validation-error": "^3.0.0", + "@lerna/validation-error": "^3.6.0", "p-map": "^1.2.0" } }, "@lerna/run-lifecycle": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/@lerna/run-lifecycle/-/run-lifecycle-3.4.1.tgz", - "integrity": "sha512-N/hi2srM9A4BWEkXccP7vCEbf4MmIuALF00DTBMvc0A/ccItwUpl3XNuM7+ADDRK0mkwE3hDw89lJ3A7f8oUQw==", + "version": "3.7.1", + "resolved": "https://registry.npmjs.org/@lerna/run-lifecycle/-/run-lifecycle-3.7.1.tgz", + "integrity": "sha512-kE6w8d8Qde+ewZaDNIz4zhwde8s/i8vbbOsGDlR/Vw/9nqlmtj2YBZaS262NtWj83N04dtdYr4FVj51thciGQw==", "dev": true, "requires": { - "@lerna/npm-conf": "^3.4.1", - "npm-lifecycle": "^2.0.0", - "npmlog": "^4.1.2" + "@lerna/npm-conf": "^3.7.0", + "figgy-pudding": "^3.5.1", + "libnpm": "^2.0.1" } }, "@lerna/run-parallel-batches": { @@ -1030,79 +1185,28 @@ } }, "@lerna/symlink-binary": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/symlink-binary/-/symlink-binary-3.3.0.tgz", - "integrity": "sha512-zRo6CimhvH/VJqCFl9T4IC6syjpWyQIxEfO2sBhrapEcfwjtwbhoGgKwucsvt4rIpFazCw63jQ/AXMT27KUIHg==", + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/symlink-binary/-/symlink-binary-3.7.2.tgz", + "integrity": "sha512-xS7DdBXNQgfgrhBe2Jz27+S65yxBfnl+Xi+grvlqoEGVk7b8kt2VcBtui/XgL6AAaTg6f9szj4LUnwC/oX6S1Q==", + "dev": true, + "requires": { + "@lerna/create-symlink": "^3.6.0", + "@lerna/package": "^3.7.2", + "fs-extra": "^7.0.0", + "p-map": "^1.2.0" + } + }, + "@lerna/symlink-dependencies": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@lerna/symlink-dependencies/-/symlink-dependencies-3.7.2.tgz", + "integrity": "sha512-53fZUGQ+QLr5P7I9/pqFmCizLo4Q/Jz5ETd1NURO2+eABGdYuTnuvtqyGku+eOr9A4gYDaVmg50KEpsOXq9TWg==", "dev": true, "requires": { - "@lerna/create-symlink": "^3.3.0", - "@lerna/package": "^3.0.0", + "@lerna/create-symlink": "^3.6.0", + "@lerna/resolve-symlink": "^3.6.0", + "@lerna/symlink-binary": "^3.7.2", "fs-extra": "^7.0.0", - "p-map": "^1.2.0", - "read-pkg": "^3.0.0" - }, - "dependencies": { - "load-json-file": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", - "integrity": "sha1-L19Fq5HjMhYjT9U62rZo607AmTs=", - "dev": true, - "requires": { - "graceful-fs": "^4.1.2", - "parse-json": "^4.0.0", - "pify": "^3.0.0", - "strip-bom": "^3.0.0" - } - }, - "parse-json": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz", - "integrity": "sha1-vjX1Qlvh9/bHRxhPmKeIy5lHfuA=", - "dev": true, - "requires": { - "error-ex": "^1.3.1", - "json-parse-better-errors": "^1.0.1" - } - }, - "path-type": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz", - "integrity": 
"sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==", - "dev": true, - "requires": { - "pify": "^3.0.0" - } - }, - "read-pkg": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/read-pkg/-/read-pkg-3.0.0.tgz", - "integrity": "sha1-nLxoaXj+5l0WwA4rGcI3/Pbjg4k=", - "dev": true, - "requires": { - "load-json-file": "^4.0.0", - "normalize-package-data": "^2.3.2", - "path-type": "^3.0.0" - } - }, - "strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", - "dev": true - } - } - }, - "@lerna/symlink-dependencies": { - "version": "3.3.0", - "resolved": "https://registry.npmjs.org/@lerna/symlink-dependencies/-/symlink-dependencies-3.3.0.tgz", - "integrity": "sha512-IRngSNCmuD5uBKVv23tHMvr7Mplti0lKHilFKcvhbvhAfu6m/Vclxhkfs/uLyHzG+DeRpl/9o86SQET3h4XDhg==", - "dev": true, - "requires": { - "@lerna/create-symlink": "^3.3.0", - "@lerna/resolve-symlink": "^3.3.0", - "@lerna/symlink-binary": "^3.3.0", - "fs-extra": "^7.0.0", - "p-finally": "^1.0.0", + "p-finally": "^1.0.0", "p-map": "^1.2.0", "p-map-series": "^1.0.0" } @@ -1114,34 +1218,34 @@ "dev": true }, "@lerna/validation-error": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/validation-error/-/validation-error-3.0.0.tgz", - "integrity": "sha512-5wjkd2PszV0kWvH+EOKZJWlHEqCTTKrWsvfHnHhcUaKBe/NagPZFWs+0xlsDPZ3DJt5FNfbAPAnEBQ05zLirFA==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/validation-error/-/validation-error-3.6.0.tgz", + "integrity": "sha512-MWltncGO5VgMS0QedTlZCjFUMF/evRjDMMHrtVorkIB2Cp5xy0rkKa8iDBG43qpUWeG1giwi58yUlETBcWfILw==", "dev": true, "requires": { - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "@lerna/version": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@lerna/version/-/version-3.5.0.tgz", - "integrity": "sha512-vxuGkUSfjJuvOIgPG7SDXVmk4GPwJF9F+uhDW9T/wJzTk4UaxL37GpBeJDo43eutQ7mwluP+t88Luwf8S3WXlA==", + "version": "3.8.0", + "resolved": "https://registry.npmjs.org/@lerna/version/-/version-3.8.0.tgz", + "integrity": "sha512-c+TNPzlyv0dgDpgMu87CPauk8R2jZwwftgQarHOCGbEZ0ClXqLFTEAKxvLpzprlt+kH3goIWYNQrZiJflpMOCA==", "dev": true, "requires": { - "@lerna/batch-packages": "^3.1.2", - "@lerna/check-working-tree": "^3.5.0", + "@lerna/batch-packages": "^3.6.0", + "@lerna/check-working-tree": "^3.6.0", "@lerna/child-process": "^3.3.0", - "@lerna/collect-updates": "^3.5.0", - "@lerna/command": "^3.5.0", - "@lerna/conventional-commits": "^3.5.0", - "@lerna/output": "^3.0.0", - "@lerna/prompt": "^3.3.1", - "@lerna/run-lifecycle": "^3.4.1", - "@lerna/validation-error": "^3.0.0", + "@lerna/collect-updates": "^3.6.0", + "@lerna/command": "^3.7.2", + "@lerna/conventional-commits": "^3.6.0", + "@lerna/output": "^3.6.0", + "@lerna/prompt": "^3.6.0", + "@lerna/run-lifecycle": "^3.7.1", + "@lerna/validation-error": "^3.6.0", "chalk": "^2.3.1", "dedent": "^0.7.0", + "libnpm": "^2.0.1", "minimatch": "^3.0.4", - "npmlog": "^4.1.2", "p-map": "^1.2.0", "p-pipe": "^1.2.0", "p-reduce": "^1.0.0", @@ -1152,15 +1256,24 @@ } }, "@lerna/write-log-file": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@lerna/write-log-file/-/write-log-file-3.0.0.tgz", - "integrity": "sha512-SfbPp29lMeEVOb/M16lJwn4nnx5y+TwCdd7Uom9umd7KcZP0NOvpnX0PHehdonl7TyHZ1Xx2maklYuCLbQrd/A==", + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/@lerna/write-log-file/-/write-log-file-3.6.0.tgz", + "integrity": 
"sha512-OkLK99V6sYXsJsYg+O9wtiFS3z6eUPaiz2e6cXJt80mfIIdI1t2dnmyua0Ib5cZWExQvx2z6Y32Wlf0MnsoNsA==", "dev": true, "requires": { - "npmlog": "^4.1.2", + "libnpm": "^2.0.1", "write-file-atomic": "^2.3.0" } }, + "@mattiasbuelens/web-streams-polyfill": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/@mattiasbuelens/web-streams-polyfill/-/web-streams-polyfill-0.2.1.tgz", + "integrity": "sha512-oKuFCQFa3W7Hj7zKn0+4ypI8JFm4ZKIoncwAC6wd5WwFW2sL7O1hpPoJdSWpynQ4DJ4lQ6MvFoVDmCLilonDFg==", + "dev": true, + "requires": { + "@types/whatwg-streams": "^0.0.7" + } + }, "@mrmlnc/readdir-enhanced": { "version": "2.2.1", "resolved": "https://registry.npmjs.org/@mrmlnc/readdir-enhanced/-/readdir-enhanced-2.2.1.tgz", @@ -1177,15 +1290,6 @@ "integrity": "sha512-shAmDyaQC4H92APFoIaVDHCx5bStIocgvbwQyxPRrbUY20V1EYTbSDchWbuwlMG3V17cprZhA6+78JfB+3DTPw==", "dev": true }, - "@samverschueren/stream-to-observable": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@samverschueren/stream-to-observable/-/stream-to-observable-0.3.0.tgz", - "integrity": "sha512-MI4Xx6LHs4Webyvi6EbspgyAb4D2Q2VtnCQ1blOJcoLS6mVa8lNN2rkIy1CVxfTUpoyIbCTkXES1rLXztFD1lg==", - "dev": true, - "requires": { - "any-observable": "^0.3.0" - } - }, "@sindresorhus/df": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@sindresorhus/df/-/df-2.1.0.tgz", @@ -1225,12 +1329,6 @@ } } }, - "@std/esm": { - "version": "0.26.0", - "resolved": "https://registry.npmjs.org/@std/esm/-/esm-0.26.0.tgz", - "integrity": "sha512-g3RDuosSa5fZOzENtrZdx7Gevb3zabfn8qglug2aCJIVz/4woFpKoqm1yD3mG2RD0zJEZRnkkuPHsmNglKGl7g==", - "dev": true - }, "@types/events": { "version": "1.2.0", "resolved": "http://registry.npmjs.org/@types/events/-/events-1.2.0.tgz", @@ -1263,9 +1361,9 @@ } }, "@types/handlebars": { - "version": "4.0.39", - "resolved": "https://registry.npmjs.org/@types/handlebars/-/handlebars-4.0.39.tgz", - "integrity": "sha512-vjaS7Q0dVqFp85QhyPSZqDKnTTCemcSHNHFvDdalO1s0Ifz5KuE64jQD5xoUkfdWwF4WpqdJEl7LsWH8rzhKJA==", + "version": "4.0.40", + "resolved": "https://registry.npmjs.org/@types/handlebars/-/handlebars-4.0.40.tgz", + "integrity": "sha512-sGWNtsjNrLOdKha2RV1UeF8+UbQnPSG7qbe5wwbni0mw4h2gHXyPFUMOC+xwGirIiiydM/HSqjDO4rk6NFB18w==", "dev": true }, "@types/highlight.js": { @@ -1275,15 +1373,15 @@ "dev": true }, "@types/jest": { - "version": "23.3.5", - "resolved": "https://registry.npmjs.org/@types/jest/-/jest-23.3.5.tgz", - "integrity": "sha512-3LI+vUC3Wju28vbjIjsTKakhMB8HC4l+tMz+Z8WRzVK+kmvezE5jcOvKtBpznWSI5KDLFo+FouUhpTKoekadCA==", + "version": "23.3.10", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-23.3.10.tgz", + "integrity": "sha512-DC8xTuW/6TYgvEg3HEXS7cu9OijFqprVDXXiOcdOKZCU/5PJNLZU37VVvmZHdtMiGOa8wAA/We+JzbdxFzQTRQ==", "dev": true }, "@types/lodash": { - "version": "4.14.118", - "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.118.tgz", - "integrity": "sha512-iiJbKLZbhSa6FYRip/9ZDX6HXhayXLDGY2Fqws9cOkEQ6XeKfaxB0sC541mowZJueYyMnVUmmG+al5/4fCDrgw==", + "version": "4.14.119", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.14.119.tgz", + "integrity": "sha512-Z3TNyBL8Vd/M9D9Ms2S3LmFq2sSMzahodD6rCS9V2N44HUMINb75jNkSuwAx7eo2ufqTdfOdtGQpNbieUjPQmw==", "dev": true }, "@types/marked": { @@ -1299,14 +1397,14 @@ "dev": true }, "@types/node": { - "version": "10.12.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.0.tgz", - "integrity": "sha512-3TUHC3jsBAB7qVRGxT6lWyYo2v96BMmD2PTcl47H25Lu7UXtFH/2qqmKiVrnel6Ne//0TFYf6uvNX+HW2FRkLQ==" + "version": "10.12.18", + 
"resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz", + "integrity": "sha512-fh+pAqt4xRzPfqA6eh3Z2y6fyZavRIumvjhaCL753+TVkGKGhpPeyrJG2JftD0T9q4GF00KjefsQ+PQNDdWQaQ==" }, "@types/shelljs": { - "version": "0.8.0", - "resolved": "https://registry.npmjs.org/@types/shelljs/-/shelljs-0.8.0.tgz", - "integrity": "sha512-vs1hCC8RxLHRu2bwumNyYRNrU3o8BtZhLysH5A4I98iYmA2APl6R3uNQb5ihl+WiwH0xdC9LLO+vRrXLs/Kyxg==", + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@types/shelljs/-/shelljs-0.8.1.tgz", + "integrity": "sha512-1lQw+48BuVgp6c1+z8EMipp18IdnV2dLh6KQGwOm+kJy9nPjEkaqRKmwbDNEYf//EKBvKcwOC6V2cDrNxVoQeQ==", "dev": true, "requires": { "@types/glob": "*", @@ -1318,175 +1416,181 @@ "resolved": "https://registry.npmjs.org/@types/text-encoding-utf-8/-/text-encoding-utf-8-1.0.1.tgz", "integrity": "sha512-GpIEYaS+yNfYqpowLLziiY42pyaL+lThd/wMh6tTubaKuG4IRkXqqyxK7Nddn3BvpUg2+go3Gv/jbXvAFMRjiQ==" }, + "@types/whatwg-streams": { + "version": "0.0.7", + "resolved": "https://registry.npmjs.org/@types/whatwg-streams/-/whatwg-streams-0.0.7.tgz", + "integrity": "sha512-6sDiSEP6DWcY2ZolsJ2s39ZmsoGQ7KVwBDI3sESQsEm9P2dHTcqnDIHRZFRNtLCzWp7hCFGqYbw5GyfpQnJ01A==", + "dev": true + }, "@webassemblyjs/ast": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.7.10.tgz", - "integrity": "sha512-wTUeaByYN2EA6qVqhbgavtGc7fLTOx0glG2IBsFlrFG51uXIGlYBTyIZMf4SPLo3v1bgV/7lBN3l7Z0R6Hswew==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.7.11.tgz", + "integrity": "sha512-ZEzy4vjvTzScC+SH8RBssQUawpaInUdMTYwYYLh54/s8TuT0gBLuyUnppKsVyZEi876VmmStKsUs28UxPgdvrA==", "dev": true, "requires": { - "@webassemblyjs/helper-module-context": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/wast-parser": "1.7.10" + "@webassemblyjs/helper-module-context": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/wast-parser": "1.7.11" } }, "@webassemblyjs/floating-point-hex-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.7.10.tgz", - "integrity": "sha512-gMsGbI6I3p/P1xL2UxqhNh1ga2HCsx5VBB2i5VvJFAaqAjd2PBTRULc3BpTydabUQEGlaZCzEUQhLoLG7TvEYQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.7.11.tgz", + "integrity": "sha512-zY8dSNyYcgzNRNT666/zOoAyImshm3ycKdoLsyDw/Bwo6+/uktb7p4xyApuef1dwEBo/U/SYQzbGBvV+nru2Xg==", "dev": true }, "@webassemblyjs/helper-api-error": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.7.10.tgz", - "integrity": "sha512-DoYRlPWtuw3yd5BOr9XhtrmB6X1enYF0/54yNvQWGXZEPDF5PJVNI7zQ7gkcKfTESzp8bIBWailaFXEK/jjCsw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.7.11.tgz", + "integrity": "sha512-7r1qXLmiglC+wPNkGuXCvkmalyEstKVwcueZRP2GNC2PAvxbLYwLLPr14rcdJaE4UtHxQKfFkuDFuv91ipqvXg==", "dev": true }, "@webassemblyjs/helper-buffer": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.7.10.tgz", - "integrity": "sha512-+RMU3dt/dPh4EpVX4u5jxsOlw22tp3zjqE0m3ftU2tsYxnPULb4cyHlgaNd2KoWuwasCQqn8Mhr+TTdbtj3LlA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.7.11.tgz", + "integrity": 
"sha512-MynuervdylPPh3ix+mKZloTcL06P8tenNH3sx6s0qE8SLR6DdwnfgA7Hc9NSYeob2jrW5Vql6GVlsQzKQCa13w==", "dev": true }, "@webassemblyjs/helper-code-frame": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.7.10.tgz", - "integrity": "sha512-UiytbpKAULOEab2hUZK2ywXen4gWJVrgxtwY3Kn+eZaaSWaRM8z/7dAXRSoamhKFiBh1uaqxzE/XD9BLlug3gw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-code-frame/-/helper-code-frame-1.7.11.tgz", + "integrity": "sha512-T8ESC9KMXFTXA5urJcyor5cn6qWeZ4/zLPyWeEXZ03hj/x9weSokGNkVCdnhSabKGYWxElSdgJ+sFa9G/RdHNw==", "dev": true, "requires": { - "@webassemblyjs/wast-printer": "1.7.10" + "@webassemblyjs/wast-printer": "1.7.11" } }, "@webassemblyjs/helper-fsm": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-fsm/-/helper-fsm-1.7.10.tgz", - "integrity": "sha512-w2vDtUK9xeSRtt5+RnnlRCI7wHEvLjF0XdnxJpgx+LJOvklTZPqWkuy/NhwHSLP19sm9H8dWxKeReMR7sCkGZA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-fsm/-/helper-fsm-1.7.11.tgz", + "integrity": "sha512-nsAQWNP1+8Z6tkzdYlXT0kxfa2Z1tRTARd8wYnc/e3Zv3VydVVnaeePgqUzFrpkGUyhUUxOl5ML7f1NuT+gC0A==", "dev": true }, "@webassemblyjs/helper-module-context": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-module-context/-/helper-module-context-1.7.10.tgz", - "integrity": "sha512-yE5x/LzZ3XdPdREmJijxzfrf+BDRewvO0zl8kvORgSWmxpRrkqY39KZSq6TSgIWBxkK4SrzlS3BsMCv2s1FpsQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-module-context/-/helper-module-context-1.7.11.tgz", + "integrity": "sha512-JxfD5DX8Ygq4PvXDucq0M+sbUFA7BJAv/GGl9ITovqE+idGX+J3QSzJYz+LwQmL7fC3Rs+utvWoJxDb6pmC0qg==", "dev": true }, "@webassemblyjs/helper-wasm-bytecode": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.7.10.tgz", - "integrity": "sha512-u5qy4SJ/OrxKxZqJ9N3qH4ZQgHaAzsopsYwLvoWJY6Q33r8PhT3VPyNMaJ7ZFoqzBnZlCcS/0f4Sp8WBxylXfg==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.7.11.tgz", + "integrity": "sha512-cMXeVS9rhoXsI9LLL4tJxBgVD/KMOKXuFqYb5oCJ/opScWpkCMEz9EJtkonaNcnLv2R3K5jIeS4TRj/drde1JQ==", "dev": true }, "@webassemblyjs/helper-wasm-section": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.7.10.tgz", - "integrity": "sha512-Ecvww6sCkcjatcyctUrn22neSJHLN/TTzolMGG/N7S9rpbsTZ8c6Bl98GpSpV77EvzNijiNRHBG0+JO99qKz6g==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.7.11.tgz", + "integrity": "sha512-8ZRY5iZbZdtNFE5UFunB8mmBEAbSI3guwbrsCl4fWdfRiAcvqQpeqd5KHhSWLL5wuxo53zcaGZDBU64qgn4I4Q==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11" } }, "@webassemblyjs/ieee754": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.7.10.tgz", - "integrity": "sha512-HRcWcY+YWt4+s/CvQn+vnSPfRaD4KkuzQFt5MNaELXXHSjelHlSEA8ZcqT69q0GTIuLWZ6JaoKar4yWHVpZHsQ==", + 
"version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.7.11.tgz", + "integrity": "sha512-Mmqx/cS68K1tSrvRLtaV/Lp3NZWzXtOHUW2IvDvl2sihAwJh4ACE0eL6A8FvMyDG9abes3saB6dMimLOs+HMoQ==", "dev": true, "requires": { "@xtuc/ieee754": "^1.2.0" } }, "@webassemblyjs/leb128": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.7.10.tgz", - "integrity": "sha512-og8MciYlA8hvzCLR71hCuZKPbVBfLQeHv7ImKZ4nlyxrYbG7uJHYtHiHu6OV9SqrGuD03H/HtXC4Bgdjfm9FHw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.7.11.tgz", + "integrity": "sha512-vuGmgZjjp3zjcerQg+JA+tGOncOnJLWVkt8Aze5eWQLwTQGNgVLcyOTqgSCxWTR4J42ijHbBxnuRaL1Rv7XMdw==", "dev": true, "requires": { "@xtuc/long": "4.2.1" } }, "@webassemblyjs/utf8": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.7.10.tgz", - "integrity": "sha512-Ng6Pxv6siyZp635xCSnH3mKmIFgqWPCcGdoo0GBYgyGdxu7cUj4agV7Uu1a8REP66UYUFXJLudeGgd4RvuJAnQ==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.7.11.tgz", + "integrity": "sha512-C6GFkc7aErQIAH+BMrIdVSmW+6HSe20wg57HEC1uqJP8E/xpMjXqQUxkQw07MhNDSDcGpxI9G5JSNOQCqJk4sA==", "dev": true }, "@webassemblyjs/wasm-edit": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.7.10.tgz", - "integrity": "sha512-e9RZFQlb+ZuYcKRcW9yl+mqX/Ycj9+3/+ppDI8nEE/NCY6FoK8f3dKBcfubYV/HZn44b+ND4hjh+4BYBt+sDnA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.7.11.tgz", + "integrity": "sha512-FUd97guNGsCZQgeTPKdgxJhBXkUbMTY6hFPf2Y4OedXd48H97J+sOY2Ltaq6WGVpIH8o/TGOVNiVz/SbpEMJGg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/helper-wasm-section": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10", - "@webassemblyjs/wasm-opt": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10", - "@webassemblyjs/wast-printer": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/helper-wasm-section": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11", + "@webassemblyjs/wasm-opt": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11", + "@webassemblyjs/wast-printer": "1.7.11" } }, "@webassemblyjs/wasm-gen": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.7.10.tgz", - "integrity": "sha512-M0lb6cO2Y0PzDye/L39PqwV+jvO+2YxEG5ax+7dgq7EwXdAlpOMx1jxyXJTScQoeTpzOPIb+fLgX/IkLF8h2yw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.7.11.tgz", + "integrity": "sha512-U/KDYp7fgAZX5KPfq4NOupK/BmhDc5Kjy2GIqstMhvvdJRcER/kUsMThpWeRP8BMn4LXaKhSTggIJPOeYHwISA==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/ieee754": "1.7.10", - "@webassemblyjs/leb128": "1.7.10", - "@webassemblyjs/utf8": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/ieee754": "1.7.11", + "@webassemblyjs/leb128": "1.7.11", + "@webassemblyjs/utf8": "1.7.11" } }, "@webassemblyjs/wasm-opt": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.7.10.tgz", - "integrity": 
"sha512-R66IHGCdicgF5ZliN10yn5HaC7vwYAqrSVJGjtJJQp5+QNPBye6heWdVH/at40uh0uoaDN/UVUfXK0gvuUqtVg==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.7.11.tgz", + "integrity": "sha512-XynkOwQyiRidh0GLua7SkeHvAPXQV/RxsUeERILmAInZegApOUAIJfRuPYe2F7RcjOC9tW3Cb9juPvAC/sCqvg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-buffer": "1.7.10", - "@webassemblyjs/wasm-gen": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-buffer": "1.7.11", + "@webassemblyjs/wasm-gen": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11" } }, "@webassemblyjs/wasm-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.7.10.tgz", - "integrity": "sha512-AEv8mkXVK63n/iDR3T693EzoGPnNAwKwT3iHmKJNBrrALAhhEjuPzo/lTE4U7LquEwyvg5nneSNdTdgrBaGJcA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.7.11.tgz", + "integrity": "sha512-6lmXRTrrZjYD8Ng8xRyvyXQJYUQKYSXhJqXOBLw24rdiXsHAOlvw5PhesjdcaMadU/pyPQOJ5dHreMjBxwnQKg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-api-error": "1.7.10", - "@webassemblyjs/helper-wasm-bytecode": "1.7.10", - "@webassemblyjs/ieee754": "1.7.10", - "@webassemblyjs/leb128": "1.7.10", - "@webassemblyjs/utf8": "1.7.10" + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-api-error": "1.7.11", + "@webassemblyjs/helper-wasm-bytecode": "1.7.11", + "@webassemblyjs/ieee754": "1.7.11", + "@webassemblyjs/leb128": "1.7.11", + "@webassemblyjs/utf8": "1.7.11" } }, "@webassemblyjs/wast-parser": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-parser/-/wast-parser-1.7.10.tgz", - "integrity": "sha512-YTPEtOBljkCL0VjDp4sHe22dAYSm3ZwdJ9+2NTGdtC7ayNvuip1wAhaAS8Zt9Q6SW9E5Jf5PX7YE3XWlrzR9cw==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-parser/-/wast-parser-1.7.11.tgz", + "integrity": "sha512-lEyVCg2np15tS+dm7+JJTNhNWq9yTZvi3qEhAIIOaofcYlUp0UR5/tVqOwa/gXYr3gjwSZqw+/lS9dscyLelbQ==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/floating-point-hex-parser": "1.7.10", - "@webassemblyjs/helper-api-error": "1.7.10", - "@webassemblyjs/helper-code-frame": "1.7.10", - "@webassemblyjs/helper-fsm": "1.7.10", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/floating-point-hex-parser": "1.7.11", + "@webassemblyjs/helper-api-error": "1.7.11", + "@webassemblyjs/helper-code-frame": "1.7.11", + "@webassemblyjs/helper-fsm": "1.7.11", "@xtuc/long": "4.2.1" } }, "@webassemblyjs/wast-printer": { - "version": "1.7.10", - "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.7.10.tgz", - "integrity": "sha512-mJ3QKWtCchL1vhU/kZlJnLPuQZnlDOdZsyP0bbLWPGdYsQDnSBvyTLhzwBA3QAMlzEL9V4JHygEmK6/OTEyytA==", + "version": "1.7.11", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.7.11.tgz", + "integrity": "sha512-m5vkAsuJ32QpkdkDOUPGSltrg8Cuk3KBx4YrmAGQwCZPRdUHXxG4phIOuuycLemHFr74sWL9Wthqss4fzdzSwg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/wast-parser": "1.7.10", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/wast-parser": "1.7.11", "@xtuc/long": "4.2.1" } }, @@ -1550,17 +1654,17 @@ }, "dependencies": { "acorn": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.0.2.tgz", - 
"integrity": "sha512-GXmKIvbrN3TV7aVqAzVFaMW8F8wzVX7voEBRO3bDA64+EX37YSayggRJP5Xig6HYHBkWKpFg9W5gg6orklubhg==", + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-6.0.4.tgz", + "integrity": "sha512-VY4i5EKSKkofY2I+6QLTbTTN/UvEQPCo6eiwzzSaSWfpaDhOmStMCMod6wmuPciNq+XS0faCglFu2lHZpdHUtg==", "dev": true } } }, "acorn-walk": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.1.0.tgz", - "integrity": "sha512-ugTb7Lq7u4GfWSqqpwE0bGyoBZNMTok/zDBXxfEG0QM50jNlGhIWjRC1pPN7bvV1anhF+bs+/gNcRw+o55Evbg==", + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-6.1.1.tgz", + "integrity": "sha512-OtUw6JUTgxA2QoqqmrmQ7F2NYqiBPi/L2jqHyFtllhOUvXYQXf0Z1CYUinIfyT4bTCGmrA7gX9FvHA81uzCoVw==", "dev": true }, "agent-base": { @@ -1582,21 +1686,21 @@ } }, "ajv": { - "version": "5.5.2", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", - "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.2.tgz", + "integrity": "sha512-FBHEW6Jf5TB9MGBgUUA9XHkTbjXYfAUjY43ACMfmdMRHniyoMHjHjzD50OK8LGDWQwp4rWEsIq5kEqq7rvIM1g==", "dev": true, "requires": { - "co": "^4.6.0", - "fast-deep-equal": "^1.0.0", + "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.3.0" + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" } }, "ajv-errors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-1.0.0.tgz", - "integrity": "sha1-7PAh+hCP0X37Xms4Py3SM+Mf/Fk=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/ajv-errors/-/ajv-errors-1.0.1.tgz", + "integrity": "sha512-DCRfO/4nQ+89p/RK43i8Ezd41EqdGIU4ld7nGF8OQ14oc/we5rEntLCUa7+jrn3nn83BosfwZA0wb4pon2o8iQ==", "dev": true }, "ajv-keywords": { @@ -1606,14 +1710,17 @@ "dev": true }, "ansi-colors": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-2.0.5.tgz", - "integrity": "sha512-yAdfUZ+c2wetVNIFsNRn44THW+Lty6S5TwMpUfLA/UaGhiXbBv/F8E60/1hMLd0cnF/CDoWH8vzVaI5bAcHCjw==", - "dev": true + "version": "1.1.0", + "resolved": "http://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", + "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", + "dev": true, + "requires": { + "ansi-wrap": "^0.1.0" + } }, "ansi-escapes": { "version": "3.1.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.1.0.tgz", + "resolved": "http://registry.npmjs.org/ansi-escapes/-/ansi-escapes-3.1.0.tgz", "integrity": "sha512-UgAb8H9D41AQnu/PbWlCofQVcnV4Gs2bBJi9eZPxfU/hgglFh3SMDMENRIqdr7H6XFnXdoknctFByVsCOotTVw==", "dev": true }, @@ -1646,20 +1753,14 @@ "integrity": "sha1-qCJQ3bABXponyoLoLqYDu/pF768=", "dev": true }, - "any-observable": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/any-observable/-/any-observable-0.3.0.tgz", - "integrity": "sha512-/FQM1EDkTsf63Ub2C6O7GuYFDsSXUwsaZDurV0np41ocwq0jthUAYCmhBX9f+KwlaCgIuWyr/4WlUQUBfKfZog==", - "dev": true - }, "anymatch": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-1.3.2.tgz", - "integrity": "sha512-0XNayC8lTHQ2OI8aljNCN3sSx6hsr/1+rlcDAotXJR7C1oZZHCNsfpbKwMjRA3Uqb5tF1Rae2oloTr4xpq+WjA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", + "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", "dev": true, "requires": { - 
"micromatch": "^2.1.5", - "normalize-path": "^2.0.0" + "micromatch": "^3.1.4", + "normalize-path": "^2.1.1" } }, "append-buffer": { @@ -1721,13 +1822,10 @@ } }, "arr-diff": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", - "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", - "dev": true, - "requires": { - "arr-flatten": "^1.0.1" - } + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", + "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", + "dev": true }, "arr-filter": { "version": "1.1.2", @@ -1781,7 +1879,7 @@ }, "array-equal": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", + "resolved": "http://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", "integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM=", "dev": true }, @@ -1891,9 +1989,9 @@ "dev": true }, "array-unique": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", - "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", + "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", "dev": true }, "arrify": { @@ -1991,14 +2089,6 @@ "once": "^1.3.2", "process-nextick-args": "^1.0.7", "stream-exhaust": "^1.0.1" - }, - "dependencies": { - "process-nextick-args": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", - "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=", - "dev": true - } } }, "async-each": { @@ -2078,7 +2168,7 @@ }, "supports-color": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", + "resolved": "http://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", "dev": true } @@ -2166,6 +2256,17 @@ "find-up": "^2.1.0", "istanbul-lib-instrument": "^1.10.1", "test-exclude": "^4.2.1" + }, + "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + } } }, "babel-plugin-jest-hoist": { @@ -2180,28 +2281,6 @@ "integrity": "sha1-/WU28rzhODb/o6VFjEkDpZe7O/U=", "dev": true }, - "babel-plugin-transform-es2015-modules-commonjs": { - "version": "6.26.2", - "resolved": "https://registry.npmjs.org/babel-plugin-transform-es2015-modules-commonjs/-/babel-plugin-transform-es2015-modules-commonjs-6.26.2.tgz", - "integrity": "sha512-CV9ROOHEdrjcwhIaJNBGMBCodN+1cfkwtM1SbUHmvyy35KGT7fohbpOxkE2uLz1o6odKK2Ck/tz47z+VqQfi9Q==", - "dev": true, - "requires": { - "babel-plugin-transform-strict-mode": "^6.24.1", - "babel-runtime": "^6.26.0", - "babel-template": "^6.26.0", - "babel-types": "^6.26.0" - } - }, - "babel-plugin-transform-strict-mode": { - "version": "6.24.1", - "resolved": "https://registry.npmjs.org/babel-plugin-transform-strict-mode/-/babel-plugin-transform-strict-mode-6.24.1.tgz", - "integrity": "sha1-1fr3qleKZbvlkc9e2uBKDGcCB1g=", - "dev": true, - "requires": { - "babel-runtime": "^6.22.0", - "babel-types": "^6.24.1" - } - }, "babel-preset-jest": { "version": "23.2.0", "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-23.2.0.tgz", @@ -2360,18 +2439,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": 
"https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -2390,12 +2457,6 @@ "tweetnacl": "^0.14.3" } }, - "beeper": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/beeper/-/beeper-1.1.1.tgz", - "integrity": "sha1-5tXqjF2tABMEpwsiY4RH9pyy+Ak=", - "dev": true - }, "benchmark": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/benchmark/-/benchmark-2.1.4.tgz", @@ -2407,11 +2468,24 @@ } }, "big.js": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/big.js/-/big.js-3.2.0.tgz", - "integrity": "sha512-+hN/Zh2D08Mx65pZ/4g5bsmNiZUuChDiQfTUQ7qJr4/kuopCr88xZsAXv6mBoZEsUI4OuGHlX59qE94K2mMW8Q==", + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz", + "integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==", "dev": true }, + "bin-links": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/bin-links/-/bin-links-1.1.2.tgz", + "integrity": "sha512-8eEHVgYP03nILphilltWjeIjMbKyJo3wvp9K816pHbhP301ismzw15mxAAEVQ/USUwcP++1uNrbERbp8lOA6Fg==", + "dev": true, + "requires": { + "bluebird": "^3.5.0", + "cmd-shim": "^2.0.2", + "gentle-fs": "^2.0.0", + "graceful-fs": "^4.1.11", + "write-file-atomic": "^2.3.0" + } + }, "binary-extensions": { "version": "1.12.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-1.12.0.tgz", @@ -2428,9 +2502,9 @@ } }, "bluebird": { - "version": "3.5.2", - "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.2.tgz", - "integrity": "sha512-dhHTWMI7kMx5whMQntl7Vr9C6BvV10lFXDAasnqnrMYhXVCzzk6IO9Fo2L75jXHT07WrOngL1WDXOp+yYS91Yg==", + "version": "3.5.3", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.3.tgz", + "integrity": "sha512-/qKPUQlaW1OyR51WeCPBvRnAlnZFUJkCSG5HzGnuIqhgyJtF+T94lFnn33eiazjRm2LAHVy2guNnaq48X9SJuw==", "dev": true }, "bn.js": { @@ -2450,14 +2524,32 @@ } }, "braces": { - "version": "1.8.5", - "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", - "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", + "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", "dev": true, "requires": { - "expand-range": "^1.8.1", - "preserve": "^0.2.0", - "repeat-element": "^1.1.2" + "arr-flatten": "^1.1.0", + "array-unique": "^0.3.2", + "extend-shallow": "^2.0.1", + "fill-range": "^4.0.0", + "isobject": "^3.0.1", + "repeat-element": "^1.1.2", + "snapdragon": "^0.8.1", + "snapdragon-node": "^2.0.1", + "split-string": "^3.0.2", + "to-regex": "^3.0.1" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "brorand": { @@ -2483,7 +2575,7 @@ "dependencies": { "resolve": { "version": "1.1.7", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.1.7.tgz", + "resolved": "http://registry.npmjs.org/resolve/-/resolve-1.1.7.tgz", "integrity": "sha1-IDEU2CrSxe2ejgQRs5ModeiJ6Xs=", "dev": true } @@ 
-2560,6 +2652,15 @@ "pako": "~1.0.5" } }, + "bs-logger": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/bs-logger/-/bs-logger-0.2.6.tgz", + "integrity": "sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==", + "dev": true, + "requires": { + "fast-json-stable-stringify": "2.x" + } + }, "bser": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/bser/-/bser-2.0.0.tgz", @@ -2629,32 +2730,47 @@ "dev": true }, "cacache": { - "version": "11.2.0", - "resolved": "https://registry.npmjs.org/cacache/-/cacache-11.2.0.tgz", - "integrity": "sha512-IFWl6lfK6wSeYCHUXh+N1lY72UDrpyrYQJNIVQf48paDuWbv5RbAtJYf/4gUQFObTCHZwdZ5sI8Iw7nqwP6nlQ==", + "version": "11.3.2", + "resolved": "https://registry.npmjs.org/cacache/-/cacache-11.3.2.tgz", + "integrity": "sha512-E0zP4EPGDOaT2chM08Als91eYnf8Z+eH1awwwVsngUmgppfM5jjJ8l3z5vO5p5w/I3LsiXawb1sW0VY65pQABg==", "dev": true, "requires": { - "bluebird": "^3.5.1", - "chownr": "^1.0.1", - "figgy-pudding": "^3.1.0", - "glob": "^7.1.2", - "graceful-fs": "^4.1.11", - "lru-cache": "^4.1.3", + "bluebird": "^3.5.3", + "chownr": "^1.1.1", + "figgy-pudding": "^3.5.1", + "glob": "^7.1.3", + "graceful-fs": "^4.1.15", + "lru-cache": "^5.1.1", "mississippi": "^3.0.0", "mkdirp": "^0.5.1", "move-concurrently": "^1.0.1", "promise-inflight": "^1.0.1", "rimraf": "^2.6.2", - "ssri": "^6.0.0", - "unique-filename": "^1.1.0", + "ssri": "^6.0.1", + "unique-filename": "^1.1.1", "y18n": "^4.0.0" }, "dependencies": { + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, "y18n": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz", "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==", "dev": true + }, + "yallist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz", + "integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==", + "dev": true } } }, @@ -2673,14 +2789,6 @@ "to-object-path": "^0.3.0", "union-value": "^1.0.0", "unset-value": "^1.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "call-me-maybe": { @@ -2689,9 +2797,27 @@ "integrity": "sha1-JtII6onje1y95gJQoV8DHBak1ms=", "dev": true }, + "caller-callsite": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/caller-callsite/-/caller-callsite-2.0.0.tgz", + "integrity": "sha1-hH4PzgoiN1CpoCfFSzNzGtMVQTQ=", + "dev": true, + "requires": { + "callsites": "^2.0.0" + } + }, + "caller-path": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/caller-path/-/caller-path-2.0.0.tgz", + "integrity": "sha1-Ro+DBE42mrIBD6xfBs7uFbsssfQ=", + "dev": true, + "requires": { + "caller-callsite": "^2.0.0" + } + }, "callsites": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/callsites/-/callsites-2.0.0.tgz", + "resolved": "http://registry.npmjs.org/callsites/-/callsites-2.0.0.tgz", "integrity": "sha1-BuuE8A7qQT2oav/vrL/7Ngk7PFA=", "dev": true }, @@ -2752,20 +2878,24 @@ "dev": true }, "chokidar": { - "version": "1.7.0", - "resolved": 
"https://registry.npmjs.org/chokidar/-/chokidar-1.7.0.tgz", - "integrity": "sha1-eY5ol3gVHIB2tLNg5e3SjNortGg=", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", + "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", "dev": true, "requires": { - "anymatch": "^1.3.0", + "anymatch": "^2.0.0", "async-each": "^1.0.0", - "fsevents": "^1.0.0", - "glob-parent": "^2.0.0", + "braces": "^2.3.0", + "fsevents": "^1.2.2", + "glob-parent": "^3.1.0", "inherits": "^2.0.1", "is-binary-path": "^1.0.0", - "is-glob": "^2.0.0", + "is-glob": "^4.0.0", + "lodash.debounce": "^4.0.8", + "normalize-path": "^2.1.1", "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0" + "readdirp": "^2.0.0", + "upath": "^1.0.5" } }, "chownr": { @@ -2819,12 +2949,6 @@ "requires": { "is-descriptor": "^0.1.0" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true } } }, @@ -2837,16 +2961,6 @@ "restore-cursor": "^2.0.0" } }, - "cli-truncate": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-0.2.1.tgz", - "integrity": "sha1-nxXPuwcFAFNpIWxiasfQWrkN1XQ=", - "dev": true, - "requires": { - "slice-ansi": "0.0.4", - "string-width": "^1.0.1" - } - }, "cli-width": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/cli-width/-/cli-width-2.2.0.tgz", @@ -2891,6 +3005,14 @@ "inherits": "^2.0.1", "process-nextick-args": "^2.0.0", "readable-stream": "^2.3.5" + }, + "dependencies": { + "process-nextick-args": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true + } } }, "cmd-shim": { @@ -2924,17 +3046,6 @@ "arr-map": "^2.0.2", "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "collection-visit": { @@ -3055,7 +3166,7 @@ }, "concat-stream": { "version": "1.6.2", - "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", + "resolved": "http://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz", "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==", "dev": true, "requires": { @@ -3127,11 +3238,14 @@ "through2": "^2.0.0" }, "dependencies": { - "dateformat": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", - "integrity": "sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", - "dev": true + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } }, "load-json-file": { "version": "4.0.0", @@ -3215,14 +3329,6 @@ "semver": "^5.5.0", "split": "^1.0.0", "through2": "^2.0.0" - }, - "dependencies": { - "dateformat": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", - "integrity": 
"sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", - "dev": true - } } }, "conventional-commits-filter": { @@ -3306,9 +3412,9 @@ } }, "core-js": { - "version": "2.5.7", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.5.7.tgz", - "integrity": "sha512-RszJCAxg/PP6uzXVXL6BsxSXx/B05oJAQ2vkJRjyjrEcNVycaqOmNb5OTxZPE3xa5gwZduqza6L9JOCenh/Ecw==", + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.1.tgz", + "integrity": "sha512-L72mmmEayPJBejKIWe2pYtGis5r0tQ5NaJekdhyXgeMQTpJoBsH0NL4ElY2LfSoV15xeQWKQ+XTTOZdyero5Xg==", "dev": true }, "core-util-is": { @@ -3318,11 +3424,12 @@ "dev": true }, "cosmiconfig": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-5.0.6.tgz", - "integrity": "sha512-6DWfizHriCrFWURP1/qyhsiFvYdlJzbCzmtFWh744+KyWsJo5+kPzUZZaMRSSItoYc0pxFX7gEO7ZC1/gN/7AQ==", + "version": "5.0.7", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-5.0.7.tgz", + "integrity": "sha512-PcLqxTKiDmNT6pSpy4N6KtuPwb53W+2tzNvwOZw0WH9N6O0vLIBq0x8aj8Oj75ere4YcGi48bDFCL+3fRJdlNA==", "dev": true, "requires": { + "import-fresh": "^2.0.0", "is-directory": "^0.3.1", "js-yaml": "^3.9.0", "parse-json": "^4.0.0" @@ -3354,25 +3461,6 @@ "request": "^2.85.0" } }, - "cpx": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/cpx/-/cpx-1.5.0.tgz", - "integrity": "sha1-GFvgGFEdhycN7czCkxceN2VauI8=", - "dev": true, - "requires": { - "babel-runtime": "^6.9.2", - "chokidar": "^1.6.0", - "duplexer": "^0.1.1", - "glob": "^7.0.5", - "glob2base": "^0.0.12", - "minimatch": "^3.0.2", - "mkdirp": "^0.5.1", - "resolve": "^1.1.7", - "safe-buffer": "^5.0.1", - "shell-quote": "^1.6.1", - "subarg": "^1.0.0" - } - }, "create-ecdh": { "version": "4.0.3", "resolved": "https://registry.npmjs.org/create-ecdh/-/create-ecdh-4.0.3.tgz", @@ -3551,12 +3639,6 @@ } } }, - "date-fns": { - "version": "1.29.0", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-1.29.0.tgz", - "integrity": "sha512-lbTXWZ6M20cWH8N9S6afb0SBm6tMk+uUg6z3MqHPKE9atmsY3kJkTm8vKe93izJ2B2+q5MV990sM2CHgtAZaOw==", - "dev": true - }, "date-now": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/date-now/-/date-now-0.1.4.tgz", @@ -3564,9 +3646,9 @@ "dev": true }, "dateformat": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-2.2.0.tgz", - "integrity": "sha1-QGXiATz5+5Ft39gu+1Bq1MZ2kGI=", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-3.0.3.tgz", + "integrity": "sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==", "dev": true }, "debug": { @@ -3755,18 +3837,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -3905,41 +3975,6 @@ "integrity": "sha1-rOb/gIwc5mtX0ev5eXessCM0z8E=", "dev": true }, - "duplexer2": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/duplexer2/-/duplexer2-0.0.2.tgz", - "integrity": "sha1-xhTc9n4vsUmVqRcR5aYX6KYKMds=", - "dev": true, - "requires": { - "readable-stream": "~1.1.9" - }, - "dependencies": 
{ - "isarray": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", - "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=", - "dev": true - }, - "readable-stream": { - "version": "1.1.14", - "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", - "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", - "dev": true, - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.1", - "isarray": "0.0.1", - "string_decoder": "~0.10.x" - } - }, - "string_decoder": { - "version": "0.10.31", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", - "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=", - "dev": true - } - } - }, "duplexify": { "version": "3.6.1", "resolved": "https://registry.npmjs.org/duplexify/-/duplexify-3.6.1.tgz", @@ -3972,12 +4007,6 @@ "safer-buffer": "^2.1.0" } }, - "elegant-spinner": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/elegant-spinner/-/elegant-spinner-1.0.1.tgz", - "integrity": "sha1-2wQ1IcldfjA/2PNFvtwzSc+wcp4=", - "dev": true - }, "elliptic": { "version": "6.4.1", "resolved": "https://registry.npmjs.org/elliptic/-/elliptic-6.4.1.tgz", @@ -4190,6 +4219,12 @@ "estraverse": "^4.1.1" } }, + "esm": { + "version": "3.0.84", + "resolved": "https://registry.npmjs.org/esm/-/esm-3.0.84.tgz", + "integrity": "sha512-SzSGoZc17S7P+12R9cg21Bdb7eybX25RnIeRZ80xZs+VZ3kdQKzqTp2k4hZJjR7p9l0186TTXSgrxzlMDBktlw==", + "dev": true + }, "esprima": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", @@ -4273,28 +4308,90 @@ "integrity": "sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=", "dev": true }, - "exit-hook": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/exit-hook/-/exit-hook-1.1.1.tgz", - "integrity": "sha1-8FyiM7SMBdVP/wd2XfhQfpXAL/g=", - "dev": true - }, "expand-brackets": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", - "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", + "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", "dev": true, "requires": { - "is-posix-bracket": "^0.1.0" + "debug": "^2.3.3", + "define-property": "^0.2.5", + "extend-shallow": "^2.0.1", + "posix-character-classes": "^0.1.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" + }, + "dependencies": { + "define-property": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", + "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", + "dev": true, + "requires": { + "is-descriptor": "^0.1.0" + } + }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "expand-range": { "version": "1.8.2", - "resolved": "https://registry.npmjs.org/expand-range/-/expand-range-1.8.2.tgz", + "resolved": "http://registry.npmjs.org/expand-range/-/expand-range-1.8.2.tgz", "integrity": "sha1-opnv/TNf4nIeuujiV+x5ZE/IUzc=", "dev": true, "requires": { "fill-range": "^2.1.0" + }, + "dependencies": { + "fill-range": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-2.2.4.tgz", + "integrity": "sha512-cnrcCbj01+j2gTG921VZPnHbjmdAf8oQV/iGeV2kZxGSyfYjjTyY79ErsK1WJWMpw6DaApEX72binqJE+/d+5Q==", + "dev": 
true, + "requires": { + "is-number": "^2.1.0", + "isobject": "^2.0.0", + "randomatic": "^3.0.0", + "repeat-element": "^1.1.2", + "repeat-string": "^1.5.2" + } + }, + "is-number": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-2.1.0.tgz", + "integrity": "sha1-Afy7s5NGOlSPL0ZszhbezknbkI8=", + "dev": true, + "requires": { + "kind-of": "^3.0.2" + } + }, + "isobject": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-2.1.0.tgz", + "integrity": "sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=", + "dev": true, + "requires": { + "isarray": "1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "expand-tilde": { @@ -4359,262 +4456,37 @@ } }, "extglob": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", - "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", - "dev": true, - "requires": { - "is-extglob": "^1.0.0" - } - }, - "extsprintf": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", - "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", - "dev": true - }, - "fancy-log": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/fancy-log/-/fancy-log-1.3.2.tgz", - "integrity": "sha1-9BEl49hPLn2JpD0G2VjI94vha+E=", - "dev": true, - "requires": { - "ansi-gray": "^0.1.1", - "color-support": "^1.1.3", - "time-stamp": "^1.0.0" - } - }, - "fast-deep-equal": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", - "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=", - "dev": true - }, - "fast-glob": { - "version": "2.2.4", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-2.2.4.tgz", - "integrity": "sha512-FjK2nCGI/McyzgNtTESqaWP3trPvHyRyoyY70hxjc3oKPNmDe8taohLZpoVKoUjW85tbU5txaYUZCNtVzygl1g==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", + "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", "dev": true, "requires": { - "@mrmlnc/readdir-enhanced": "^2.2.1", - "@nodelib/fs.stat": "^1.1.2", - "glob-parent": "^3.1.0", - "is-glob": "^4.0.0", - "merge2": "^1.2.3", - "micromatch": "^3.1.10" + "array-unique": "^0.3.2", + "define-property": "^1.0.0", + "expand-brackets": "^2.1.4", + "extend-shallow": "^2.0.1", + "fragment-cache": "^0.2.1", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.1" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", 
- "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": 
"sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", + "define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", + "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", "dev": true, "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } + "is-descriptor": "^1.0.0" } }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", "dev": true, "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } + "is-extendable": "^0.1.0" } }, "is-accessor-descriptor": { @@ -4645,77 +4517,53 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": 
true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } } } }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=", + "dev": true + }, + "fancy-log": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/fancy-log/-/fancy-log-1.3.3.tgz", + "integrity": "sha512-k9oEhlyc0FrVh25qYuSELjr8oxsCoc4/LEZfg2iJJrfEk/tZL9bCoJE47gqAvI2m/AUjluCS4+3I0eTx8n3AEw==", + "dev": true, + "requires": { + "ansi-gray": "^0.1.1", + "color-support": "^1.1.3", + "parse-node-version": "^1.0.0", + "time-stamp": "^1.0.0" + } + }, + "fast-deep-equal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", + "dev": true + }, + "fast-extend": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/fast-extend/-/fast-extend-0.0.2.tgz", + "integrity": "sha1-9exCz0C5Rg9SGmOH37Ut7u1nHb0=", + "dev": true + }, + "fast-glob": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-2.2.4.tgz", + "integrity": "sha512-FjK2nCGI/McyzgNtTESqaWP3trPvHyRyoyY70hxjc3oKPNmDe8taohLZpoVKoUjW85tbU5txaYUZCNtVzygl1g==", + "dev": true, + "requires": { + "@mrmlnc/readdir-enhanced": "^2.2.1", + "@nodelib/fs.stat": "^1.1.2", + "glob-parent": "^3.1.0", + "is-glob": "^4.0.0", + "merge2": "^1.2.3", + "micromatch": "^3.1.10" + } + }, "fast-json-stable-stringify": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", @@ -4769,16 +4617,26 @@ } }, "fill-range": { - "version": "2.2.4", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-2.2.4.tgz", - "integrity": "sha512-cnrcCbj01+j2gTG921VZPnHbjmdAf8oQV/iGeV2kZxGSyfYjjTyY79ErsK1WJWMpw6DaApEX72binqJE+/d+5Q==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", + "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", "dev": true, "requires": { - "is-number": "^2.1.0", - "isobject": "^2.0.0", - "randomatic": "^3.0.0", - "repeat-element": "^1.1.2", - "repeat-string": "^1.5.2" + "extend-shallow": "^2.0.1", + "is-number": "^3.0.0", + "repeat-string": "^1.6.1", + "to-regex-range": "^2.1.0" + }, + "dependencies": { + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + } } }, "find-cache-dir": { @@ -4812,9 +4670,9 @@ } }, "p-limit": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.0.0.tgz", - "integrity": "sha512-fl5s52lI5ahKCernzzIyAP0QAZbGIovtVHGwpcu1Jr/EpzLVDI2myISHwGqK7m8uQFugVWSrbxH7XnhGtvEc+A==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { "p-try": "^2.0.0" @@ -4835,6 +4693,12 @@ "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", 
"dev": true }, + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + }, "pkg-dir": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-3.0.0.tgz", @@ -4846,16 +4710,10 @@ } } }, - "find-index": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/find-index/-/find-index-0.1.1.tgz", - "integrity": "sha1-Z101iyyjiS15Whq0cjL4tuLg3eQ=", - "dev": true - }, - "find-parent-dir": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/find-parent-dir/-/find-parent-dir-0.3.0.tgz", - "integrity": "sha1-M8RLQpqysvBkYpnF+fcY83b/jVQ=", + "find-npm-prefix": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/find-npm-prefix/-/find-npm-prefix-1.0.2.tgz", + "integrity": "sha512-KEftzJ+H90x6pcKtdXZEPsQse8/y/UnvzRKrOSQFprnrGaFuJ62fVkP34Iu2IYuMvyauCyoLTNkJZgrrGA2wkA==", "dev": true }, "find-replace": { @@ -4868,12 +4726,13 @@ } }, "find-up": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", - "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-1.1.2.tgz", + "integrity": "sha1-ay6YIrGizgpgq2TWEOzK1TyyTQ8=", "dev": true, "requires": { - "locate-path": "^2.0.0" + "path-exists": "^2.0.0", + "pinkie-promise": "^2.0.0" } }, "findup-sync": { @@ -4888,233 +4747,6 @@ "resolve-dir": "^1.0.1" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": 
"sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": 
"sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, "is-glob": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", @@ -5123,66 +4755,13 @@ "requires": { "is-extglob": "^2.1.0" } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } } } }, "fined": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/fined/-/fined-1.1.0.tgz", - "integrity": "sha1-s33IRLdqL15wgeiE98CuNE8VNHY=", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/fined/-/fined-1.1.1.tgz", + "integrity": "sha512-jQp949ZmEbiYHk3gkbdtpJ0G1+kgtLQBNdP5edFP7Fh+WAYceLQz6yO1SBj72Xkg8GVyTB3bBzAYrHJVh5Xd5g==", "dev": true, "requires": { "expand-tilde": "^2.0.2", @@ -5193,9 +4772,9 @@ } }, "flagged-respawn": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/flagged-respawn/-/flagged-respawn-1.0.0.tgz", - "integrity": "sha1-Tnmumy6zi/hrO7Vr8+ClaqX8q9c=", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/flagged-respawn/-/flagged-respawn-1.0.1.tgz", + "integrity": 
"sha512-lNaHNVymajmk0OJMBn8fVUAU1BtDeKIqKoVhk4xAALB57aALg6b4W0MfJ/cUE0g9YBXy5XhSlPIpYIJ7HaY/3Q==", "dev": true }, "flatbuffers": { @@ -5220,9 +4799,9 @@ "dev": true }, "for-own": { - "version": "0.1.5", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", - "integrity": "sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", + "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", "dev": true, "requires": { "for-in": "^1.0.1" @@ -5294,6 +4873,23 @@ "through2": "^2.0.3" } }, + "fs-monkey": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-0.3.3.tgz", + "integrity": "sha512-FNUvuTAJ3CqCQb5ELn+qCbGR/Zllhf2HtwsdAtBi59s1WeCjKMT81fHcSu7dwIskqGVK+MmOrb7VOBlq3/SItw==", + "dev": true + }, + "fs-vacuum": { + "version": "1.2.10", + "resolved": "https://registry.npmjs.org/fs-vacuum/-/fs-vacuum-1.2.10.tgz", + "integrity": "sha1-t2Kb7AekAxolSP35n17PHMizHjY=", + "dev": true, + "requires": { + "graceful-fs": "^4.1.2", + "path-is-inside": "^1.0.1", + "rimraf": "^2.5.2" + } + }, "fs-write-stream-atomic": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/fs-write-stream-atomic/-/fs-write-stream-atomic-1.0.10.tgz", @@ -5881,18 +5477,28 @@ "integrity": "sha512-KGDOARWVga7+rnB3z9Sd2Letx515owfk0hSxHGuqjANb1M+x2bGZGqHLiozPsYMdM2OubeMni/Hpwmjq6qIUhA==", "dev": true }, + "gentle-fs": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/gentle-fs/-/gentle-fs-2.0.1.tgz", + "integrity": "sha512-cEng5+3fuARewXktTEGbwsktcldA+YsnUEaXZwcK/3pjSE1X9ObnTs+/8rYf8s+RnIcQm2D5x3rwpN7Zom8Bew==", + "dev": true, + "requires": { + "aproba": "^1.1.2", + "fs-vacuum": "^1.2.10", + "graceful-fs": "^4.1.11", + "iferr": "^0.1.5", + "mkdirp": "^0.5.1", + "path-is-inside": "^1.0.2", + "read-cmd-shim": "^1.0.1", + "slide": "^1.1.6" + } + }, "get-caller-file": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-1.0.3.tgz", "integrity": "sha512-3t6rVToeoZfYSGd8YoLFR2DJkiQrIiUrGcjvFX2mDw3bn6k2OtwHN0TNCLbBO+w8qTvimhDkv+LSscbJY1vE6w==", "dev": true }, - "get-own-enumerable-property-symbols": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/get-own-enumerable-property-symbols/-/get-own-enumerable-property-symbols-3.0.0.tgz", - "integrity": "sha512-CIJYJC4GGF06TakLg8z4GQKvDsx9EMspVxOYih7LerEL/WosUnFIww45CGfxfeKHqlg3twgUrYRT1O3WQqjGCg==", - "dev": true - }, "get-pkg-repo": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/get-pkg-repo/-/get-pkg-repo-1.4.0.tgz", @@ -6026,27 +5632,6 @@ "meow": "^4.0.0", "split2": "^2.0.0", "through2": "^2.0.0" - }, - "dependencies": { - "lodash.template": { - "version": "4.4.0", - "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-4.4.0.tgz", - "integrity": "sha1-5zoDhcg1VZF0bgILmWecaQ5o+6A=", - "dev": true, - "requires": { - "lodash._reinterpolate": "~3.0.0", - "lodash.templatesettings": "^4.0.0" - } - }, - "lodash.templatesettings": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-4.1.0.tgz", - "integrity": "sha1-K01OlbpEDZFf8IvImeRVNmZxMxY=", - "dev": true, - "requires": { - "lodash._reinterpolate": "~3.0.0" - } - } } }, "git-remote-origin-url": { @@ -6108,15 +5693,53 @@ "requires": { "glob-parent": "^2.0.0", "is-glob": "^2.0.0" + }, + "dependencies": { + "glob-parent": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-2.0.0.tgz", + "integrity": 
"sha1-gTg9ctsFT8zPUzbaqQLxgvbtuyg=", + "dev": true, + "requires": { + "is-glob": "^2.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + } } }, "glob-parent": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-2.0.0.tgz", - "integrity": "sha1-gTg9ctsFT8zPUzbaqQLxgvbtuyg=", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", + "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", "dev": true, "requires": { - "is-glob": "^2.0.0" + "is-glob": "^3.1.0", + "path-dirname": "^1.0.0" + }, + "dependencies": { + "is-glob": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", + "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", + "dev": true, + "requires": { + "is-extglob": "^2.1.0" + } + } } }, "glob-stream": { @@ -6135,33 +5758,6 @@ "remove-trailing-separator": "^1.0.1", "to-absolute-glob": "^2.0.0", "unique-stream": "^2.0.2" - }, - "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } } }, "glob-to-regexp": { @@ -6182,358 +5778,6 @@ "is-negated-glob": "^1.0.0", "just-debounce": "^1.0.0", "object.defaults": "^1.1.0" - }, - "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "chokidar": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", - "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", - "dev": true, - "requires": { - "anymatch": "^2.0.0", - "async-each": "^1.0.0", - "braces": "^2.3.0", - "fsevents": "^1.2.2", - "glob-parent": "^3.1.0", - "inherits": "^2.0.1", - "is-binary-path": "^1.0.0", - "is-glob": "^4.0.0", - "lodash.debounce": "^4.0.8", - "normalize-path": "^2.1.1", - "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0", - "upath": "^1.0.5" - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": 
"sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - 
"integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } - } - }, - "glob2base": { - "version": "0.0.12", - "resolved": "https://registry.npmjs.org/glob2base/-/glob2base-0.0.12.tgz", - "integrity": "sha1-nUGbPijxLoOjYhZKJ3BVkiycDVY=", - "dev": true, - "requires": { - "find-index": "^0.1.1" } }, "global-modules": { @@ -6568,7 +5812,7 @@ }, "globby": { "version": "6.1.0", - "resolved": "https://registry.npmjs.org/globby/-/globby-6.1.0.tgz", + "resolved": "http://registry.npmjs.org/globby/-/globby-6.1.0.tgz", "integrity": "sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=", "dev": true, "requires": { @@ -6588,23 +5832,25 @@ } }, "glogg": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/glogg/-/glogg-1.0.1.tgz", - "integrity": "sha512-ynYqXLoluBKf9XGR1gA59yEJisIL7YHEH4xr3ZziHB5/yl4qWfaK8Js9jGe6gBGCSCKVqiyO30WnRZADvemUNw==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/glogg/-/glogg-1.0.2.tgz", + "integrity": "sha512-5mwUoSuBk44Y4EshyiqcH95ZntbDdTQqA3QYSrxmzj28Ai0vXBGMH1ApSANH14j2sIRtqCEyg6PfsuP7ElOEDA==", "dev": true, "requires": { "sparkles": "^1.0.0" } }, "google-closure-compiler": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler/-/google-closure-compiler-20181008.0.0.tgz", - "integrity": "sha512-XmJIasXHyy4kirthlsuDev2LZcXjYXWfOHwHdCLUQnfJH8T2sxWDNjFLQycaCIXwQLOyw2Kem38VgxrYfG0hzg==", + "version": "20181210.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler/-/google-closure-compiler-20181210.0.0.tgz", + "integrity": "sha512-GCMLakdibnc+jpdNTvF3M/ET5i6I4zzxGKw67A4bQahxc0TPLXQdkVfhF3kwBSoPfK8xwgU5kA+KO0qvDZHKHw==", "dev": true, "requires": { "chalk": "^1.0.0", - "google-closure-compiler-linux": "^20181008.0.0", - "google-closure-compiler-osx": "^20181008.0.0", + "google-closure-compiler-java": "^20181210.0.0", + "google-closure-compiler-js": "^20181210.0.0", + "google-closure-compiler-linux": "^20181210.0.0", + "google-closure-compiler-osx": "^20181210.0.0", "minimist": "^1.2.0", "vinyl": "^2.0.1", "vinyl-sourcemaps-apply": "^0.2.0" @@ -6631,30 +5877,42 @@ }, "supports-color": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", + 
"resolved": "http://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", "dev": true } } }, + "google-closure-compiler-java": { + "version": "20181210.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-java/-/google-closure-compiler-java-20181210.0.0.tgz", + "integrity": "sha512-FMGzY+vp25DePolYNyVcXz8UI2PV/I3AYU3nuFexmHcKn5XiBVy4CqK7em6NpVbZdDXJYUF3GUv5A0x0gLvbfw==", + "dev": true + }, + "google-closure-compiler-js": { + "version": "20181210.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-js/-/google-closure-compiler-js-20181210.0.0.tgz", + "integrity": "sha512-gn+2hT4uQtYKD/jXJqGIXzPMln3/JD7R4caAKDPJm7adqqDvrCAw7qxAiK4Vz1rNec7hJXPXh9TeKQjzz03ZaQ==", + "dev": true + }, "google-closure-compiler-linux": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler-linux/-/google-closure-compiler-linux-20181008.0.0.tgz", - "integrity": "sha512-k8njGfH2uzWJiRPPvUxM7MJB28gPrf4kI2bbuiF0gJk/1arXcWCPGjLD6pzCU0UylMy52MUXLgsIpRorqf2brw==", + "version": "20181210.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-linux/-/google-closure-compiler-linux-20181210.0.0.tgz", + "integrity": "sha512-Gp+yp+Vb6QWEhtYkePKxkspRlzX5dx6L46zUoHGWW7Henuk3ACYoUXuaHLQQ+tF0lmi2QAmFXEkvdnKVDIxR+Q==", "dev": true, "optional": true }, "google-closure-compiler-osx": { - "version": "20181008.0.0", - "resolved": "https://registry.npmjs.org/google-closure-compiler-osx/-/google-closure-compiler-osx-20181008.0.0.tgz", - "integrity": "sha512-xzf/yH/4MXdb6GbP84iHnpcVCOPBbH0gMVOs0JhR/KbrQh+DlJU+Y8Z/DQzTkw9HgD650R2/WZmBknURyg9OTw==", + "version": "20181210.0.0", + "resolved": "https://registry.npmjs.org/google-closure-compiler-osx/-/google-closure-compiler-osx-20181210.0.0.tgz", + "integrity": "sha512-SYUakmEpq8BorJU/O5CfrC+ABYjXR0rTvBd3Khwd1sml9B2aKEiHArdHC5SCmBRZd3ccUhp/XyrVO6PoxHKeZA==", "dev": true, "optional": true }, "graceful-fs": { - "version": "4.1.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", - "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=", + "version": "4.1.15", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.15.tgz", + "integrity": "sha512-6uHUhOPEBgQ24HM+r6b/QwWfZq+yiFcipKFrOFiBEnWdy5sdzYoi+pJeQaPI5qOLRFqWmAXUPQNsielzdLoecA==", "dev": true }, "growl": { @@ -6681,15 +5939,6 @@ "vinyl-fs": "^3.0.0" }, "dependencies": { - "ansi-colors": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", - "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", - "dev": true, - "requires": { - "ansi-wrap": "^0.1.0" - } - }, "gulp-cli": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/gulp-cli/-/gulp-cli-2.0.1.tgz", @@ -6715,24 +5964,21 @@ "v8flags": "^3.0.1", "yargs": "^7.1.0" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true } } }, "gulp-json-transform": { - "version": "0.4.5", - "resolved": "https://registry.npmjs.org/gulp-json-transform/-/gulp-json-transform-0.4.5.tgz", - "integrity": "sha512-kaGUaAhgjxeLgIMNF3IPFFmYCF6AgvzBQwqmVowiIStNADZSoILtPNDisYA4mKfpwMTqSiWLogQt1q5U75+uwA==", + "version": "0.4.6", + "resolved": "https://registry.npmjs.org/gulp-json-transform/-/gulp-json-transform-0.4.6.tgz", + "integrity": 
"sha512-laPoNiJP/+lAeiyb0lgY3cynOOi7R/QbPvKBEXJY6bm836nYg90pwY4mgwR7w8nFDlXiCToUeaoQCBIc2NudjA==", "dev": true, "requires": { - "gulp-util": "^3.0.8", + "ansi-colors": "^1.0.1", + "fancy-log": "^1.3.2", + "plugin-error": "^1.0.1", "promise": "^8.0.1", - "through2": "^2.0.3" + "through2": "^2.0.3", + "vinyl": "^2.1.0" } }, "gulp-rename": { @@ -6769,204 +6015,39 @@ } }, "gulp-typescript": { - "version": "5.0.0-alpha.3", - "resolved": "https://registry.npmjs.org/gulp-typescript/-/gulp-typescript-5.0.0-alpha.3.tgz", - "integrity": "sha512-6iSBjqBXAUqRsLUh/9XtlOnSzpPMbLrr5rqGj4UPLtGpDwFHW/fVTuRgv6LAWiKesLIUDDM0ourxvcpu2trecQ==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/gulp-typescript/-/gulp-typescript-5.0.0.tgz", + "integrity": "sha512-lMj2U+Ni6HyFaY2nr1sSQ6D014eHil5L1i52XWBaAQUR9UAUUp9btnm4yRBT2Jb8xhrwqmhMssZf/g2B7cinCA==", "dev": true, "requires": { - "ansi-colors": "^2.0.2", + "ansi-colors": "^3.0.5", "plugin-error": "^1.0.1", "source-map": "^0.7.3", - "through2": "^2.0.3", + "through2": "^3.0.0", "vinyl": "^2.1.0", "vinyl-fs": "^3.0.3" }, "dependencies": { - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - } - }, - "glob-stream": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/glob-stream/-/glob-stream-6.1.0.tgz", - "integrity": "sha1-cEXJlBOz65SIjYOrRtC0BMx73eQ=", - "dev": true, - "requires": { - "extend": "^3.0.0", - "glob": "^7.1.1", - "glob-parent": "^3.1.0", - "is-negated-glob": "^1.0.0", - "ordered-read-streams": "^1.0.0", - "pumpify": "^1.3.5", - "readable-stream": "^2.1.5", - "remove-trailing-separator": "^1.0.1", - "to-absolute-glob": "^2.0.0", - "unique-stream": "^2.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - }, - "is-valid-glob": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-valid-glob/-/is-valid-glob-1.0.0.tgz", - "integrity": "sha1-Kb8+/3Ab4tTTFdusw5vDn+j2Aao=", + "ansi-colors": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-3.2.3.tgz", + "integrity": "sha512-LEHHyuhlPY3TmuUYMh2oz89lTShfvgbmzaBcxve9t/9Wuy7Dwf4yoAKcND7KFT1HAQfqZ12qtc+DUrBMeKF9nw==", "dev": true }, - "ordered-read-streams": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/ordered-read-streams/-/ordered-read-streams-1.0.1.tgz", - "integrity": "sha1-d8DLN8QVJdZBZtmQ/61+xqDhNj4=", - "dev": true, - "requires": { - "readable-stream": "^2.0.1" - } - }, "source-map": { "version": "0.7.3", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.7.3.tgz", "integrity": "sha512-CkCj6giN3S+n9qrYiBTX5gystlENnRW5jZeNLHpe6aue+SrHcG5VYwujhW9s4dY31mEGsxBDrHR6oI69fTXsaQ==", "dev": true }, - "to-absolute-glob": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/to-absolute-glob/-/to-absolute-glob-2.0.2.tgz", - "integrity": "sha1-GGX0PZ50sIItufFFt4z/fQ98hJs=", - "dev": true, - "requires": { - "is-absolute": "^1.0.0", - "is-negated-glob": "^1.0.0" - } - }, - "vinyl-fs": { - "version": "3.0.3", - "resolved": 
"https://registry.npmjs.org/vinyl-fs/-/vinyl-fs-3.0.3.tgz", - "integrity": "sha512-vIu34EkyNyJxmP0jscNzWBSygh7VWhqun6RmqVfXePrOwi9lhvRs//dOaGOTRUQr4tx7/zd26Tk5WeSVZitgng==", - "dev": true, - "requires": { - "fs-mkdirp-stream": "^1.0.0", - "glob-stream": "^6.1.0", - "graceful-fs": "^4.0.0", - "is-valid-glob": "^1.0.0", - "lazystream": "^1.0.0", - "lead": "^1.0.0", - "object.assign": "^4.0.4", - "pumpify": "^1.3.5", - "readable-stream": "^2.3.3", - "remove-bom-buffer": "^3.0.0", - "remove-bom-stream": "^1.2.0", - "resolve-options": "^1.1.0", - "through2": "^2.0.0", - "to-through": "^2.0.0", - "value-or-function": "^3.0.0", - "vinyl": "^2.0.0", - "vinyl-sourcemap": "^1.1.0" - } - } - } - }, - "gulp-util": { - "version": "3.0.8", - "resolved": "https://registry.npmjs.org/gulp-util/-/gulp-util-3.0.8.tgz", - "integrity": "sha1-AFTh50RQLifATBh8PsxQXdVLu08=", - "dev": true, - "requires": { - "array-differ": "^1.0.0", - "array-uniq": "^1.0.2", - "beeper": "^1.0.0", - "chalk": "^1.0.0", - "dateformat": "^2.0.0", - "fancy-log": "^1.1.0", - "gulplog": "^1.0.0", - "has-gulplog": "^0.1.0", - "lodash._reescape": "^3.0.0", - "lodash._reevaluate": "^3.0.0", - "lodash._reinterpolate": "^3.0.0", - "lodash.template": "^3.0.0", - "minimist": "^1.1.0", - "multipipe": "^0.1.2", - "object-assign": "^3.0.0", - "replace-ext": "0.0.1", - "through2": "^2.0.0", - "vinyl": "^0.5.0" - }, - "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", - "dev": true - }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": "^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "clone": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/clone/-/clone-1.0.4.tgz", - "integrity": "sha1-2jCcwmPfFZlMaIypAheco8fNfH4=", - "dev": true - }, - "clone-stats": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/clone-stats/-/clone-stats-0.0.1.tgz", - "integrity": "sha1-uI+UqCzzi4eR1YBG6kAprYjKmdE=", - "dev": true - }, - "object-assign": { + "through2": { "version": "3.0.0", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-3.0.0.tgz", - "integrity": "sha1-m+3VygiXlJvKR+f/QIBi1Un1h/I=", - "dev": true - }, - "replace-ext": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/replace-ext/-/replace-ext-0.0.1.tgz", - "integrity": "sha1-KbvZIHinOfC8zitO5B6DeVNSKSQ=", - "dev": true - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true - }, - "vinyl": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/vinyl/-/vinyl-0.5.3.tgz", - "integrity": "sha1-sEVbOPxeDPMNQyUTLkYZcMIJHN4=", + "resolved": "https://registry.npmjs.org/through2/-/through2-3.0.0.tgz", + "integrity": "sha512-8B+sevlqP4OiCjonI1Zw03Sf8PuV1eRsYQgLad5eonILOdyeRsY27A/2Ze8IlvlMvq31OH+3fz/styI7Ya62yQ==", "dev": true, "requires": { - "clone": "^1.0.0", - "clone-stats": "^0.0.1", - "replace-ext": "0.0.1" + "readable-stream": "2 || 3", + "xtend": "~4.0.1" } } } @@ -7007,12 +6088,12 @@ "dev": true }, "har-validator": { - "version": "5.1.0", - "resolved": 
"https://registry.npmjs.org/har-validator/-/har-validator-5.1.0.tgz", - "integrity": "sha512-+qnmNjI4OfH2ipQ9VQOw23bBd/ibtfbVdK2fYbY4acTDqKTW/YDp9McimZdDbG8iV9fZizUqQMD5xvriB146TA==", + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", "dev": true, "requires": { - "ajv": "^5.3.0", + "ajv": "^6.5.5", "har-schema": "^2.0.0" } }, @@ -7039,15 +6120,6 @@ "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", "integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=" }, - "has-gulplog": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/has-gulplog/-/has-gulplog-0.1.0.tgz", - "integrity": "sha1-ZBTIKRNpfaUVkDl9r7EvIpZ4Ec4=", - "dev": true, - "requires": { - "sparkles": "^1.0.0" - } - }, "has-symbols": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.0.tgz", @@ -7069,14 +6141,6 @@ "get-value": "^2.0.6", "has-values": "^1.0.0", "isobject": "^3.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "has-values": { @@ -7089,26 +6153,6 @@ "kind-of": "^4.0.0" }, "dependencies": { - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, "kind-of": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-4.0.0.tgz", @@ -7307,6 +6351,16 @@ "minimatch": "^3.0.4" } }, + "import-fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-2.0.0.tgz", + "integrity": "sha1-2BNVwVYS04bGH53dOSLUMEgipUY=", + "dev": true, + "requires": { + "caller-path": "^2.0.0", + "resolve-from": "^3.0.0" + } + }, "import-local": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-1.0.0.tgz", @@ -7456,9 +6510,9 @@ } }, "interpret": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.1.0.tgz", - "integrity": "sha1-ftGxQQxqDg94z5XTuEQMY/eLhhQ=", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/interpret/-/interpret-1.2.0.tgz", + "integrity": "sha512-mT34yGKMNceBQUoVn7iCDKDntA7SC6gycMAWzGx1z/CMCTV7b2AAtXlo3nRyHZ1FelRkQbQjprHSYGwzLtkVbw==", "dev": true }, "invariant": { @@ -7494,11 +6548,22 @@ }, "is-accessor-descriptor": { "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", + "resolved": "http://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-arrayish": { @@ -7548,11 +6613,22 @@ }, "is-data-descriptor": { "version": "0.1.4", - 
"resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", + "resolved": "http://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-date-object": { @@ -7608,9 +6684,9 @@ "dev": true }, "is-extglob": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", - "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", "dev": true }, "is-finite": { @@ -7633,17 +6709,17 @@ }, "is-generator-fn": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-1.0.0.tgz", + "resolved": "http://registry.npmjs.org/is-generator-fn/-/is-generator-fn-1.0.0.tgz", "integrity": "sha1-lp1J4bszKfa7fwkIm+JleLLd1Go=", "dev": true }, "is-glob": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", - "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", + "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", "dev": true, "requires": { - "is-extglob": "^1.0.0" + "is-extglob": "^2.1.1" } }, "is-negated-glob": { @@ -7653,12 +6729,23 @@ "dev": true }, "is-number": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-2.1.0.tgz", - "integrity": "sha1-Afy7s5NGOlSPL0ZszhbezknbkI8=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", + "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "is-obj": { @@ -7667,15 +6754,6 @@ "integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=", "dev": true }, - "is-observable": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/is-observable/-/is-observable-1.1.0.tgz", - "integrity": "sha512-NqCa4Sa2d+u7BWc6CukaObG3Fh+CU9bvixbpcXYhy2VvYS7vVGIdAgnIS5Ks3A/cqk4rebLJ9s8zBstT2aKnIA==", - "dev": true, - "requires": { - "symbol-observable": "^1.1.0" - } - }, "is-path-cwd": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-path-cwd/-/is-path-cwd-1.0.0.tgz", @@ -7713,14 +6791,6 @@ "dev": true, "requires": { "isobject": "^3.0.1" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "is-posix-bracket": { @@ -7750,12 +6820,6 @@ "has": "^1.0.1" } }, - "is-regexp": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-regexp/-/is-regexp-1.0.0.tgz", - "integrity": "sha1-/S2INUXEa6xaYz57mgnof6LLUGk=", - "dev": true - }, "is-relative": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/is-relative/-/is-relative-1.0.0.tgz", @@ -7841,13 +6905,10 @@ "dev": true }, "isobject": { - 
"version": "2.1.0", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-2.1.0.tgz", - "integrity": "sha1-8GVWEJaj8dou9GJy+BXIQNh+DIk=", - "dev": true, - "requires": { - "isarray": "1.0.0" - } + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", + "dev": true }, "isstream": { "version": "0.1.2", @@ -7973,12 +7034,15 @@ } }, "ix": { - "version": "2.3.5", - "resolved": "https://registry.npmjs.org/ix/-/ix-2.3.5.tgz", - "integrity": "sha512-mdW2LtQiy+gPtggKa393EdSaI46RARsAa5zjlLgNKMlE57vC6dc6g6nehROI1Gj/HhsTvpb3WALSwg0EWhhz0Q==", + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/ix/-/ix-2.4.3.tgz", + "integrity": "sha512-LoFBSUQ8C41KQxIlm/dw+vgGngnR0jc8DMibryGfNoQs2l4dDodQdYUvmCNAaIGsEMkm+IdiF+hLp5SHl6C8GQ==", "dev": true, "requires": { - "tslib": "^1.8.0" + "@types/node": "^10.12.18", + "is-stream": "1.1.0", + "rxjs": "5.5.11", + "tslib": "^1.9.3" } }, "jest": { @@ -7997,6 +7061,38 @@ "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "camelcase": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -8008,12 +7104,54 @@ "wrap-ansi": "^2.0.0" } }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", "dev": true }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, "jest-cli": { "version": "23.6.0", "resolved": 
"https://registry.npmjs.org/jest-cli/-/jest-cli-23.6.0.tgz", @@ -8058,6 +7196,36 @@ "yargs": "^11.0.0" } }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -8088,6 +7256,12 @@ "ansi-regex": "^3.0.0" } }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { "version": "11.1.0", "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", @@ -8107,6 +7281,15 @@ "y18n": "^3.2.1", "yargs-parser": "^9.0.2" } + }, + "yargs-parser": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", + "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "dev": true, + "requires": { + "camelcase": "^4.1.0" + } } } }, @@ -8139,53 +7322,144 @@ "jest-validate": "^23.6.0", "micromatch": "^2.3.11", "pretty-format": "^23.6.0" - } - }, - "jest-diff": { - "version": "23.6.0", - "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-23.6.0.tgz", - "integrity": "sha512-Gz9l5Ov+X3aL5L37IT+8hoCUsof1CVYBb2QEkOupK64XyRR3h+uRpYIm97K7sY8diFxowR8pIGEdyfMKTixo3g==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "diff": "^3.2.0", - "jest-get-type": "^22.1.0", - "pretty-format": "^23.6.0" - } - }, - "jest-docblock": { - "version": "23.2.0", - "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-23.2.0.tgz", - "integrity": "sha1-8IXh8YVI2Z/dabICB+b9VdkTg6c=", - "dev": true, - "requires": { - "detect-newline": "^2.1.0" - } - }, - "jest-each": { - "version": "23.6.0", - "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-23.6.0.tgz", - "integrity": "sha512-x7V6M/WGJo6/kLoissORuvLIeAoyo2YqLOoCDkohgJ4XOXSqOtyvr8FbInlAWS77ojBsZrafbozWoKVRdtxFCg==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "pretty-format": "^23.6.0" - } - }, - "jest-environment-jsdom": { - "version": "23.4.0", - "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-23.4.0.tgz", - "integrity": "sha1-BWp5UrP+pROsYqFAosNox52eYCM=", - "dev": true, - "requires": { - "jest-mock": "^23.2.0", - "jest-util": "^23.4.0", - "jsdom": "^11.5.1" - } - }, - "jest-environment-node": { - "version": "23.4.0", - "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-23.4.0.tgz", + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": 
"https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } + } + }, + "jest-diff": { + "version": "23.6.0", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-23.6.0.tgz", + "integrity": "sha512-Gz9l5Ov+X3aL5L37IT+8hoCUsof1CVYBb2QEkOupK64XyRR3h+uRpYIm97K7sY8diFxowR8pIGEdyfMKTixo3g==", + "dev": true, + "requires": { + "chalk": "^2.0.1", + "diff": "^3.2.0", + "jest-get-type": "^22.1.0", + "pretty-format": "^23.6.0" + } + }, + "jest-docblock": { + "version": "23.2.0", + "resolved": "https://registry.npmjs.org/jest-docblock/-/jest-docblock-23.2.0.tgz", + "integrity": "sha1-8IXh8YVI2Z/dabICB+b9VdkTg6c=", + "dev": true, + "requires": { + "detect-newline": "^2.1.0" + } + }, + "jest-each": { + "version": "23.6.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-23.6.0.tgz", + "integrity": "sha512-x7V6M/WGJo6/kLoissORuvLIeAoyo2YqLOoCDkohgJ4XOXSqOtyvr8FbInlAWS77ojBsZrafbozWoKVRdtxFCg==", + "dev": true, + "requires": { + "chalk": "^2.0.1", + "pretty-format": "^23.6.0" + } + }, + "jest-environment-jsdom": { + "version": "23.4.0", + "resolved": "https://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-23.4.0.tgz", + "integrity": "sha1-BWp5UrP+pROsYqFAosNox52eYCM=", + "dev": true, + "requires": { + "jest-mock": "^23.2.0", + "jest-util": "^23.4.0", + "jsdom": "^11.5.1" + } + }, + "jest-environment-node": { + "version": "23.4.0", + "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-23.4.0.tgz", "integrity": 
"sha1-V+gO0IQd6jAxZ8zozXlSHeuv3hA=", "dev": true, "requires": { @@ -8219,6 +7493,97 @@ "jest-worker": "^23.2.0", "micromatch": "^2.3.11", "sane": "^2.0.0" + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } } }, "jest-jasmine2": { @@ -8272,6 +7637,97 @@ "micromatch": "^2.3.11", "slash": "^1.0.0", "stack-utils": "^1.0.1" + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": 
"sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + } } }, "jest-mock": { @@ -8381,6 +7837,38 @@ "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", "dev": true }, + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "camelcase": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", + "dev": true + }, "cliui": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", @@ -8392,12 +7880,84 @@ "wrap-ansi": "^2.0.0" } }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + 
"dev": true + }, "is-fullwidth-code-point": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", "dev": true }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" + } + }, "os-locale": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", @@ -8434,6 +7994,12 @@ "integrity": "sha1-IzTBjpx1n3vdVv3vfprj1YjmjtM=", "dev": true }, + "which-module": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", + "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "dev": true + }, "yargs": { "version": "11.1.0", "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", @@ -8453,7 +8019,16 @@ "y18n": "^3.2.1", "yargs-parser": "^9.0.2" } - } + }, + "yargs-parser": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", + "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "dev": true, + "requires": { + "camelcase": "^4.1.0" + } + } } }, "jest-serializer": { @@ -8462,6 +8037,16 @@ "integrity": "sha1-o3dq6zEekP6D+rnlM+hRAr0WQWU=", "dev": true }, + "jest-silent-reporter": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jest-silent-reporter/-/jest-silent-reporter-0.1.1.tgz", + "integrity": "sha512-nrRzOV4151hG354tnVWfyZbFGJdylpadRWYWWPSD+WeOz2hQOjUGxvIFODnaY9cKQ7JWCtG+5LgSss22ccRhBg==", + "dev": true, + "requires": { + "chalk": "^2.3.1", + "jest-util": "^23.0.0" + } + }, "jest-snapshot": { "version": "23.6.0", "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-23.6.0.tgz", @@ -8594,7 +8179,7 @@ }, "jsesc": { "version": "1.3.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-1.3.0.tgz", + "resolved": "http://registry.npmjs.org/jsesc/-/jsesc-1.3.0.tgz", "integrity": "sha1-RsP+yMGJKxKwgz25vHYiF226s0s=", "dev": true }, @@ -8622,19 +8207,16 @@ "dev": true }, "json-schema-traverse": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", - "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", "dev": true }, - "json-stable-stringify": { + "json-stable-stringify-without-jsonify": { "version": "1.0.1", - "resolved": 
"https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", - "integrity": "sha1-mnWdOcXy/1A/1TAGRu1EX4jE+a8=", - "dev": true, - "requires": { - "jsonify": "~0.0.0" - } + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", + "dev": true }, "json-stringify-safe": { "version": "5.0.1", @@ -8688,13 +8270,10 @@ "dev": true }, "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", + "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", + "dev": true }, "klaw": { "version": "1.3.1", @@ -8761,28 +8340,28 @@ "dev": true }, "lerna": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/lerna/-/lerna-3.4.3.tgz", - "integrity": "sha512-tWq1LvpHqkyB+FaJCmkEweivr88yShDMmauofPVdh0M5gU1cVucszYnIgWafulKYu2LMQ3IfUMUU5Pp3+MvADQ==", - "dev": true, - "requires": { - "@lerna/add": "^3.4.1", - "@lerna/bootstrap": "^3.4.1", - "@lerna/changed": "^3.4.1", - "@lerna/clean": "^3.3.2", - "@lerna/cli": "^3.2.0", - "@lerna/create": "^3.4.1", - "@lerna/diff": "^3.3.0", - "@lerna/exec": "^3.3.2", - "@lerna/import": "^3.3.1", - "@lerna/init": "^3.3.0", - "@lerna/link": "^3.3.0", - "@lerna/list": "^3.3.2", - "@lerna/publish": "^3.4.3", - "@lerna/run": "^3.3.2", - "@lerna/version": "^3.4.1", + "version": "3.8.0", + "resolved": "https://registry.npmjs.org/lerna/-/lerna-3.8.0.tgz", + "integrity": "sha512-OLdf7JSWjpgVecvVLyTRpeKPjTJOcQa366IvaEhorOIxFPZvR1rNIEvi4DMOAaxNINpmCB4nSm769H7H4jNQyw==", + "dev": true, + "requires": { + "@lerna/add": "^3.7.2", + "@lerna/bootstrap": "^3.7.2", + "@lerna/changed": "^3.8.0", + "@lerna/clean": "^3.7.2", + "@lerna/cli": "^3.6.0", + "@lerna/create": "^3.7.2", + "@lerna/diff": "^3.7.2", + "@lerna/exec": "^3.7.2", + "@lerna/import": "^3.7.2", + "@lerna/init": "^3.7.2", + "@lerna/link": "^3.7.2", + "@lerna/list": "^3.7.2", + "@lerna/publish": "^3.8.0", + "@lerna/run": "^3.7.2", + "@lerna/version": "^3.8.0", "import-local": "^1.0.0", - "npmlog": "^4.1.2" + "libnpm": "^2.0.1" } }, "leven": { @@ -8801,6 +8380,34 @@ "type-check": "~0.3.2" } }, + "libnpm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/libnpm/-/libnpm-2.0.1.tgz", + "integrity": "sha512-qTKoxyJvpBxHZQB6k0AhSLajyXq9ZE/lUsZzuHAplr2Bpv9G+k4YuYlExYdUCeVRRGqcJt8hvkPh4tBwKoV98w==", + "dev": true, + "requires": { + "bin-links": "^1.1.2", + "bluebird": "^3.5.3", + "find-npm-prefix": "^1.0.2", + "libnpmaccess": "^3.0.1", + "libnpmconfig": "^1.2.1", + "libnpmhook": "^5.0.2", + "libnpmorg": "^1.0.0", + "libnpmpublish": "^1.1.0", + "libnpmsearch": "^2.0.0", + "libnpmteam": "^1.0.1", + "lock-verify": "^2.0.2", + "npm-lifecycle": "^2.1.0", + "npm-logical-tree": "^1.2.1", + "npm-package-arg": "^6.1.0", + "npm-profile": "^4.0.1", + "npm-registry-fetch": "^3.8.0", + "npmlog": "^4.1.2", + "pacote": "^9.2.3", + "read-package-json": "^2.0.13", + "stringify-package": "^1.0.0" + } + }, "libnpmaccess": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/libnpmaccess/-/libnpmaccess-3.0.1.tgz", @@ -8827,561 +8434,290 @@ "requires": { "pump": "^3.0.0" } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + 
"integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } } } }, - "liftoff": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/liftoff/-/liftoff-2.5.0.tgz", - "integrity": "sha1-IAkpG7Mc6oYbvxCnwVooyvdcMew=", - "dev": true, - "requires": { - "extend": "^3.0.0", - "findup-sync": "^2.0.0", - "fined": "^1.0.1", - "flagged-respawn": "^1.0.0", - "is-plain-object": "^2.0.4", - "object.map": "^1.0.0", - "rechoir": "^0.6.2", - "resolve": "^1.1.7" - } - }, - "lint-staged": { - "version": "7.3.0", - "resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-7.3.0.tgz", - "integrity": "sha512-AXk40M9DAiPi7f4tdJggwuKIViUplYtVj1os1MVEteW7qOkU50EOehayCfO9TsoGK24o/EsWb41yrEgfJDDjCw==", + "libnpmconfig": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/libnpmconfig/-/libnpmconfig-1.2.1.tgz", + "integrity": "sha512-9esX8rTQAHqarx6qeZqmGQKBNZR5OIbl/Ayr0qQDy3oXja2iFVQQI81R6GZ2a02bSNZ9p3YOGX1O6HHCb1X7kA==", "dev": true, "requires": { - "chalk": "^2.3.1", - "commander": "^2.14.1", - "cosmiconfig": "^5.0.2", - "debug": "^3.1.0", - "dedent": "^0.7.0", - "execa": "^0.9.0", - "find-parent-dir": "^0.3.0", - "is-glob": "^4.0.0", - "is-windows": "^1.0.2", - "jest-validate": "^23.5.0", - "listr": "^0.14.1", - "lodash": "^4.17.5", - "log-symbols": "^2.2.0", - "micromatch": "^3.1.8", - "npm-which": "^3.0.1", - "p-map": "^1.1.1", - "path-is-inside": "^1.0.2", - "pify": "^3.0.0", - "please-upgrade-node": "^3.0.2", - "staged-git-files": "1.1.1", - "string-argv": "^0.0.2", - "stringify-object": "^3.2.2" + "figgy-pudding": "^3.5.1", + "find-up": "^3.0.0", + "ini": "^1.3.5" }, "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "debug": { - "version": "3.2.6", - "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.6.tgz", - "integrity": "sha512-mel+jf7nrtEl5Pn1Qx46zARXKDpBbvzezse7p7LqINmdoIk8PYP5SySaxEmYv6TZ0JyEKA1hsCId6DIhgITtWQ==", + "find-up": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-3.0.0.tgz", + "integrity": "sha512-1yD6RmLI1XBfxugvORwlck6f75tYL+iR0jqwsOrOxMZyGYqUuDhJ0l4AXdO1iX/FTs9cBAMEk1gWSEx1kSbylg==", "dev": true, "requires": { - "ms": "^2.1.1" + "locate-path": "^3.0.0" } }, - "execa": { - "version": "0.9.0", - "resolved": 
"https://registry.npmjs.org/execa/-/execa-0.9.0.tgz", - "integrity": "sha512-BbUMBiX4hqiHZUA5+JujIjNb6TyAlp2D5KLheMjMluwOuzcnylDL4AxZYLLn1n2AGB49eSWwyKvvEQoRpnAtmA==", + "locate-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz", + "integrity": "sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==", "dev": true, "requires": { - "cross-spawn": "^5.0.1", - "get-stream": "^3.0.0", - "is-stream": "^1.1.0", - "npm-run-path": "^2.0.0", - "p-finally": "^1.0.0", - "signal-exit": "^3.0.0", - "strip-eof": "^1.0.0" + "p-locate": "^3.0.0", + "path-exists": "^3.0.0" } }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", + "p-limit": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.1.0.tgz", + "integrity": "sha512-NhURkNcrVB+8hNfLuysU8enY5xn2KXphsHBaC2YmRNTZRc7RWusw6apSpdEj3jo4CMb6W9nrF6tTnsJsJeyu6g==", "dev": true, "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dev": true, - "requires": { - "ms": "2.0.0" - } - }, - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": 
"https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - }, - "ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true - } + "p-try": "^2.0.0" } }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", + "p-locate": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-3.0.0.tgz", + "integrity": "sha512-x+12w/To+4GFfgJhBEpiDcLozRJGegY+Ei7/z0tSLkMmxGZNybVMSfWj9aJn8Z5Fc7dBUNJOOVgPv2H7IwulSQ==", "dev": true, "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } + "p-limit": "^2.0.0" } }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } + "p-try": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.0.0.tgz", + "integrity": "sha512-hMp0onDKIajHfIkdRk3P4CdCmErkYAxxDtP3Wx/4nZ3aGlau2VKh3mZpcuFkH27WQkL/3WBCPOktzA9ZOAnMQQ==", + "dev": true }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } + } + }, + "libnpmhook": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/libnpmhook/-/libnpmhook-5.0.2.tgz", + "integrity": "sha512-vLenmdFWhRfnnZiNFPNMog6CK7Ujofy2TWiM2CrpZUjBRIhHkJeDaAbJdYCT6W4lcHtyrJR8yXW8KFyq6UAp1g==", + "dev": true, + "requires": { + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" + }, + "dependencies": { + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": 
"sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "dev": true }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "kind-of": "^6.0.0" + "pump": "^3.0.0" } }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", + } + } + }, + "libnpmorg": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/libnpmorg/-/libnpmorg-1.0.0.tgz", + "integrity": "sha512-o+4eVJBoDGMgRwh2lJY0a8pRV2c/tQM/SxlqXezjcAg26Qe9jigYVs+Xk0vvlYDWCDhP0g74J8UwWeAgsB7gGw==", + "dev": true, + "requires": { + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" + }, + "dependencies": { + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", "dev": true }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "is-extglob": "^2.1.1" + "pump": "^3.0.0" } }, - "is-number": { + "pump": { "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - 
"integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "ms": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", - "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==", - "dev": true } } }, - "listr": { - "version": "0.14.2", - "resolved": "https://registry.npmjs.org/listr/-/listr-0.14.2.tgz", - "integrity": "sha512-vmaNJ1KlGuGWShHI35X/F8r9xxS0VTHh9GejVXwSN20fG5xpq3Jh4bJbnumoT6q5EDM/8/YP1z3YMtQbFmhuXw==", + "libnpmpublish": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/libnpmpublish/-/libnpmpublish-1.1.0.tgz", + "integrity": "sha512-mQ3LT2EWlpJ6Q8mgHTNqarQVCgcY32l6xadPVPMcjWLtVLz7II4WlWkzlbYg1nHGAf+xyABDwS+3aNUiRLkyaA==", "dev": true, "requires": { - "@samverschueren/stream-to-observable": "^0.3.0", - "is-observable": "^1.1.0", - "is-promise": "^2.1.0", - "is-stream": "^1.1.0", - "listr-silent-renderer": "^1.1.1", - "listr-update-renderer": "^0.4.0", - "listr-verbose-renderer": "^0.4.0", - "p-map": "^1.1.1", - "rxjs": "^6.1.0" + "aproba": "^2.0.0", + "figgy-pudding": "^3.5.1", + "get-stream": "^4.0.0", + "lodash.clonedeep": "^4.5.0", + "normalize-package-data": "^2.4.0", + "npm-package-arg": "^6.1.0", + "npm-registry-fetch": "^3.8.0", + "semver": "^5.5.1", + "ssri": "^6.0.1" }, "dependencies": { - "rxjs": { - "version": "6.3.3", - "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-6.3.3.tgz", - "integrity": "sha512-JTWmoY9tWCs7zvIk/CvRjhjGaOd+OVBM987mxFo+OW66cGpdKjZcpmc74ES1sB//7Kl/PAe8+wEakuhG4pcgOw==", + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", + "dev": true + }, + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "tslib": "^1.9.0" + "pump": "^3.0.0" + } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } } } }, - "listr-silent-renderer": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz", - "integrity": "sha1-kktaN1cVN3C/Go4/v3S4u/P5JC4=", - "dev": true - }, - "listr-update-renderer": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/listr-update-renderer/-/listr-update-renderer-0.4.0.tgz", - "integrity": "sha1-NE2YDaLKLosUW6MFkI8yrj9MyKc=", + 
"libnpmsearch": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/libnpmsearch/-/libnpmsearch-2.0.0.tgz", + "integrity": "sha512-vd+JWbTGzOSfiOc+72MU6y7WqmBXn49egCCrIXp27iE/88bX8EpG64ST1blWQI1bSMUr9l1AKPMVsqa2tS5KWA==", "dev": true, "requires": { - "chalk": "^1.1.3", - "cli-truncate": "^0.2.1", - "elegant-spinner": "^1.0.1", - "figures": "^1.7.0", - "indent-string": "^3.0.0", - "log-symbols": "^1.0.2", - "log-update": "^1.0.2", - "strip-ansi": "^3.0.1" + "figgy-pudding": "^3.5.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" }, "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", - "dev": true - }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": "^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "figures": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/figures/-/figures-1.7.0.tgz", - "integrity": "sha1-y+Hjr/zxzUS4DK3+0o3Hk6lwHS4=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "escape-string-regexp": "^1.0.5", - "object-assign": "^4.1.0" + "pump": "^3.0.0" } }, - "log-symbols": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-1.0.2.tgz", - "integrity": "sha1-N2/3tY6jCGoPCfrMdGF+ylAeGhg=", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "chalk": "^1.0.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true } } }, - "listr-verbose-renderer": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/listr-verbose-renderer/-/listr-verbose-renderer-0.4.1.tgz", - "integrity": "sha1-ggb0z21S3cWCfl/RSYng6WWTOjU=", + "libnpmteam": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/libnpmteam/-/libnpmteam-1.0.1.tgz", + "integrity": "sha512-gDdrflKFCX7TNwOMX1snWojCoDE5LoRWcfOC0C/fqF7mBq8Uz9zWAX4B2RllYETNO7pBupBaSyBDkTAC15cAMg==", "dev": true, "requires": { - "chalk": "^1.1.3", - "cli-cursor": "^1.0.2", - "date-fns": "^1.27.2", - "figures": "^1.7.0" + "aproba": "^2.0.0", + "figgy-pudding": "^3.4.1", + "get-stream": "^4.0.0", + "npm-registry-fetch": "^3.8.0" }, "dependencies": { - "ansi-styles": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", - "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", + "aproba": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", + "integrity": "sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==", "dev": true }, - "chalk": { - "version": "1.1.3", - "resolved": "http://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", - "integrity": 
"sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", - "dev": true, - "requires": { - "ansi-styles": "^2.2.1", - "escape-string-regexp": "^1.0.2", - "has-ansi": "^2.0.0", - "strip-ansi": "^3.0.0", - "supports-color": "^2.0.0" - } - }, - "cli-cursor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-1.0.2.tgz", - "integrity": "sha1-ZNo/fValRBLll5S9Ytw1KV6PKYc=", - "dev": true, - "requires": { - "restore-cursor": "^1.0.1" - } - }, - "figures": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/figures/-/figures-1.7.0.tgz", - "integrity": "sha1-y+Hjr/zxzUS4DK3+0o3Hk6lwHS4=", + "get-stream": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-4.1.0.tgz", + "integrity": "sha512-GMat4EJ5161kIy2HevLlr4luNjBgvmj413KaQA7jt4V8B4RDsfpHk7WQ9GVqfYyyx8OS/L66Kox+rJRNklLK7w==", "dev": true, "requires": { - "escape-string-regexp": "^1.0.5", - "object-assign": "^4.1.0" + "pump": "^3.0.0" } }, - "onetime": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/onetime/-/onetime-1.1.0.tgz", - "integrity": "sha1-ofeDj4MUxRbwXs78vEzP4EtO14k=", - "dev": true - }, - "restore-cursor": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-1.0.1.tgz", - "integrity": "sha1-NGYfRohjJ/7SmRR5FSJS35LapUE=", + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "dev": true, "requires": { - "exit-hook": "^1.0.0", - "onetime": "^1.0.0" + "end-of-stream": "^1.1.0", + "once": "^1.3.1" } - }, - "supports-color": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", - "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", - "dev": true } } }, + "liftoff": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/liftoff/-/liftoff-2.5.0.tgz", + "integrity": "sha1-IAkpG7Mc6oYbvxCnwVooyvdcMew=", + "dev": true, + "requires": { + "extend": "^3.0.0", + "findup-sync": "^2.0.0", + "fined": "^1.0.1", + "flagged-respawn": "^1.0.0", + "is-plain-object": "^2.0.4", + "object.map": "^1.0.0", + "rechoir": "^0.6.2", + "resolve": "^1.1.7" + } + }, "load-json-file": { "version": "1.1.0", "resolved": "http://registry.npmjs.org/load-json-file/-/load-json-file-1.1.0.tgz", @@ -9410,14 +8746,25 @@ "dev": true }, "loader-utils": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.1.0.tgz", - "integrity": "sha1-yYrvSIvM7aL/teLeZG1qdUQp9c0=", + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz", + "integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==", "dev": true, "requires": { - "big.js": "^3.1.3", + "big.js": "^5.2.2", "emojis-list": "^2.0.0", - "json5": "^0.5.0" + "json5": "^1.0.1" + }, + "dependencies": { + "json5": { + "version": "1.0.1", + "resolved": "http://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "dev": true, + "requires": { + "minimist": "^1.2.0" + } + } } }, "locate-path": { @@ -9428,6 +8775,24 @@ "requires": { "p-locate": "^2.0.0", "path-exists": "^3.0.0" + }, + "dependencies": { + "path-exists": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", + "integrity": 
"sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", + "dev": true + } + } + }, + "lock-verify": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/lock-verify/-/lock-verify-2.0.2.tgz", + "integrity": "sha512-QNVwK0EGZBS4R3YQ7F1Ox8p41Po9VGl2QG/2GsuvTbkJZYSsPeWHKMbbH6iZMCHWSMww5nrJroZYnGzI4cePuw==", + "dev": true, + "requires": { + "npm-package-arg": "^5.1.2 || 6", + "semver": "^5.4.1" } }, "lodash": { @@ -9436,114 +8801,34 @@ "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==", "dev": true }, - "lodash._basecopy": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._basecopy/-/lodash._basecopy-3.0.1.tgz", - "integrity": "sha1-jaDmqHbPNEwK2KVIghEd08XHyjY=", - "dev": true - }, - "lodash._basetostring": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._basetostring/-/lodash._basetostring-3.0.1.tgz", - "integrity": "sha1-0YYdh3+CSlL2aYMtyvPuFVZqB9U=", - "dev": true - }, - "lodash._basevalues": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._basevalues/-/lodash._basevalues-3.0.0.tgz", - "integrity": "sha1-W3dXYoAr3j0yl1A+JjAIIP32Ybc=", - "dev": true - }, - "lodash._getnative": { - "version": "3.9.1", - "resolved": "https://registry.npmjs.org/lodash._getnative/-/lodash._getnative-3.9.1.tgz", - "integrity": "sha1-VwvH3t5G1hzc3mh9ZdPuy6o6r/U=", - "dev": true - }, - "lodash._isiterateecall": { - "version": "3.0.9", - "resolved": "https://registry.npmjs.org/lodash._isiterateecall/-/lodash._isiterateecall-3.0.9.tgz", - "integrity": "sha1-UgOte6Ql+uhCRg5pbbnPPmqsBXw=", - "dev": true - }, - "lodash._reescape": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._reescape/-/lodash._reescape-3.0.0.tgz", - "integrity": "sha1-Kx1vXf4HyKNVdT5fJ/rH8c3hYWo=", - "dev": true - }, - "lodash._reevaluate": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lodash._reevaluate/-/lodash._reevaluate-3.0.0.tgz", - "integrity": "sha1-WLx0xAZklTrgsSTYBpltrKQx4u0=", - "dev": true - }, "lodash._reinterpolate": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/lodash._reinterpolate/-/lodash._reinterpolate-3.0.0.tgz", "integrity": "sha1-DM8tiRZq8Ds2Y8eWU4t1rG4RTZ0=", "dev": true }, - "lodash._root": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/lodash._root/-/lodash._root-3.0.1.tgz", - "integrity": "sha1-+6HEUkwZ7ppfgTa0YJ8BfPTe1pI=", - "dev": true - }, "lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", "integrity": "sha1-soqmKIorn8ZRA1x3EfZathkDMaY=" }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": "sha1-4j8/nE+Pvd6HJSnBBxhXoIblzO8=", + "dev": true + }, "lodash.debounce": { "version": "4.0.8", "resolved": "https://registry.npmjs.org/lodash.debounce/-/lodash.debounce-4.0.8.tgz", "integrity": "sha1-gteb/zCmfEAF/9XiUVMArZyk168=", "dev": true }, - "lodash.escape": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/lodash.escape/-/lodash.escape-3.2.0.tgz", - "integrity": "sha1-mV7g3BjBtIzJLv+ucaEKq1tIdpg=", - "dev": true, - "requires": { - "lodash._root": "^3.0.0" - } - }, - "lodash.isarguments": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz", - "integrity": "sha1-L1c9hcaiQon/AGY7SRwdM4/zRYo=", - "dev": true - }, - "lodash.isarray": { - "version": "3.0.4", - "resolved": 
"https://registry.npmjs.org/lodash.isarray/-/lodash.isarray-3.0.4.tgz", - "integrity": "sha1-eeTriMNqgSKvhvhEqpvNhRtfu1U=", - "dev": true - }, - "lodash.keys": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/lodash.keys/-/lodash.keys-3.1.2.tgz", - "integrity": "sha1-TbwEcrFWvlCgsoaFXRvQsMZWCYo=", - "dev": true, - "requires": { - "lodash._getnative": "^3.0.0", - "lodash.isarguments": "^3.0.0", - "lodash.isarray": "^3.0.0" - } - }, "lodash.padend": { "version": "4.6.1", "resolved": "https://registry.npmjs.org/lodash.padend/-/lodash.padend-4.6.1.tgz", "integrity": "sha1-U8y6BH0G4VjTEfRdpiX05J5vFm4=" }, - "lodash.restparam": { - "version": "3.6.1", - "resolved": "https://registry.npmjs.org/lodash.restparam/-/lodash.restparam-3.6.1.tgz", - "integrity": "sha1-k2pOMJ7zMKdkXtQUWYbIWuWyCAU=", - "dev": true - }, "lodash.sortby": { "version": "4.7.0", "resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz", @@ -9551,30 +8836,22 @@ "dev": true }, "lodash.template": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-3.6.2.tgz", - "integrity": "sha1-+M3sxhaaJVvpCYrosMU9N4kx0U8=", + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.template/-/lodash.template-4.4.0.tgz", + "integrity": "sha1-5zoDhcg1VZF0bgILmWecaQ5o+6A=", "dev": true, "requires": { - "lodash._basecopy": "^3.0.0", - "lodash._basetostring": "^3.0.0", - "lodash._basevalues": "^3.0.0", - "lodash._isiterateecall": "^3.0.0", - "lodash._reinterpolate": "^3.0.0", - "lodash.escape": "^3.0.0", - "lodash.keys": "^3.0.0", - "lodash.restparam": "^3.0.0", - "lodash.templatesettings": "^3.0.0" + "lodash._reinterpolate": "~3.0.0", + "lodash.templatesettings": "^4.0.0" } }, "lodash.templatesettings": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-3.1.1.tgz", - "integrity": "sha1-+zB4RHU7Zrnxr6VOJix0UwfbqOU=", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/lodash.templatesettings/-/lodash.templatesettings-4.1.0.tgz", + "integrity": "sha1-K01OlbpEDZFf8IvImeRVNmZxMxY=", "dev": true, "requires": { - "lodash._reinterpolate": "^3.0.0", - "lodash.escape": "^3.0.0" + "lodash._reinterpolate": "~3.0.0" } }, "log-driver": { @@ -9583,71 +8860,19 @@ "integrity": "sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg==", "dev": true }, - "log-symbols": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-2.2.0.tgz", - "integrity": "sha512-VeIAFslyIerEJLXHziedo2basKbMKtTw3vfn5IzG0XTjhAVEJyNHnL2p7vc+wBDSdQuUpNw3M2u6xb9QsAY5Eg==", + "loose-envify": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", + "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", "dev": true, "requires": { - "chalk": "^2.0.1" + "js-tokens": "^3.0.0 || ^4.0.0" } }, - "log-update": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/log-update/-/log-update-1.0.2.tgz", - "integrity": "sha1-GZKfZMQJPS0ucHWh2tivWcKWuNE=", - "dev": true, - "requires": { - "ansi-escapes": "^1.0.0", - "cli-cursor": "^1.0.2" - }, - "dependencies": { - "ansi-escapes": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-1.4.0.tgz", - "integrity": "sha1-06ioOzGapneTZisT52HHkRQiMG4=", - "dev": true - }, - "cli-cursor": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/cli-cursor/-/cli-cursor-1.0.2.tgz", - "integrity": "sha1-ZNo/fValRBLll5S9Ytw1KV6PKYc=", - "dev": true, - "requires": { - "restore-cursor": "^1.0.1" - } - }, - "onetime": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/onetime/-/onetime-1.1.0.tgz", - "integrity": "sha1-ofeDj4MUxRbwXs78vEzP4EtO14k=", - "dev": true - }, - "restore-cursor": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-1.0.1.tgz", - "integrity": "sha1-NGYfRohjJ/7SmRR5FSJS35LapUE=", - "dev": true, - "requires": { - "exit-hook": "^1.0.0", - "onetime": "^1.0.0" - } - } - } - }, - "loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": "sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "dev": true, - "requires": { - "js-tokens": "^3.0.0 || ^4.0.0" - } - }, - "loud-rejection": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/loud-rejection/-/loud-rejection-1.6.0.tgz", - "integrity": "sha1-W0b4AUft7leIcPCG0Eghz5mOVR8=", + "loud-rejection": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/loud-rejection/-/loud-rejection-1.6.0.tgz", + "integrity": "sha1-W0b4AUft7leIcPCG0Eghz5mOVR8=", "dev": true, "requires": { "currently-unhandled": "^0.4.1", @@ -9655,9 +8880,9 @@ } }, "lru-cache": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.3.tgz", - "integrity": "sha512-fFEhvcgzuIoJVUF8fYr5KR0YqxD238zgObTps31YdADwPPAp82a4M8TrckkWyx7ekNlf9aBcVn81cFwwXngrJA==", + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", + "integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==", "dev": true, "requires": { "pseudomap": "^1.0.2", @@ -9714,14 +8939,6 @@ "dev": true, "requires": { "kind-of": "^6.0.2" - }, - "dependencies": { - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - } } }, "makeerror": { @@ -9779,282 +8996,6 @@ "micromatch": "^3.0.4", "resolve": "^1.4.0", "stack-trace": "0.0.10" - }, - "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - 
"expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - 
"is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "math-random": { @@ -10083,6 +9024,16 @@ "mimic-fn": "^1.0.0" } }, + "memfs": { + "version": "2.14.2", + "resolved": "https://registry.npmjs.org/memfs/-/memfs-2.14.2.tgz", + "integrity": "sha512-y19j9L+b8nuDKwuwrrIOiDhDD2bi7pfL1/Z8kfCyPaoZzHxX2aRcI2Q5T6qdUzqVHWd3plAfxeDT3Crb2eCwUw==", + "dev": true, + "requires": { + "fast-extend": "0.0.2", + "fs-monkey": "^0.3.3" + } + 
}, "memoizee": { "version": "0.4.14", "resolved": "https://registry.npmjs.org/memoizee/-/memoizee-0.4.14.tgz", @@ -10132,6 +9083,15 @@ "trim-newlines": "^2.0.0" }, "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -10214,24 +9174,24 @@ "dev": true }, "micromatch": { - "version": "2.3.11", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", - "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "version": "3.1.10", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", + "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", "dev": true, "requires": { - "arr-diff": "^2.0.0", - "array-unique": "^0.2.1", - "braces": "^1.8.2", - "expand-brackets": "^0.1.4", - "extglob": "^0.3.1", - "filename-regex": "^2.0.0", - "is-extglob": "^1.0.0", - "is-glob": "^2.0.1", - "kind-of": "^3.0.2", - "normalize-path": "^2.0.1", - "object.omit": "^2.0.0", - "parse-glob": "^3.0.4", - "regex-cache": "^0.4.2" + "arr-diff": "^4.0.0", + "array-unique": "^0.3.2", + "braces": "^2.3.1", + "define-property": "^2.0.2", + "extend-shallow": "^3.0.2", + "extglob": "^2.0.4", + "fragment-cache": "^0.2.1", + "kind-of": "^6.0.2", + "nanomatch": "^1.2.9", + "object.pick": "^1.3.0", + "regex-not": "^1.0.0", + "snapdragon": "^0.8.1", + "to-regex": "^3.0.2" } }, "miller-rabin": { @@ -10321,9 +9281,9 @@ } }, "minizlib": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.1.1.tgz", - "integrity": "sha512-TrfjCjk4jLhcJyGMYymBH6oTXcWjYbUAXTHDbtnWHjZC25h0cdajHuPE1zxb4DVmu8crfh+HwH/WMuyLG0nHBg==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-1.2.1.tgz", + "integrity": "sha512-7+4oTUOWKg7AuL3vloEWekXY2/D20cevzsrNT2kGWm+39J9hGTCBv8VI5Pm5lXZ/o3/mdR4f8rflAPhnQb8mPA==", "dev": true, "requires": { "minipass": "^2.2.1" @@ -10345,6 +9305,18 @@ "pumpify": "^1.3.3", "stream-each": "^1.1.0", "through2": "^2.0.0" + }, + "dependencies": { + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + } } }, "mixin-deep": { @@ -10438,7 +9410,7 @@ }, "multimatch": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/multimatch/-/multimatch-2.1.0.tgz", + "resolved": "http://registry.npmjs.org/multimatch/-/multimatch-2.1.0.tgz", "integrity": "sha1-nHkGoi+0wCkZ4vX3UWG0zb1LKis=", "dev": true, "requires": { @@ -10448,13 +9420,14 @@ "minimatch": "^3.0.0" } }, - "multipipe": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/multipipe/-/multipipe-0.1.2.tgz", - "integrity": "sha1-Ko8t33Du1WTf8tV/HhoTfZ8FB4s=", + "multistream": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multistream/-/multistream-2.1.1.tgz", + "integrity": "sha512-xasv76hl6nr1dEy3lPvy7Ej7K/Lx3O/FCvwge8PeVJpciPPoNCbaANcNiBug3IpdvTveZUcAV0DJzdnUDMesNQ==", "dev": true, "requires": { - "duplexer2": "0.0.2" + "inherits": "^2.0.1", + "readable-stream": "^2.0.5" } }, "mute-stdout": { @@ -10465,14 +9438,14 @@ }, "mute-stream": { 
"version": "0.0.7", - "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.7.tgz", + "resolved": "http://registry.npmjs.org/mute-stream/-/mute-stream-0.0.7.tgz", "integrity": "sha1-MHXOk7whuPq0PhvE2n6BFe0ee6s=", "dev": true }, "nan": { - "version": "2.11.1", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.11.1.tgz", - "integrity": "sha512-iji6k87OSXa0CcrLl9z+ZiYSuR2o+c0bGuNmXdrhTQTakxytAFsC56SArGYoiHlJlFoHSnvmhpceZJaXkVuOtA==", + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.12.1.tgz", + "integrity": "sha512-JY7V6lRkStKcKTvHO5NVSQRv+RV+FIL5pvDoLiAtSL9pKlC5x9PKQcZDsq7m4FO4d57mkhC6Z+QhAh3Jdk5JFw==", "dev": true, "optional": true }, @@ -10493,26 +9466,6 @@ "regex-not": "^1.0.0", "snapdragon": "^0.8.1", "to-regex": "^3.0.1" - }, - "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - } } }, "natural-compare": { @@ -10529,7 +9482,7 @@ }, "next-tick": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/next-tick/-/next-tick-1.0.0.tgz", + "resolved": "http://registry.npmjs.org/next-tick/-/next-tick-1.0.0.tgz", "integrity": "sha1-yobR/ogoFpsBICCOPchCS524NCw=", "dev": true }, @@ -10613,6 +9566,14 @@ "url": "^0.11.0", "util": "^0.10.3", "vm-browserify": "0.0.4" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + } } }, "node-notifier": { @@ -10696,6 +9657,12 @@ } } }, + "npm-logical-tree": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/npm-logical-tree/-/npm-logical-tree-1.2.1.tgz", + "integrity": "sha512-AJI/qxDB2PWI4LG1CYN579AY1vCiNyWfkiquCsJWqntRu/WwimVrC8yXeILBFHDwxfOejxewlmnvW9XXjMlYIg==", + "dev": true + }, "npm-package-arg": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/npm-package-arg/-/npm-package-arg-6.1.0.tgz", @@ -10718,15 +9685,6 @@ "npm-bundled": "^1.0.1" } }, - "npm-path": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/npm-path/-/npm-path-2.0.4.tgz", - "integrity": "sha512-IFsj0R9C7ZdR5cP+ET342q77uSRdtWOlWpih5eC+lu29tIDbNEgDbzgVJ5UFvYHWhxDZ5TFkJafFioO0pPQjCw==", - "dev": true, - "requires": { - "which": "^1.2.10" - } - }, "npm-pick-manifest": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/npm-pick-manifest/-/npm-pick-manifest-2.2.3.tgz", @@ -10738,6 +9696,17 @@ "semver": "^5.4.1" } }, + "npm-profile": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-profile/-/npm-profile-4.0.1.tgz", + "integrity": "sha512-NQ1I/1Q7YRtHZXkcuU1/IyHeLy6pd+ScKg4+DQHdfsm769TGq6HPrkbuNJVJS4zwE+0mvvmeULzQdWn2L2EsVA==", + "dev": true, + "requires": { + "aproba": "^1.1.2 || 2", + "figgy-pudding": "^3.4.1", + "npm-registry-fetch": "^3.8.0" + } + }, "npm-registry-fetch": { "version": "3.8.0", "resolved": "https://registry.npmjs.org/npm-registry-fetch/-/npm-registry-fetch-3.8.0.tgz", @@ -10841,17 +9810,6 @@ "path-key": "^2.0.0" } }, - 
"npm-which": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/npm-which/-/npm-which-3.0.1.tgz", - "integrity": "sha1-kiXybsOihcIJyuZ8OxGmtKtxQKo=", - "dev": true, - "requires": { - "commander": "^2.9.0", - "npm-path": "^2.0.2", - "which": "^1.2.10" - } - }, "npmlog": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-4.1.2.tgz", @@ -10907,6 +9865,15 @@ "requires": { "is-descriptor": "^0.1.0" } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } } } }, @@ -10923,14 +9890,6 @@ "dev": true, "requires": { "isobject": "^3.0.0" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.assign": { @@ -10955,23 +9914,6 @@ "array-slice": "^1.0.0", "for-own": "^1.0.0", "isobject": "^3.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.getownpropertydescriptors": { @@ -10992,17 +9934,6 @@ "requires": { "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "object.omit": { @@ -11013,6 +9944,17 @@ "requires": { "for-own": "^0.1.4", "is-extendable": "^0.1.1" + }, + "dependencies": { + "for-own": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", + "integrity": "sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=", + "dev": true, + "requires": { + "for-in": "^1.0.1" + } + } } }, "object.pick": { @@ -11022,14 +9964,6 @@ "dev": true, "requires": { "isobject": "^3.0.1" - }, - "dependencies": { - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - } } }, "object.reduce": { @@ -11040,17 +9974,6 @@ "requires": { "for-own": "^1.0.0", "make-iterator": "^1.0.0" - }, - "dependencies": { - "for-own": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/for-own/-/for-own-1.0.0.tgz", - "integrity": "sha1-xjMy9BXO3EsE2/5wz4NklMU8tEs=", - "dev": true, - "requires": { - "for-in": "^1.0.1" - } - } } }, "once": { @@ -11128,7 +10051,7 @@ }, "os-homedir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "resolved": "http://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", "dev": true }, @@ -11143,7 +10066,7 @@ }, "os-tmpdir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "resolved": "http://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", "dev": true }, @@ -11236,17 +10159,17 @@ } }, "pacote": { - "version": "9.2.3", - "resolved": 
"https://registry.npmjs.org/pacote/-/pacote-9.2.3.tgz", - "integrity": "sha512-Y3+yY3nBRAxMlZWvr62XLJxOwCmG9UmkGZkFurWHoCjqF0cZL72cTOCRJTvWw8T4OhJS2RTg13x4oYYriauvEw==", + "version": "9.3.0", + "resolved": "https://registry.npmjs.org/pacote/-/pacote-9.3.0.tgz", + "integrity": "sha512-uy5xghB5wUtmFS+uNhQGhlsIF9rfsfxw6Zsu2VpmSz4/f+8D2+5V1HwjHdSn7W6aQTrxNNmmoUF5qNE10/EVdA==", "dev": true, "requires": { - "bluebird": "^3.5.2", - "cacache": "^11.2.0", + "bluebird": "^3.5.3", + "cacache": "^11.3.2", "figgy-pudding": "^3.5.1", "get-stream": "^4.1.0", "glob": "^7.1.3", - "lru-cache": "^4.1.3", + "lru-cache": "^5.1.1", "make-fetch-happen": "^4.0.1", "minimatch": "^3.0.4", "minipass": "^2.3.5", @@ -11265,7 +10188,7 @@ "safe-buffer": "^5.1.2", "semver": "^5.6.0", "ssri": "^6.0.1", - "tar": "^4.4.6", + "tar": "^4.4.8", "unique-filename": "^1.1.1", "which": "^1.3.1" }, @@ -11279,6 +10202,25 @@ "pump": "^3.0.0" } }, + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, + "pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "requires": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "tar": { "version": "4.4.8", "resolved": "https://registry.npmjs.org/tar/-/tar-4.4.8.tgz", @@ -11302,6 +10244,14 @@ } } }, + "pad-left": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pad-left/-/pad-left-2.1.0.tgz", + "integrity": "sha1-FuajstRKjhOMsIOMx8tAOk/J6ZQ=", + "requires": { + "repeat-string": "^1.5.4" + } + }, "pako": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.7.tgz", @@ -11359,6 +10309,23 @@ "is-dotfile": "^1.0.0", "is-extglob": "^1.0.0", "is-glob": "^2.0.0" + }, + "dependencies": { + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + } } }, "parse-json": { @@ -11370,6 +10337,12 @@ "error-ex": "^1.2.0" } }, + "parse-node-version": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/parse-node-version/-/parse-node-version-1.0.0.tgz", + "integrity": "sha512-02GTVHD1u0nWc20n2G7WX/PgdhNFG04j5fi1OkaJzPWLTcf6vh6229Lta1wTmXG/7Dg42tCssgkccVt7qvd8Kg==", + "dev": true + }, "parse-passwd": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/parse-passwd/-/parse-passwd-1.0.0.tgz", @@ -11390,7 +10363,7 @@ }, "path-browserify": { "version": "0.0.0", - "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-0.0.0.tgz", + "resolved": "http://registry.npmjs.org/path-browserify/-/path-browserify-0.0.0.tgz", "integrity": "sha1-oLhwcpquIUAFt9UDLsLLuw+0RRo=", "dev": true }, @@ -11401,14 +10374,17 @@ "dev": true }, "path-exists": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-3.0.0.tgz", - "integrity": "sha1-zg6+ql94yxiSXqfYENe1mwEP1RU=", - "dev": true + "version": "2.1.0", + "resolved": 
"https://registry.npmjs.org/path-exists/-/path-exists-2.1.0.tgz", + "integrity": "sha1-D+tsZPD8UY2adU3V77YscCJ2H0s=", + "dev": true, + "requires": { + "pinkie-promise": "^2.0.0" + } }, "path-is-absolute": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "resolved": "http://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", "dev": true }, @@ -11517,6 +10493,17 @@ "dev": true, "requires": { "find-up": "^2.1.0" + }, + "dependencies": { + "find-up": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-2.1.0.tgz", + "integrity": "sha1-RdG35QbHF93UgndaK3eSCjwMV6c=", + "dev": true, + "requires": { + "locate-path": "^2.0.0" + } + } } }, "platform": { @@ -11525,15 +10512,6 @@ "integrity": "sha512-TuvHS8AOIZNAlE77WUDiR4rySV/VMptyMfcfeoMgs4P8apaZM3JrnbzBiixKUv+XR6i+BXrQh8WAnjaSPFO65Q==", "dev": true }, - "please-upgrade-node": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/please-upgrade-node/-/please-upgrade-node-3.1.1.tgz", - "integrity": "sha512-KY1uHnQ2NlQHqIJQpnh/i54rKkuxCEBx+voJIS/Mvb+L2iYd2NMotwduhKTMjfC1uKoX3VXOxLjIYG66dfJTVQ==", - "dev": true, - "requires": { - "semver-compare": "^1.0.0" - } - }, "plugin-error": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/plugin-error/-/plugin-error-1.0.1.tgz", @@ -11544,23 +10522,6 @@ "arr-diff": "^4.0.0", "arr-union": "^3.1.0", "extend-shallow": "^3.0.2" - }, - "dependencies": { - "ansi-colors": { - "version": "1.1.0", - "resolved": "http://registry.npmjs.org/ansi-colors/-/ansi-colors-1.1.0.tgz", - "integrity": "sha512-SFKX67auSNoVR38N3L+nvsPjOE0bybKTYbkf5tRvushrAPQ9V75huw0ZxBkKVeRU9kqH3d6HA4xTckbwZ4ixmA==", - "dev": true, - "requires": { - "ansi-wrap": "^0.1.0" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - } } }, "pn": { @@ -11624,15 +10585,15 @@ "dev": true }, "process-nextick-args": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", - "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", + "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=", "dev": true }, "progress": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.2.tgz", - "integrity": "sha512-/OLz5F9beZUWwSHZDreXgap1XShX6W+DCHQCqwCF7uZ88s6uTlD2cR3JBE77SegCmNtb1Idst+NfmwcdU6KVhw==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", "dev": true }, "promise": { @@ -11707,9 +10668,9 @@ "dev": true }, "psl": { - "version": "1.1.29", - "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.29.tgz", - "integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ==", + "version": "1.1.31", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.31.tgz", + "integrity": "sha512-/6pt4+C+T+wZUieKR620OpzN/LlnNKuWjy1iFLQ/UG35JqHlR/89MP1d96dUfkf6Dne3TuLQzOYEYshJ+Hx8mw==", "dev": true }, "public-encrypt": { @@ -11727,9 +10688,9 @@ } }, "pump": { - "version": "3.0.0", - "resolved": 
"https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", + "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", "dev": true, "requires": { "end-of-stream": "^1.1.0", @@ -11745,24 +10706,12 @@ "duplexify": "^3.6.0", "inherits": "^2.0.3", "pump": "^2.0.0" - }, - "dependencies": { - "pump": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", - "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", - "dev": true, - "requires": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - } } }, "punycode": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", - "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", "dev": true }, "q": { @@ -11811,12 +10760,6 @@ "resolved": "https://registry.npmjs.org/is-number/-/is-number-4.0.0.tgz", "integrity": "sha512-rSklcAIlf1OmFdyAqbnWTLVelsQ58uvZ66S/ZyawjWqIviTWCjg2PzVGw8WUA+nNuPTqb4wgA+NszrJ+08LlgQ==", "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -11902,27 +10845,6 @@ "requires": { "find-up": "^1.0.0", "read-pkg": "^1.0.0" - }, - "dependencies": { - "find-up": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/find-up/-/find-up-1.1.2.tgz", - "integrity": "sha1-ay6YIrGizgpgq2TWEOzK1TyyTQ8=", - "dev": true, - "requires": { - "path-exists": "^2.0.0", - "pinkie-promise": "^2.0.0" - } - }, - "path-exists": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-2.1.0.tgz", - "integrity": "sha1-D+tsZPD8UY2adU3V77YscCJ2H0s=", - "dev": true, - "requires": { - "pinkie-promise": "^2.0.0" - } - } } }, "readable-stream": { @@ -11938,6 +10860,14 @@ "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" + }, + "dependencies": { + "process-nextick-args": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true + } } }, "readdir-scoped-modules": { @@ -11961,282 +10891,6 @@ "graceful-fs": "^4.1.11", "micromatch": "^3.1.10", "readable-stream": "^2.0.2" - }, - "dependencies": { - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - 
"arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - 
"regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", 
- "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "realpath-native": { @@ -12333,8 +10987,7 @@ "repeat-string": { "version": "1.6.1", "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", - "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=", - "dev": true + "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=" }, "repeating": { "version": "2.0.1", @@ -12423,12 +11076,12 @@ "dev": true }, "resolve": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.8.1.tgz", - "integrity": "sha512-AicPrAC7Qu1JxPCZ9ZgCZlY35QgFnNqc+0LtbRNxnVw4TXvjQ72wnuL9JQcEBgXkI9JM8MsT9kaQoHcpCRJOYA==", + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.9.0.tgz", + "integrity": "sha512-TZNye00tI67lwYvzxCxHGjwTNlUV70io54/Ed4j6PscB8xVfuBJpRenI/o6dVk0cY0PYTY27AgCoGGxRnYuItQ==", "dev": true, "requires": { - "path-parse": "^1.0.5" + "path-parse": "^1.0.6" } }, "resolve-cwd": { @@ -12582,14 +11235,6 @@ "dev": true, "requires": { "symbol-observable": "1.0.1" - }, - "dependencies": { - "symbol-observable": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.0.1.tgz", - "integrity": "sha1-g0D8RwLDEi310iKI+IKD9RPT/dQ=", - "dev": true - } } }, "safe-buffer": { @@ -12628,292 +11273,6 @@ "minimist": "^1.1.1", "walker": "~1.0.5", "watch": "~0.18.0" - }, - "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": 
"https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } } }, "sax": { @@ -12931,32 +11290,6 @@ "ajv": "^6.1.0", "ajv-errors": "^1.0.0", "ajv-keywords": "^3.1.0" - }, - "dependencies": { - "ajv": { - "version": "6.5.4", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.5.4.tgz", - "integrity": "sha512-4Wyjt8+t6YszqaXnLDfMmG/8AlO5Zbcsy3ATHncCzjW/NoPzAId8AK6749Ybjmdt+kUY1gP60fCu46oDxPv/mg==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": 
"https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - } } }, "semver": { @@ -12965,12 +11298,6 @@ "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==", "dev": true }, - "semver-compare": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz", - "integrity": "sha1-De4hahyUGrN+nvsXiPavxf9VN/w=", - "dev": true - }, "semver-greatest-satisfied-range": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/semver-greatest-satisfied-range/-/semver-greatest-satisfied-range-1.1.0.tgz", @@ -12981,9 +11308,9 @@ } }, "serialize-javascript": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.5.0.tgz", - "integrity": "sha512-Ga8c8NjAAp46Br4+0oZ2WxJCwIzwP60Gq1YPgU+39PiTVxyed/iKE/zyZI6+UlVYH5Q4PaQdHhcegIFPZTUfoQ==", + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-1.6.1.tgz", + "integrity": "sha512-A5MOagrPFga4YaKQSWHryl7AXvbQkEqpw4NNYMTNYUNV51bA8ABHgYFpqKx+YFFrw59xMV1qGH1R4AgoNIVgCw==", "dev": true }, "set-blocking": { @@ -13059,9 +11386,9 @@ } }, "shelljs": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.2.tgz", - "integrity": "sha512-pRXeNrCA2Wd9itwhvLp5LZQvPJ0wU6bcjaTMywHHGX5XWhVN2nzSu7WV0q+oUY7mGK3mgSkDDzP3MgjqdyIgbQ==", + "version": "0.8.3", + "resolved": "https://registry.npmjs.org/shelljs/-/shelljs-0.8.3.tgz", + "integrity": "sha512-fc0BKlAWiLpwZljmOvAOTE/gXawtCoNrP5oaY7KIaQbbyHeQVg01pSEuEGvGh3HEdBU4baCD7wQBwADmM/7f7A==", "dev": true, "requires": { "glob": "^7.0.0", @@ -13104,12 +11431,6 @@ "integrity": "sha1-xB8vbDn8FtHNF61LXYlhFK5HDVU=", "dev": true }, - "slice-ansi": { - "version": "0.0.4", - "resolved": "http://registry.npmjs.org/slice-ansi/-/slice-ansi-0.0.4.tgz", - "integrity": "sha1-7b+JA/ZvfOL46v1s7tZeJkyDGzU=", - "dev": true - }, "slide": { "version": "1.1.6", "resolved": "https://registry.npmjs.org/slide/-/slide-1.1.6.tgz", @@ -13206,18 +11527,6 @@ "is-data-descriptor": "^1.0.0", "kind-of": "^6.0.2" } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true } } }, @@ -13228,6 +11537,17 @@ "dev": true, "requires": { "kind-of": "^3.2.0" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "socks": { @@ -13316,9 +11636,9 @@ "dev": true }, "spdx-correct": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.0.2.tgz", - "integrity": "sha512-q9hedtzyXHr5S0A1vEPoK/7l8NpfkFYTq6iCY+Pno2ZbdZR6WexZFtqeVGkGxW3TEJMN914Z55EnAGMmenlIQQ==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.0.tgz", + "integrity": "sha512-lr2EZCctC2BNR7j7WzJ2FpDznxky1sjfxvvYEyzxNyb6lZXHODmEoJeFu4JupYlkfha1KZpJyoqiJ7pgA1qq8Q==", "dev": true, 
"requires": { "spdx-expression-parse": "^3.0.0", @@ -13342,9 +11662,9 @@ } }, "spdx-license-ids": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.2.tgz", - "integrity": "sha512-qky9CVt0lVIECkEsYbNILVnPvycuEBkXoMFLRWsREkomQLevYhtRKC+R91a5TOAQ3bCMjikRwhyaRqj1VYatYg==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.3.tgz", + "integrity": "sha512-uBIcIl3Ih6Phe3XHK1NqboJLdGfwr1UN3k6wSD1dZpmPsIkb8AGNbZYJ1fOBk834+Gxy8rpfDxrS6XLEMZMY2g==", "dev": true }, "split": { @@ -13376,14 +11696,14 @@ }, "sprintf-js": { "version": "1.0.3", - "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "resolved": "http://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", "dev": true }, "sshpk": { - "version": "1.15.2", - "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", - "integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==", + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.0.tgz", + "integrity": "sha512-Zhev35/y7hRMcID/upReIvRse+I9SVhyVre/KTJSJQWMz3C3+G+HpO7m1wK/yckEtujKZ7dS4hkVxAnmHaIGVQ==", "dev": true, "requires": { "asn1": "~0.2.3", @@ -13413,15 +11733,9 @@ "dev": true }, "stack-utils": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.1.tgz", - "integrity": "sha1-1PM6tU6OOHeLDKXP07OvsS22hiA=", - "dev": true - }, - "staged-git-files": { - "version": "1.1.1", - "resolved": "http://registry.npmjs.org/staged-git-files/-/staged-git-files-1.1.1.tgz", - "integrity": "sha512-H89UNKr1rQJvI1c/PIR3kiAMBV23yvR7LItZiV74HWZwzt7f3YHuujJ9nJZlt58WlFox7XQsOahexwk7nTe69A==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-1.0.2.tgz", + "integrity": "sha512-MTX+MeG5U994cazkjd/9KNAapsHnibjMLnfXodlkXw76JEea0UiNzrqidzo1emMwk7w5Qhc9jd4Bn9TBb1MFwA==", "dev": true }, "static-extend": { @@ -13453,7 +11767,7 @@ }, "stream-browserify": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", + "resolved": "http://registry.npmjs.org/stream-browserify/-/stream-browserify-2.0.1.tgz", "integrity": "sha1-ZiZu5fm9uZQKTkUUyvtDu3Hlyds=", "dev": true, "requires": { @@ -13496,12 +11810,6 @@ "integrity": "sha1-1cdSgl5TZ+eG944Y5EXqIjoVWVI=", "dev": true }, - "string-argv": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.0.2.tgz", - "integrity": "sha1-2sMECGkMIfPDYwo/86BYd73L1zY=", - "dev": true - }, "string-length": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/string-length/-/string-length-2.0.0.tgz", @@ -13531,7 +11839,7 @@ }, "string-width": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", + "resolved": "http://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=", "dev": true, "requires": { @@ -13553,23 +11861,18 @@ }, "string_decoder": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "resolved": "http://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", "dev": true, "requires": { "safe-buffer": "~5.1.0" } }, - "stringify-object": { - "version": "3.3.0", - 
"resolved": "https://registry.npmjs.org/stringify-object/-/stringify-object-3.3.0.tgz", - "integrity": "sha512-rHqiFh1elqCQ9WPLIC8I0Q/g/wj5J1eMkyoiD6eoQApWHP0FtlK7rqnhmabL5VUY9JQCcqwwvlOaSuutekgyrw==", - "dev": true, - "requires": { - "get-own-enumerable-property-symbols": "^3.0.0", - "is-obj": "^1.0.1", - "is-regexp": "^1.0.0" - } + "stringify-package": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/stringify-package/-/stringify-package-1.0.0.tgz", + "integrity": "sha512-JIQqiWmLiEozOC0b0BtxZ/AOUtdUZHCBPgqIZ2kSJJqGwgb9neo44XdTHUC4HZSGqi03hOeB7W/E8rAlKnGe9g==", + "dev": true }, "strip-ansi": { "version": "3.0.1", @@ -13608,26 +11911,16 @@ "dev": true }, "strong-log-transformer": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/strong-log-transformer/-/strong-log-transformer-2.0.0.tgz", - "integrity": "sha512-FQmNqAXJgOX8ygOcvPLlGWBNT41mvNJ9ALoYf0GTwVt9t30mGTqpmp/oJx5gLcu52DXK10kS7dVWhx8aPXDTlg==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/strong-log-transformer/-/strong-log-transformer-2.1.0.tgz", + "integrity": "sha512-B3Hgul+z0L9a236FAUC9iZsL+nVHgoCJnqCbN588DjYxvGXaXaaFbfmQ/JhvKjZwsOukuR72XbHv71Qkug0HxA==", "dev": true, "requires": { - "byline": "^5.0.0", "duplexer": "^0.1.1", "minimist": "^1.2.0", "through": "^2.3.4" } }, - "subarg": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/subarg/-/subarg-1.0.0.tgz", - "integrity": "sha1-9izxdYHplrSPyWVpn1TAauJouNI=", - "dev": true, - "requires": { - "minimist": "^1.1.0" - } - }, "supports-color": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", @@ -13647,9 +11940,9 @@ } }, "symbol-observable": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.2.0.tgz", - "integrity": "sha512-e900nM8RRtGhlV36KGEU9k65K3mPb1WV70OdjfxlG2EAuM1noi/E/BaW/uMhL7bPEssK8QV57vN3esixjUvcXQ==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/symbol-observable/-/symbol-observable-1.0.1.tgz", + "integrity": "sha1-g0D8RwLDEi310iKI+IKD9RPT/dQ=", "dev": true }, "symbol-tree": { @@ -13678,7 +11971,7 @@ }, "tar": { "version": "2.2.1", - "resolved": "https://registry.npmjs.org/tar/-/tar-2.2.1.tgz", + "resolved": "http://registry.npmjs.org/tar/-/tar-2.2.1.tgz", "integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=", "dev": true, "requires": { @@ -13708,9 +12001,9 @@ } }, "terser": { - "version": "3.10.8", - "resolved": "https://registry.npmjs.org/terser/-/terser-3.10.8.tgz", - "integrity": "sha512-GQJHWJ/vbx0EgRk+lBMONMmKaT+ifeo/XgT/hi3KpzEEFOERVyFuJSVXH8grcmJjiqKY35ds8rBCxvABUeyyuQ==", + "version": "3.13.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-3.13.1.tgz", + "integrity": "sha512-ogyZye4DFqOtMzT92Y3Nxxw8OvXmL39HOALro4fc+EUYFFF9G/kk0znkvwMz6PPYgBtdKAodh3FPR70eugdaQA==", "dev": true, "requires": { "commander": "~2.17.1", @@ -13737,9 +12030,9 @@ } }, "terser-webpack-plugin": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.1.0.tgz", - "integrity": "sha512-61lV0DSxMAZ8AyZG7/A4a3UPlrbOBo8NIQ4tJzLPAdGOQ+yoNC7l5ijEow27lBAL2humer01KLS6bGIMYQxKoA==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-1.2.1.tgz", + "integrity": "sha512-GGSt+gbT0oKcMDmPx4SRSfJPE1XaN3kQRWG4ghxKQw9cn5G9x6aCKSsgYdvyM0na9NJ4Drv0RG6jbBByZ5CMjw==", "dev": true, "requires": { "cacache": "^11.0.2", @@ -13771,54 +12064,145 @@ "object-assign": "^4.1.0", "read-pkg-up": "^1.0.1", 
"require-main-filename": "^1.0.1" - } - }, - "test-value": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/test-value/-/test-value-3.0.0.tgz", - "integrity": "sha512-sVACdAWcZkSU9x7AOmJo5TqE+GyNJknHaHsMrR6ZnhjVlVN9Yx6FjHrsKZ3BjIpPCT68zYesPWkakrNupwfOTQ==", - "requires": { - "array-back": "^2.0.0", - "typical": "^2.6.1" - } - }, - "text-encoding-utf-8": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz", - "integrity": "sha512-8bw4MY9WjdsD2aMtO0OzOCY3pXGYNx2d2FfHRVUKkiCPDWjKuOlhLVASS+pD7VkLTVjW268LYJHwsnPFlBpbAg==" - }, - "text-extensions": { - "version": "1.9.0", - "resolved": "https://registry.npmjs.org/text-extensions/-/text-extensions-1.9.0.tgz", - "integrity": "sha512-wiBrwC1EhBelW12Zy26JeOUkQ5mRu+5o8rpsJk5+2t+Y5vE7e842qtZDQ2g1NpX/29HdyFeJ4nSIhI47ENSxlQ==", - "dev": true - }, - "throat": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/throat/-/throat-4.1.0.tgz", - "integrity": "sha1-iQN8vJLFarGJJua6TLsgDhVnKmo=", - "dev": true - }, - "through": { - "version": "2.3.8", - "resolved": "http://registry.npmjs.org/through/-/through-2.3.8.tgz", + }, + "dependencies": { + "arr-diff": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-2.0.0.tgz", + "integrity": "sha1-jzuCf5Vai9ZpaX5KQlasPOrjVs8=", + "dev": true, + "requires": { + "arr-flatten": "^1.0.1" + } + }, + "array-unique": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.2.1.tgz", + "integrity": "sha1-odl8yvy8JiXMcPrc6zalDFiwGlM=", + "dev": true + }, + "braces": { + "version": "1.8.5", + "resolved": "https://registry.npmjs.org/braces/-/braces-1.8.5.tgz", + "integrity": "sha1-uneWLhLf+WnWt2cR6RS3N4V79qc=", + "dev": true, + "requires": { + "expand-range": "^1.8.1", + "preserve": "^0.2.0", + "repeat-element": "^1.1.2" + } + }, + "expand-brackets": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-0.1.5.tgz", + "integrity": "sha1-3wcoTjQqgHzXM6xa9yQR5YHRF3s=", + "dev": true, + "requires": { + "is-posix-bracket": "^0.1.0" + } + }, + "extglob": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/extglob/-/extglob-0.3.2.tgz", + "integrity": "sha1-Lhj/PS9JqydlzskCPwEdqo2DSaE=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "is-extglob": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-1.0.0.tgz", + "integrity": "sha1-rEaBd8SUNAWgkvyPKXYMb/xiBsA=", + "dev": true + }, + "is-glob": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-2.0.1.tgz", + "integrity": "sha1-0Jb5JqPe1WAPP9/ZEZjLCIjC2GM=", + "dev": true, + "requires": { + "is-extglob": "^1.0.0" + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "micromatch": { + "version": "2.3.11", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-2.3.11.tgz", + "integrity": "sha1-hmd8l9FyCzY0MdBNDRUpO9OMFWU=", + "dev": true, + "requires": { + "arr-diff": "^2.0.0", + "array-unique": "^0.2.1", + "braces": "^1.8.2", + "expand-brackets": "^0.1.4", + "extglob": "^0.3.1", + "filename-regex": "^2.0.0", + "is-extglob": "^1.0.0", + "is-glob": "^2.0.1", + "kind-of": "^3.0.2", + "normalize-path": "^2.0.1", + "object.omit": "^2.0.0", + "parse-glob": "^3.0.4", + "regex-cache": "^0.4.2" 
+ } + } + } + }, + "test-value": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/test-value/-/test-value-3.0.0.tgz", + "integrity": "sha512-sVACdAWcZkSU9x7AOmJo5TqE+GyNJknHaHsMrR6ZnhjVlVN9Yx6FjHrsKZ3BjIpPCT68zYesPWkakrNupwfOTQ==", + "requires": { + "array-back": "^2.0.0", + "typical": "^2.6.1" + } + }, + "text-encoding-utf-8": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/text-encoding-utf-8/-/text-encoding-utf-8-1.0.2.tgz", + "integrity": "sha512-8bw4MY9WjdsD2aMtO0OzOCY3pXGYNx2d2FfHRVUKkiCPDWjKuOlhLVASS+pD7VkLTVjW268LYJHwsnPFlBpbAg==" + }, + "text-extensions": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/text-extensions/-/text-extensions-1.9.0.tgz", + "integrity": "sha512-wiBrwC1EhBelW12Zy26JeOUkQ5mRu+5o8rpsJk5+2t+Y5vE7e842qtZDQ2g1NpX/29HdyFeJ4nSIhI47ENSxlQ==", + "dev": true + }, + "throat": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/throat/-/throat-4.1.0.tgz", + "integrity": "sha1-iQN8vJLFarGJJua6TLsgDhVnKmo=", + "dev": true + }, + "through": { + "version": "2.3.8", + "resolved": "http://registry.npmjs.org/through/-/through-2.3.8.tgz", "integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=", "dev": true }, "through2": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.3.tgz", - "integrity": "sha1-AARWmzfHx0ujnEPzzteNGtlBQL4=", + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/through2/-/through2-2.0.5.tgz", + "integrity": "sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==", "dev": true, "requires": { - "readable-stream": "^2.1.5", + "readable-stream": "~2.3.6", "xtend": "~4.0.1" } }, "through2-filter": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/through2-filter/-/through2-filter-2.0.0.tgz", - "integrity": "sha1-YLxVoNrLdghdsfna6Zq0P4PWIuw=", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/through2-filter/-/through2-filter-3.0.0.tgz", + "integrity": "sha512-jaRjI2WxN3W1V8/FMZ9HKIBXixtiqs3SQSX4/YGIiP3gL6djW48VoZq9tDqeCWs3MT8YY5wb/zli8VW8snY1CA==", "dev": true, "requires": { "through2": "~2.0.0", @@ -13894,6 +12278,17 @@ "dev": true, "requires": { "kind-of": "^3.0.2" + }, + "dependencies": { + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + } } }, "to-regex": { @@ -13916,17 +12311,6 @@ "requires": { "is-number": "^3.0.0", "repeat-string": "^1.6.1" - }, - "dependencies": { - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - } - } } }, "to-through": { @@ -13946,6 +12330,14 @@ "requires": { "psl": "^1.1.24", "punycode": "^1.4.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", + "dev": true + } } }, "tr46": { @@ -13955,14 +12347,6 @@ "dev": true, "requires": { "punycode": "^2.1.0" - }, - "dependencies": { - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - } } }, "trash": { @@ -13984,7 +12368,7 @@ "dependencies": { "fs-extra": 
{ "version": "0.30.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", + "resolved": "http://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", "integrity": "sha1-8jP/zAjU2n1DLapEl3aYnbHfk/A=", "dev": true, "requires": { @@ -14039,341 +12423,64 @@ "dev": true }, "ts-jest": { - "version": "22.4.6", - "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-22.4.6.tgz", - "integrity": "sha512-kYQ6g1G1AU+bOO9rv+SSQXg4WTcni6Wx3AM48iHni0nP1vIuhdNRjKTE9Cxx36Ix/IOV7L85iKu07dgXJzH2pQ==", + "version": "23.10.5", + "resolved": "https://registry.npmjs.org/ts-jest/-/ts-jest-23.10.5.tgz", + "integrity": "sha512-MRCs9qnGoyKgFc8adDEntAOP64fWK1vZKnOYU1o2HxaqjdJvGqmkLCPCnVq1/If4zkUmEjKPnCiUisTrlX2p2A==", "dev": true, "requires": { - "babel-core": "^6.26.3", - "babel-plugin-istanbul": "^4.1.6", - "babel-plugin-transform-es2015-modules-commonjs": "^6.26.2", - "babel-preset-jest": "^22.4.3", - "cpx": "^1.5.0", - "fs-extra": "6.0.0", - "jest-config": "^22.4.3", - "lodash": "^4.17.10", - "pkg-dir": "^2.0.0", - "source-map-support": "^0.5.5", - "yargs": "^11.0.0" + "bs-logger": "0.x", + "buffer-from": "1.x", + "fast-json-stable-stringify": "2.x", + "json5": "2.x", + "make-error": "1.x", + "mkdirp": "0.x", + "resolve": "1.x", + "semver": "^5.5", + "yargs-parser": "10.x" }, "dependencies": { - "ansi-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-3.0.0.tgz", - "integrity": "sha1-7QMXwyIGT3lGbAKWa922Bas32Zg=", - "dev": true - }, - "babel-plugin-jest-hoist": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-22.4.4.tgz", - "integrity": "sha512-DUvGfYaAIlkdnygVIEl0O4Av69NtuQWcrjMOv6DODPuhuGLDnbsARz3AwiiI/EkIMMlxQDUcrZ9yoyJvTNjcVQ==", - "dev": true - }, - "babel-preset-jest": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-22.4.4.tgz", - "integrity": "sha512-+dxMtOFwnSYWfum0NaEc0O03oSdwBsjx4tMSChRDPGwu/4wSY6Q6ANW3wkjKpJzzguaovRs/DODcT4hbSN8yiA==", - "dev": true, - "requires": { - "babel-plugin-jest-hoist": "^22.4.4", - "babel-plugin-syntax-object-rest-spread": "^6.13.0" - } - }, - "cliui": { + "camelcase": { "version": "4.1.0", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-4.1.0.tgz", - "integrity": "sha512-4FG+RSG9DL7uEwRUZXZn3SS34DiDPfzP0VOiEwtUWlE+AR2EIg+hSyvrIgUUfhdgR/UkAeW2QHgeP+hWrXs7jQ==", - "dev": true, - "requires": { - "string-width": "^2.1.1", - "strip-ansi": "^4.0.0", - "wrap-ansi": "^2.0.0" - } - }, - "expect": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/expect/-/expect-22.4.3.tgz", - "integrity": "sha512-XcNXEPehqn8b/jm8FYotdX0YrXn36qp4HWlrVT4ktwQas1l1LPxiVWncYnnL2eyMtKAmVIaG0XAp0QlrqJaxaA==", - "dev": true, - "requires": { - "ansi-styles": "^3.2.0", - "jest-diff": "^22.4.3", - "jest-get-type": "^22.4.3", - "jest-matcher-utils": "^22.4.3", - "jest-message-util": "^22.4.3", - "jest-regex-util": "^22.4.3" - } - }, - "fs-extra": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-6.0.0.tgz", - "integrity": "sha512-lk2cUCo8QzbiEWEbt7Cw3m27WMiRG321xsssbcIpfMhpRjrlC08WBOVQqj1/nQYYNnPtyIhP1oqLO3QwT2tPCw==", - "dev": true, - "requires": { - "graceful-fs": "^4.1.2", - "jsonfile": "^4.0.0", - "universalify": "^0.1.0" - } - }, - "is-fullwidth-code-point": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", - "integrity": "sha1-o7MKXE8ZkYMWeqq5O+764937ZU8=", - "dev": true - }, - 
"jest-config": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-22.4.4.tgz", - "integrity": "sha512-9CKfo1GC4zrXSoMLcNeDvQBfgtqGTB1uP8iDIZ97oB26RCUb886KkKWhVcpyxVDOUxbhN+uzcBCeFe7w+Iem4A==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "glob": "^7.1.1", - "jest-environment-jsdom": "^22.4.1", - "jest-environment-node": "^22.4.1", - "jest-get-type": "^22.1.0", - "jest-jasmine2": "^22.4.4", - "jest-regex-util": "^22.1.0", - "jest-resolve": "^22.4.2", - "jest-util": "^22.4.1", - "jest-validate": "^22.4.4", - "pretty-format": "^22.4.0" - } - }, - "jest-diff": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-diff/-/jest-diff-22.4.3.tgz", - "integrity": "sha512-/QqGvCDP5oZOF6PebDuLwrB2BMD8ffJv6TAGAdEVuDx1+uEgrHpSFrfrOiMRx2eJ1hgNjlQrOQEHetVwij90KA==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "diff": "^3.2.0", - "jest-get-type": "^22.4.3", - "pretty-format": "^22.4.3" - } - }, - "jest-environment-jsdom": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-environment-jsdom/-/jest-environment-jsdom-22.4.3.tgz", - "integrity": "sha512-FviwfR+VyT3Datf13+ULjIMO5CSeajlayhhYQwpzgunswoaLIPutdbrnfUHEMyJCwvqQFaVtTmn9+Y8WCt6n1w==", - "dev": true, - "requires": { - "jest-mock": "^22.4.3", - "jest-util": "^22.4.3", - "jsdom": "^11.5.1" - } - }, - "jest-environment-node": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-environment-node/-/jest-environment-node-22.4.3.tgz", - "integrity": "sha512-reZl8XF6t/lMEuPWwo9OLfttyC26A5AMgDyEQ6DBgZuyfyeNUzYT8BFo6uxCCP/Av/b7eb9fTi3sIHFPBzmlRA==", - "dev": true, - "requires": { - "jest-mock": "^22.4.3", - "jest-util": "^22.4.3" - } - }, - "jest-jasmine2": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-jasmine2/-/jest-jasmine2-22.4.4.tgz", - "integrity": "sha512-nK3vdUl50MuH7vj/8at7EQVjPGWCi3d5+6aCi7Gxy/XMWdOdbH1qtO/LjKbqD8+8dUAEH+BVVh7HkjpCWC1CSw==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "co": "^4.6.0", - "expect": "^22.4.0", - "graceful-fs": "^4.1.11", - "is-generator-fn": "^1.0.0", - "jest-diff": "^22.4.0", - "jest-matcher-utils": "^22.4.0", - "jest-message-util": "^22.4.0", - "jest-snapshot": "^22.4.0", - "jest-util": "^22.4.1", - "source-map-support": "^0.5.0" - } - }, - "jest-matcher-utils": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-22.4.3.tgz", - "integrity": "sha512-lsEHVaTnKzdAPR5t4B6OcxXo9Vy4K+kRRbG5gtddY8lBEC+Mlpvm1CJcsMESRjzUhzkz568exMV1hTB76nAKbA==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-get-type": "^22.4.3", - "pretty-format": "^22.4.3" - } - }, - "jest-message-util": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-message-util/-/jest-message-util-22.4.3.tgz", - "integrity": "sha512-iAMeKxhB3Se5xkSjU0NndLLCHtP4n+GtCqV0bISKA5dmOXQfEbdEmYiu2qpnWBDCQdEafNDDU6Q+l6oBMd/+BA==", - "dev": true, - "requires": { - "@babel/code-frame": "^7.0.0-beta.35", - "chalk": "^2.0.1", - "micromatch": "^2.3.11", - "slash": "^1.0.0", - "stack-utils": "^1.0.1" - } - }, - "jest-mock": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-mock/-/jest-mock-22.4.3.tgz", - "integrity": "sha512-+4R6mH5M1G4NK16CKg9N1DtCaFmuxhcIqF4lQK/Q1CIotqMs/XBemfpDPeVZBFow6iyUNu6EBT9ugdNOTT5o5Q==", - "dev": true - }, - "jest-regex-util": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-regex-util/-/jest-regex-util-22.4.3.tgz", - "integrity": 
"sha512-LFg1gWr3QinIjb8j833bq7jtQopiwdAs67OGfkPrvy7uNUbVMfTXXcOKXJaeY5GgjobELkKvKENqq1xrUectWg==", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", + "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", "dev": true }, - "jest-resolve": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-resolve/-/jest-resolve-22.4.3.tgz", - "integrity": "sha512-u3BkD/MQBmwrOJDzDIaxpyqTxYH+XqAXzVJP51gt29H8jpj3QgKof5GGO2uPGKGeA1yTMlpbMs1gIQ6U4vcRhw==", - "dev": true, - "requires": { - "browser-resolve": "^1.11.2", - "chalk": "^2.0.1" - } - }, - "jest-snapshot": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-snapshot/-/jest-snapshot-22.4.3.tgz", - "integrity": "sha512-JXA0gVs5YL0HtLDCGa9YxcmmV2LZbwJ+0MfyXBBc5qpgkEYITQFJP7XNhcHFbUvRiniRpRbGVfJrOoYhhGE0RQ==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-diff": "^22.4.3", - "jest-matcher-utils": "^22.4.3", - "mkdirp": "^0.5.1", - "natural-compare": "^1.4.0", - "pretty-format": "^22.4.3" - } - }, - "jest-util": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/jest-util/-/jest-util-22.4.3.tgz", - "integrity": "sha512-rfDfG8wyC5pDPNdcnAlZgwKnzHvZDu8Td2NJI/jAGKEGxJPYiE4F0ss/gSAkG4778Y23Hvbz+0GMrDJTeo7RjQ==", - "dev": true, - "requires": { - "callsites": "^2.0.0", - "chalk": "^2.0.1", - "graceful-fs": "^4.1.11", - "is-ci": "^1.0.10", - "jest-message-util": "^22.4.3", - "mkdirp": "^0.5.1", - "source-map": "^0.6.0" - } - }, - "jest-validate": { - "version": "22.4.4", - "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-22.4.4.tgz", - "integrity": "sha512-dmlf4CIZRGvkaVg3fa0uetepcua44DHtktHm6rcoNVtYlpwe6fEJRkMFsaUVcFHLzbuBJ2cPw9Gl9TKfnzMVwg==", - "dev": true, - "requires": { - "chalk": "^2.0.1", - "jest-config": "^22.4.4", - "jest-get-type": "^22.1.0", - "leven": "^2.1.0", - "pretty-format": "^22.4.0" - } - }, - "os-locale": { + "json5": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/os-locale/-/os-locale-2.1.0.tgz", - "integrity": "sha512-3sslG3zJbEYcaC4YVAvDorjGxc7tv6KVATnLPZONiljsUncvihe9BQoVCEs0RZ1kmf4Hk9OBqlZfJZWI4GanKA==", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.1.0.tgz", + "integrity": "sha512-8Mh9h6xViijj36g7Dxi+Y4S6hNGV96vcJZr/SrlHh1LR/pEn/8j/+qIBbs44YKl69Lrfctp4QD+AdWLTMqEZAQ==", "dev": true, "requires": { - "execa": "^0.7.0", - "lcid": "^1.0.0", - "mem": "^1.1.0" + "minimist": "^1.2.0" } }, - "pretty-format": { - "version": "22.4.3", - "resolved": "http://registry.npmjs.org/pretty-format/-/pretty-format-22.4.3.tgz", - "integrity": "sha512-S4oT9/sT6MN7/3COoOy+ZJeA92VmOnveLHgrwBE3Z1W5N9S2A1QGNYiE1z75DAENbJrXXUb+OWXhpJcg05QKQQ==", + "yargs-parser": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-10.1.0.tgz", + "integrity": "sha512-VCIyR1wJoEBZUqk5PA+oOBF6ypbwh5aNB3I50guxAL/quggdfs4TtNHQrSazFA3fYZ+tEqfs0zIGlv0c/rgjbQ==", "dev": true, "requires": { - "ansi-regex": "^3.0.0", - "ansi-styles": "^3.2.0" + "camelcase": "^4.1.0" } - }, - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - }, - "source-map-support": { - "version": "0.5.9", - "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", - "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", - "dev": true, - "requires": { - 
"buffer-from": "^1.0.0", - "source-map": "^0.6.0" - } - }, - "string-width": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", - "integrity": "sha512-nOqH59deCq9SRHlxq1Aw85Jnt4w6KvLKqWVik6oA9ZklXLNIOlqg4F2yrT1MVaTjAqvVwdfeZ7w7aCvJD7ugkw==", - "dev": true, - "requires": { - "is-fullwidth-code-point": "^2.0.0", - "strip-ansi": "^4.0.0" - } - }, - "strip-ansi": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-4.0.0.tgz", - "integrity": "sha1-qEeQIusaw2iocTibY1JixQXuNo8=", - "dev": true, - "requires": { - "ansi-regex": "^3.0.0" - } - }, - "yargs": { - "version": "11.1.0", - "resolved": "http://registry.npmjs.org/yargs/-/yargs-11.1.0.tgz", - "integrity": "sha512-NwW69J42EsCSanF8kyn5upxvjp5ds+t3+udGBeTbFnERA+lF541DDpMawzo4z6W/QrzNM18D+BPMiOBibnFV5A==", - "dev": true, - "requires": { - "cliui": "^4.0.0", - "decamelize": "^1.1.1", - "find-up": "^2.1.0", - "get-caller-file": "^1.0.1", - "os-locale": "^2.0.0", - "require-directory": "^2.1.1", - "require-main-filename": "^1.0.1", - "set-blocking": "^2.0.0", - "string-width": "^2.0.0", - "which-module": "^2.0.0", - "y18n": "^3.2.1", - "yargs-parser": "^9.0.2" - } - } - } - }, - "ts-node": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-7.0.1.tgz", - "integrity": "sha512-BVwVbPJRspzNh2yfslyT1PSbl5uIk03EZlb493RKHN4qej/D06n1cEhjlOJG69oFsE7OT8XjpTUcYf6pKTLMhw==", - "dev": true, - "requires": { - "arrify": "^1.0.0", - "buffer-from": "^1.1.0", - "diff": "^3.1.0", - "make-error": "^1.1.1", - "minimist": "^1.2.0", - "mkdirp": "^0.5.1", - "source-map-support": "^0.5.6", - "yn": "^2.0.0" - }, - "dependencies": { + } + } + }, + "ts-node": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-7.0.1.tgz", + "integrity": "sha512-BVwVbPJRspzNh2yfslyT1PSbl5uIk03EZlb493RKHN4qej/D06n1cEhjlOJG69oFsE7OT8XjpTUcYf6pKTLMhw==", + "dev": true, + "requires": { + "arrify": "^1.0.0", + "buffer-from": "^1.1.0", + "diff": "^3.1.0", + "make-error": "^1.1.1", + "minimist": "^1.2.0", + "mkdirp": "^0.5.1", + "source-map-support": "^0.5.6", + "yn": "^2.0.0" + }, + "dependencies": { "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -14398,9 +12505,9 @@ "integrity": "sha512-4krF8scpejhaOgqzBEcGM7yDIEfi0/8+8zDRZhNZZ2kjmHJ4hv3zCbQWxoJGz1iw5U0Jl0nma13xzHXcncMavQ==" }, "tslint": { - "version": "5.11.0", - "resolved": "https://registry.npmjs.org/tslint/-/tslint-5.11.0.tgz", - "integrity": "sha1-mPMMAurjzecAYgHkwzywi0hYHu0=", + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/tslint/-/tslint-5.12.0.tgz", + "integrity": "sha512-CKEcH1MHUBhoV43SA/Jmy1l24HJJgI0eyLbBNSRyFlsQvb9v6Zdq+Nz2vEOH00nC5SUx4SneJ59PZUS/ARcokQ==", "dev": true, "requires": { "babel-code-frame": "^6.22.0", @@ -14428,7 +12535,7 @@ }, "tty-browserify": { "version": "0.0.0", - "resolved": "https://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.0.tgz", + "resolved": "http://registry.npmjs.org/tty-browserify/-/tty-browserify-0.0.0.tgz", "integrity": "sha1-oVe6QC2iTpv5V/mqadUk7tQpAaY=", "dev": true }, @@ -14463,9 +12570,9 @@ "dev": true }, "typedoc": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.12.0.tgz", - "integrity": "sha512-dsdlaYZ7Je8JC+jQ3j2Iroe4uyD0GhqzADNUVyBRgLuytQDP/g0dPkAw5PdM/4drnmmJjRzSWW97FkKo+ITqQg==", + "version": "0.13.0", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.13.0.tgz", + "integrity": 
"sha512-jQWtvPcV+0fiLZAXFEe70v5gqjDO6pJYJz4mlTtmGJeW2KRoIU/BEfktma6Uj8Xii7UakuZjbxFewl3UYOkU/w==", "dev": true, "requires": { "@types/fs-extra": "^5.0.3", @@ -14484,7 +12591,15 @@ "progress": "^2.0.0", "shelljs": "^0.8.2", "typedoc-default-themes": "^0.5.0", - "typescript": "3.0.x" + "typescript": "3.1.x" + }, + "dependencies": { + "typescript": { + "version": "3.1.6", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.1.6.tgz", + "integrity": "sha512-tDMYfVtvpb96msS1lDX9MEdHrW4yOuZ4Kdc4Him9oU796XldPYF/t2+uKoX0BBa0hXXwDlqYQbXY5Rzjzc5hBA==", + "dev": true + } } }, "typedoc-default-themes": { @@ -14494,9 +12609,9 @@ "dev": true }, "typescript": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.0.3.tgz", - "integrity": "sha512-kk80vLW9iGtjMnIv11qyxLqZm20UklzuR2tL0QAnDIygIUIemcZMxlMWudl9OOt76H3ntVzcTiddQ1/pAAJMYg==", + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.2.2.tgz", + "integrity": "sha512-VCj5UiSyHBjwfYacmDuc/NOk4QQixbE+Wn7MFJuS0nRuPQbof132Pw4u53dm264O8LPc2MVsc7RJNml5szurkg==", "dev": true }, "typical": { @@ -14524,155 +12639,6 @@ } } }, - "uglifyjs-webpack-plugin": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/uglifyjs-webpack-plugin/-/uglifyjs-webpack-plugin-1.3.0.tgz", - "integrity": "sha512-ovHIch0AMlxjD/97j9AYovZxG5wnHOPkL7T1GKochBADp/Zwc44pEWNqpKl1Loupp1WhFg7SlYmHZRUfdAacgw==", - "dev": true, - "requires": { - "cacache": "^10.0.4", - "find-cache-dir": "^1.0.0", - "schema-utils": "^0.4.5", - "serialize-javascript": "^1.4.0", - "source-map": "^0.6.1", - "uglify-es": "^3.3.4", - "webpack-sources": "^1.1.0", - "worker-farm": "^1.5.2" - }, - "dependencies": { - "ajv": { - "version": "6.6.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz", - "integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "cacache": { - "version": "10.0.4", - "resolved": "https://registry.npmjs.org/cacache/-/cacache-10.0.4.tgz", - "integrity": "sha512-Dph0MzuH+rTQzGPNT9fAnrPmMmjKfST6trxJeK7NQuHRaVw24VzPRWTmg9MpcwOVQZO0E1FBICUlFeNaKPIfHA==", - "dev": true, - "requires": { - "bluebird": "^3.5.1", - "chownr": "^1.0.1", - "glob": "^7.1.2", - "graceful-fs": "^4.1.11", - "lru-cache": "^4.1.1", - "mississippi": "^2.0.0", - "mkdirp": "^0.5.1", - "move-concurrently": "^1.0.1", - "promise-inflight": "^1.0.1", - "rimraf": "^2.6.2", - "ssri": "^5.2.4", - "unique-filename": "^1.1.0", - "y18n": "^4.0.0" - } - }, - "commander": { - "version": "2.13.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.13.0.tgz", - "integrity": "sha512-MVuS359B+YzaWqjCL/c+22gfryv+mCBPHAv3zyVI2GN8EY6IRP8VwtasXn8jyyhvvq84R4ImN1OKRtcbIasjYA==", - "dev": true - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "find-cache-dir": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/find-cache-dir/-/find-cache-dir-1.0.0.tgz", - "integrity": "sha1-kojj6ePMN0hxfTnq3hfPcfww7m8=", - "dev": true, - "requires": { - "commondir": "^1.0.1", - "make-dir": "^1.0.0", - "pkg-dir": "^2.0.0" - } - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": 
"https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "mississippi": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/mississippi/-/mississippi-2.0.0.tgz", - "integrity": "sha512-zHo8v+otD1J10j/tC+VNoGK9keCuByhKovAvdn74dmxJl9+mWHnx6EMsDN4lgRoMI/eYo2nchAxniIbUPb5onw==", - "dev": true, - "requires": { - "concat-stream": "^1.5.0", - "duplexify": "^3.4.2", - "end-of-stream": "^1.1.0", - "flush-write-stream": "^1.0.0", - "from2": "^2.1.0", - "parallel-transform": "^1.1.0", - "pump": "^2.0.1", - "pumpify": "^1.3.3", - "stream-each": "^1.1.0", - "through2": "^2.0.0" - } - }, - "pump": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pump/-/pump-2.0.1.tgz", - "integrity": "sha512-ruPMNRkN3MHP1cWJc9OWr+T/xDP0jhXYCLfJcBuX54hhfIBnaQmAUMfDcG4DM5UMWByBbJY69QSphm3jtDKIkA==", - "dev": true, - "requires": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "schema-utils": { - "version": "0.4.7", - "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz", - "integrity": "sha512-v/iwU6wvwGK8HbU9yi3/nhGzP0yGSuhQMzL6ySiec1FSrZZDkhm4noOSWzrNFo/jEc+SJY6jRTwuwbSXJPDUnQ==", - "dev": true, - "requires": { - "ajv": "^6.1.0", - "ajv-keywords": "^3.1.0" - } - }, - "source-map": { - "version": "0.6.1", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", - "dev": true - }, - "ssri": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/ssri/-/ssri-5.3.0.tgz", - "integrity": "sha512-XRSIPqLij52MtgoQavH/x/dU1qVKtWUAAZeOHsR9c2Ddi4XerFy3mc1alf+dLJKl9EUIm/Ht+EowFkTUOA6GAQ==", - "dev": true, - "requires": { - "safe-buffer": "^5.1.1" - } - }, - "uglify-es": { - "version": "3.3.9", - "resolved": "https://registry.npmjs.org/uglify-es/-/uglify-es-3.3.9.tgz", - "integrity": "sha512-r+MU0rfv4L/0eeW3xZrd16t4NZfK8Ld4SWVglYBb7ez5uXFWHuVRs6xCTrf1yirs9a4j4Y27nn7SRfO6v67XsQ==", - "dev": true, - "requires": { - "commander": "~2.13.0", - "source-map": "~0.6.1" - } - }, - "y18n": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz", - "integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==", - "dev": true - } - } - }, "uid-number": { "version": "0.0.6", "resolved": "https://registry.npmjs.org/uid-number/-/uid-number-0.0.6.tgz", @@ -14768,13 +12734,13 @@ } }, "unique-stream": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/unique-stream/-/unique-stream-2.2.1.tgz", - "integrity": "sha1-WqADz76Uxf+GbE59ZouxxNuts2k=", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/unique-stream/-/unique-stream-2.3.1.tgz", + "integrity": "sha512-2nY4TnBE70yoxHkDli7DMazpWiP7xMdCYqU2nBRO0UB+ZpEkGsSija7MvmvnZFUeC+mrgiUfcHSr3LmRFIg4+A==", "dev": true, "requires": { - "json-stable-stringify": "^1.0.0", - "through2-filter": "^2.0.0" + "json-stable-stringify-without-jsonify": "^1.0.1", + "through2-filter": "^3.0.0" } }, "universalify": { @@ -14820,12 +12786,6 @@ "resolved": "https://registry.npmjs.org/has-values/-/has-values-0.1.4.tgz", "integrity": "sha1-bWHeldkd/Km5oCCJrThL/49it3E=", "dev": true - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - 
"dev": true } } }, @@ -14842,14 +12802,6 @@ "dev": true, "requires": { "punycode": "^2.1.0" - }, - "dependencies": { - "punycode": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true - } } }, "urix": { @@ -14882,6 +12834,15 @@ "integrity": "sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==", "dev": true }, + "user-home": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/user-home/-/user-home-2.0.0.tgz", + "integrity": "sha1-nHC/2Babwdy/SGBODwS4tJzenp8=", + "dev": true, + "requires": { + "os-homedir": "^1.0.0" + } + }, "util": { "version": "0.10.4", "resolved": "https://registry.npmjs.org/util/-/util-0.10.4.tgz", @@ -14914,9 +12875,9 @@ "dev": true }, "v8flags": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/v8flags/-/v8flags-3.1.1.tgz", - "integrity": "sha512-iw/1ViSEaff8NJ3HLyEjawk/8hjJib3E7pvG4pddVXfUg1983s3VGsiClDjhK64MQVDGqc1Q8r18S4VKQZS9EQ==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/v8flags/-/v8flags-3.1.2.tgz", + "integrity": "sha512-MtivA7GF24yMPte9Rp/BWGCYQNaUj86zeYxV/x2RRJMKagImbbv3u8iJC57lNhWLPcGLJmHcHmFWkNsplbbLWw==", "dev": true, "requires": { "homedir-polyfill": "^1.0.1" @@ -14979,437 +12940,94 @@ "dev": true, "requires": { "fs-mkdirp-stream": "^1.0.0", - "glob-stream": "^6.1.0", - "graceful-fs": "^4.0.0", - "is-valid-glob": "^1.0.0", - "lazystream": "^1.0.0", - "lead": "^1.0.0", - "object.assign": "^4.0.4", - "pumpify": "^1.3.5", - "readable-stream": "^2.3.3", - "remove-bom-buffer": "^3.0.0", - "remove-bom-stream": "^1.2.0", - "resolve-options": "^1.1.0", - "through2": "^2.0.0", - "to-through": "^2.0.0", - "value-or-function": "^3.0.0", - "vinyl": "^2.0.0", - "vinyl-sourcemap": "^1.1.0" - } - }, - "vinyl-sourcemap": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/vinyl-sourcemap/-/vinyl-sourcemap-1.1.0.tgz", - "integrity": "sha1-kqgAWTo4cDqM2xHYswCtS+Y7PhY=", - "dev": true, - "requires": { - "append-buffer": "^1.0.2", - "convert-source-map": "^1.5.0", - "graceful-fs": "^4.1.6", - "normalize-path": "^2.1.1", - "now-and-later": "^2.0.0", - "remove-bom-buffer": "^3.0.0", - "vinyl": "^2.0.0" - } - }, - "vinyl-sourcemaps-apply": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/vinyl-sourcemaps-apply/-/vinyl-sourcemaps-apply-0.2.1.tgz", - "integrity": "sha1-q2VJ1h0XLCsbh75cUI0jnI74dwU=", - "dev": true, - "requires": { - "source-map": "^0.5.1" - } - }, - "vm-browserify": { - "version": "0.0.4", - "resolved": "https://registry.npmjs.org/vm-browserify/-/vm-browserify-0.0.4.tgz", - "integrity": "sha1-XX6kW7755Kb/ZflUOOCofDV9WnM=", - "dev": true, - "requires": { - "indexof": "0.0.1" - } - }, - "w3c-hr-time": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", - "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", - "dev": true, - "requires": { - "browser-process-hrtime": "^0.1.2" - } - }, - "walker": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.7.tgz", - "integrity": "sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=", - "dev": true, - "requires": { - "makeerror": "1.0.x" - } - }, - "watch": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/watch/-/watch-0.18.0.tgz", - "integrity": "sha1-KAlUdsbffJDJYxOJkMClQj60uYY=", - "dev": true, - "requires": { - "exec-sh": "^0.2.0", - "minimist": 
"^1.2.0" - } - }, - "watchpack": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.6.0.tgz", - "integrity": "sha512-i6dHe3EyLjMmDlU1/bGQpEw25XSjkJULPuAVKCbNRefQVq48yXKUpwg538F7AZTf9kyr57zj++pQFltUa5H7yA==", - "dev": true, - "requires": { - "chokidar": "^2.0.2", - "graceful-fs": "^4.1.2", - "neo-async": "^2.5.0" - }, - "dependencies": { - "anymatch": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-2.0.0.tgz", - "integrity": "sha512-5teOsQWABXHHBFP9y3skS5P3d/WfWXpv3FUpy+LorMrNYaT9pI4oLMQX7jzQ2KklNpGpWHzdCXTDT2Y3XGlZBw==", - "dev": true, - "requires": { - "micromatch": "^3.1.4", - "normalize-path": "^2.1.1" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "chokidar": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-2.0.4.tgz", - "integrity": "sha512-z9n7yt9rOvIJrMhvDtDictKrkFHeihkNl6uWMmZlmL6tJtX9Cs+87oK+teBx+JIgzvbX3yZHT3eF8vpbDxHJXQ==", - "dev": true, - "requires": { - "anymatch": "^2.0.0", - "async-each": "^1.0.0", - "braces": "^2.3.0", - "fsevents": "^1.2.2", - "glob-parent": "^3.1.0", - "inherits": "^2.0.1", - "is-binary-path": "^1.0.0", - "is-glob": "^4.0.0", - "lodash.debounce": "^4.0.8", - "normalize-path": "^2.1.1", - "path-is-absolute": "^1.0.0", - "readdirp": "^2.0.0", - "upath": "^1.0.5" - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": 
"https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "glob-parent": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz", - "integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=", - "dev": true, - "requires": { - "is-glob": "^3.1.0", - "path-dirname": "^1.0.0" - }, - "dependencies": { - "is-glob": { - "version": "3.1.0", - "resolved": 
"https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz", - "integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=", - "dev": true, - "requires": { - "is-extglob": "^2.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": "^6.0.2" - } - }, - "is-extglob": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", - "integrity": "sha1-qIwCU1eR8C7TfHahueqXc8gz+MI=", - "dev": true - }, - "is-glob": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.0.tgz", - "integrity": "sha1-lSHHaEXMJhCoUgPd8ICpWML/q8A=", - "dev": true, - "requires": { - "is-extglob": "^2.1.1" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - } + "glob-stream": "^6.1.0", + "graceful-fs": "^4.0.0", + "is-valid-glob": "^1.0.0", + "lazystream": "^1.0.0", + "lead": "^1.0.0", + "object.assign": "^4.0.4", + "pumpify": "^1.3.5", + "readable-stream": "^2.3.3", + "remove-bom-buffer": "^3.0.0", + "remove-bom-stream": "^1.2.0", + "resolve-options": "^1.1.0", + "through2": "^2.0.0", + "to-through": "^2.0.0", + "value-or-function": "^3.0.0", + "vinyl": "^2.0.0", + 
"vinyl-sourcemap": "^1.1.0" + } + }, + "vinyl-sourcemap": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/vinyl-sourcemap/-/vinyl-sourcemap-1.1.0.tgz", + "integrity": "sha1-kqgAWTo4cDqM2xHYswCtS+Y7PhY=", + "dev": true, + "requires": { + "append-buffer": "^1.0.2", + "convert-source-map": "^1.5.0", + "graceful-fs": "^4.1.6", + "normalize-path": "^2.1.1", + "now-and-later": "^2.0.0", + "remove-bom-buffer": "^3.0.0", + "vinyl": "^2.0.0" + } + }, + "vinyl-sourcemaps-apply": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/vinyl-sourcemaps-apply/-/vinyl-sourcemaps-apply-0.2.1.tgz", + "integrity": "sha1-q2VJ1h0XLCsbh75cUI0jnI74dwU=", + "dev": true, + "requires": { + "source-map": "^0.5.1" + } + }, + "vm-browserify": { + "version": "0.0.4", + "resolved": "http://registry.npmjs.org/vm-browserify/-/vm-browserify-0.0.4.tgz", + "integrity": "sha1-XX6kW7755Kb/ZflUOOCofDV9WnM=", + "dev": true, + "requires": { + "indexof": "0.0.1" + } + }, + "w3c-hr-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/w3c-hr-time/-/w3c-hr-time-1.0.1.tgz", + "integrity": "sha1-gqwr/2PZUOqeMYmlimViX+3xkEU=", + "dev": true, + "requires": { + "browser-process-hrtime": "^0.1.2" + } + }, + "walker": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.7.tgz", + "integrity": "sha1-L3+bj9ENZ3JisYqITijRlhjgKPs=", + "dev": true, + "requires": { + "makeerror": "1.0.x" + } + }, + "watch": { + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/watch/-/watch-0.18.0.tgz", + "integrity": "sha1-KAlUdsbffJDJYxOJkMClQj60uYY=", + "dev": true, + "requires": { + "exec-sh": "^0.2.0", + "minimist": "^1.2.0" + } + }, + "watchpack": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.6.0.tgz", + "integrity": "sha512-i6dHe3EyLjMmDlU1/bGQpEw25XSjkJULPuAVKCbNRefQVq48yXKUpwg538F7AZTf9kyr57zj++pQFltUa5H7yA==", + "dev": true, + "requires": { + "chokidar": "^2.0.2", + "graceful-fs": "^4.1.2", + "neo-async": "^2.5.0" } }, "wcwidth": { @@ -15421,6 +13039,12 @@ "defaults": "^1.0.3" } }, + "web-stream-tools": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/web-stream-tools/-/web-stream-tools-0.0.1.tgz", + "integrity": "sha512-MZUYhvTAMMy1u07OJL2pyp/tdrIu15fRJlGgnfvCQVXBS4cBNbIV1+6veYfVhTfnq0ZLispgx4nv17QxpuX+6w==", + "dev": true + }, "webidl-conversions": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-4.0.2.tgz", @@ -15428,15 +13052,15 @@ "dev": true }, "webpack": { - "version": "4.23.1", - "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.23.1.tgz", - "integrity": "sha512-iE5Cu4rGEDk7ONRjisTOjVHv3dDtcFfwitSxT7evtYj/rANJpt1OuC/Kozh1pBa99AUBr1L/LsaNB+D9Xz3CEg==", + "version": "4.28.3", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-4.28.3.tgz", + "integrity": "sha512-vLZN9k5I7Nr/XB1IDG9GbZB4yQd1sPuvufMFgJkx0b31fi2LD97KQIjwjxE7xytdruAYfu5S0FLBLjdxmwGJCg==", "dev": true, "requires": { - "@webassemblyjs/ast": "1.7.10", - "@webassemblyjs/helper-module-context": "1.7.10", - "@webassemblyjs/wasm-edit": "1.7.10", - "@webassemblyjs/wasm-parser": "1.7.10", + "@webassemblyjs/ast": "1.7.11", + "@webassemblyjs/helper-module-context": "1.7.11", + "@webassemblyjs/wasm-edit": "1.7.11", + "@webassemblyjs/wasm-parser": "1.7.11", "acorn": "^5.6.2", "acorn-dynamic-import": "^3.0.0", "ajv": "^6.1.0", @@ -15454,309 +13078,11 @@ "node-libs-browser": "^2.0.0", "schema-utils": "^0.4.4", "tapable": "^1.1.0", - "uglifyjs-webpack-plugin": "^1.2.4", + 
"terser-webpack-plugin": "^1.1.0", "watchpack": "^1.5.0", "webpack-sources": "^1.3.0" }, "dependencies": { - "ajv": { - "version": "6.6.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.6.1.tgz", - "integrity": "sha512-ZoJjft5B+EJBjUyu9C9Hc0OZyPZSSlOF+plzouTrg6UlA8f+e/n8NIgBFG/9tppJtpPWfthHakK7juJdNDODww==", - "dev": true, - "requires": { - "fast-deep-equal": "^2.0.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" - } - }, - "arr-diff": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", - "integrity": "sha1-1kYQdP6/7HHn4VI1dhoyml3HxSA=", - "dev": true - }, - "array-unique": { - "version": "0.3.2", - "resolved": "https://registry.npmjs.org/array-unique/-/array-unique-0.3.2.tgz", - "integrity": "sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg=", - "dev": true - }, - "braces": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-2.3.2.tgz", - "integrity": "sha512-aNdbnj9P8PjdXU4ybaWLK2IF3jc/EoDYbC7AazW6to3TRsfXxscC9UXOB5iDiEQrkyIbWp2SLQda4+QAa7nc3w==", - "dev": true, - "requires": { - "arr-flatten": "^1.1.0", - "array-unique": "^0.3.2", - "extend-shallow": "^2.0.1", - "fill-range": "^4.0.0", - "isobject": "^3.0.1", - "repeat-element": "^1.1.2", - "snapdragon": "^0.8.1", - "snapdragon-node": "^2.0.1", - "split-string": "^3.0.2", - "to-regex": "^3.0.1" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "expand-brackets": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", - "integrity": "sha1-t3c14xXOMPa27/D4OwQVGiJEliI=", - "dev": true, - "requires": { - "debug": "^2.3.3", - "define-property": "^0.2.5", - "extend-shallow": "^2.0.1", - "posix-character-classes": "^0.1.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "0.2.5", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-0.2.5.tgz", - "integrity": "sha1-w1se+RjsPJkPmlvFe+BKrOxcgRY=", - "dev": true, - "requires": { - "is-descriptor": "^0.1.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - }, - "is-accessor-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-0.1.6.tgz", - "integrity": "sha1-qeEss66Nh2cn7u84Q/igiXtcmNY=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-data-descriptor": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz", - "integrity": "sha1-C17mSDiOLIYCgueT8YVv7D8wG1Y=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": 
"sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "is-descriptor": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-0.1.6.tgz", - "integrity": "sha512-avDYr0SB3DwO9zsMov0gKCESFYqCnE4hq/4z3TdUlukEy5t9C0YRq7HLrsN52NAcqXKaepeCD0n+B0arnVG3Hg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^0.1.6", - "is-data-descriptor": "^0.1.4", - "kind-of": "^5.0.0" - } - }, - "kind-of": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-5.1.0.tgz", - "integrity": "sha512-NGEErnH6F2vUuXDh+OlbcKW7/wOcfdRHaZ7VWtqCztfHri/++YKmP51OdWeGPuqCOba6kk2OTe5d02VmTB80Pw==", - "dev": true - } - } - }, - "extglob": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/extglob/-/extglob-2.0.4.tgz", - "integrity": "sha512-Nmb6QXkELsuBr24CJSkilo6UHHgbekK5UiZgfE6UHD3Eb27YC6oD+bhcT+tJ6cl8dmsgdQxnWlcry8ksBIBLpw==", - "dev": true, - "requires": { - "array-unique": "^0.3.2", - "define-property": "^1.0.0", - "expand-brackets": "^2.1.4", - "extend-shallow": "^2.0.1", - "fragment-cache": "^0.2.1", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.1" - }, - "dependencies": { - "define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/define-property/-/define-property-1.0.0.tgz", - "integrity": "sha1-dp66rz9KY6rTr56NMEybvnm/sOY=", - "dev": true, - "requires": { - "is-descriptor": "^1.0.0" - } - }, - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "fast-deep-equal": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true - }, - "fill-range": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", - "integrity": "sha1-1USBHUKPmOsGpj3EAtJAPDKMOPc=", - "dev": true, - "requires": { - "extend-shallow": "^2.0.1", - "is-number": "^3.0.0", - "repeat-string": "^1.6.1", - "to-regex-range": "^2.1.0" - }, - "dependencies": { - "extend-shallow": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", - "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", - "dev": true, - "requires": { - "is-extendable": "^0.1.0" - } - } - } - }, - "is-accessor-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-accessor-descriptor/-/is-accessor-descriptor-1.0.0.tgz", - "integrity": "sha512-m5hnHTkcVsPfqx3AKlyttIPb7J+XykHvJP2B9bZDjlhLIoEq4XoK64Vg7boZlVWYK6LUY94dYPEE7Lh0ZkZKcQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-data-descriptor": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/is-data-descriptor/-/is-data-descriptor-1.0.0.tgz", - "integrity": "sha512-jbRXy1FmtAoCjQkVmIVYwuuqDFUbaOeDjmed1tOGPrsMhtJA4rD9tkgA0F1qJ3gRFRXcHYVkdeaP50Q5rE/jLQ==", - "dev": true, - "requires": { - "kind-of": "^6.0.0" - } - }, - "is-descriptor": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-descriptor/-/is-descriptor-1.0.2.tgz", - "integrity": "sha512-2eis5WqQGV7peooDyLmNEPUrps9+SXX5c9pL3xEB+4e9HnGuDa7mB7kHxHw4CbqS9k1T2hOH3miL8n8WtiYVtg==", - "dev": true, - "requires": { - "is-accessor-descriptor": "^1.0.0", - "is-data-descriptor": "^1.0.0", - "kind-of": 
"^6.0.2" - } - }, - "is-number": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-number/-/is-number-3.0.0.tgz", - "integrity": "sha1-JP1iAaR4LPUFYcgQJ2r8fRLXEZU=", - "dev": true, - "requires": { - "kind-of": "^3.0.2" - }, - "dependencies": { - "kind-of": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", - "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", - "dev": true, - "requires": { - "is-buffer": "^1.1.5" - } - } - } - }, - "isobject": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/isobject/-/isobject-3.0.1.tgz", - "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=", - "dev": true - }, - "json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true - }, - "kind-of": { - "version": "6.0.2", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.2.tgz", - "integrity": "sha512-s5kLOcnH0XqDO+FvuaLX8DDjZ18CGFk7VygH40QoKPUQhW4e2rvM0rwUq0t8IQDOwYSeLK01U90OjzBTme2QqA==", - "dev": true - }, - "micromatch": { - "version": "3.1.10", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-3.1.10.tgz", - "integrity": "sha512-MWikgl9n9M3w+bpsY3He8L+w9eF9338xRl8IAO5viDizwSzziFEyUzo2xrrloB64ADbTf8uA8vRqqttDTOmccg==", - "dev": true, - "requires": { - "arr-diff": "^4.0.0", - "array-unique": "^0.3.2", - "braces": "^2.3.1", - "define-property": "^2.0.2", - "extend-shallow": "^3.0.2", - "extglob": "^2.0.4", - "fragment-cache": "^0.2.1", - "kind-of": "^6.0.2", - "nanomatch": "^1.2.9", - "object.pick": "^1.3.0", - "regex-not": "^1.0.0", - "snapdragon": "^0.8.1", - "to-regex": "^3.0.2" - } - }, "schema-utils": { "version": "0.4.7", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-0.4.7.tgz", @@ -15797,9 +13123,9 @@ } }, "whatwg-mimetype": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.2.0.tgz", - "integrity": "sha512-5YSO1nMd5D1hY3WzAQV3PzZL83W3YeyR1yW9PcH26Weh1t+Vzh9B6XkDh7aXm83HBZ4nSMvkjvN2H2ySWIvBgw==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-2.3.0.tgz", + "integrity": "sha512-M4yMwr6mAnQz76TbJm914+gPpB/nCwvZbJU28cUD6dR004SAxDLOOSUaB1JDRqLtaOV/vi0IC5lEAGFgrjGv/g==", "dev": true }, "whatwg-url": { @@ -15823,9 +13149,9 @@ } }, "which-module": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/which-module/-/which-module-2.0.0.tgz", - "integrity": "sha1-2e8H3Od7mQK4o6j6SzHD4/fm6Ho=", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/which-module/-/which-module-1.0.0.tgz", + "integrity": "sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8=", "dev": true }, "wide-align": { @@ -15956,15 +13282,6 @@ "resolved": "http://registry.npmjs.org/pify/-/pify-2.3.0.tgz", "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", "dev": true - }, - "user-home": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/user-home/-/user-home-2.0.0.tgz", - "integrity": "sha1-nHC/2Babwdy/SGBODwS4tJzenp8=", - "dev": true, - "requires": { - "os-homedir": "^1.0.0" - } } } }, @@ -16027,40 +13344,15 @@ "which-module": "^1.0.0", "y18n": "^3.2.1", "yargs-parser": "^5.0.0" - }, - "dependencies": { - "which-module": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/which-module/-/which-module-1.0.0.tgz", - "integrity": "sha1-u6Y8qGGUiZT/MHc2CJ47lgJsKk8=", - "dev": true - }, 
- "yargs-parser": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-5.0.0.tgz", - "integrity": "sha1-J17PDX/+Bcd+ZOfIbkzZS/DhIoo=", - "dev": true, - "requires": { - "camelcase": "^3.0.0" - } - } } }, "yargs-parser": { - "version": "9.0.2", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-9.0.2.tgz", - "integrity": "sha1-nM9qQ0YP5O1Aqbto9I1DuKaMwHc=", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-5.0.0.tgz", + "integrity": "sha1-J17PDX/+Bcd+ZOfIbkzZS/DhIoo=", "dev": true, "requires": { - "camelcase": "^4.1.0" - }, - "dependencies": { - "camelcase": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", - "integrity": "sha1-1UVjW+HjPFQmScaRc+Xeas+uNN0=", - "dev": true - } + "camelcase": "^3.0.0" } }, "yn": { diff --git a/js/package.json b/js/package.json index cf49e41dbe2f4..a80886d82733d 100644 --- a/js/package.json +++ b/js/package.json @@ -8,10 +8,10 @@ }, "scripts": { "lerna": "lerna", - "test": "gulp test", - "build": "gulp build", - "clean": "gulp clean", - "debug": "gulp debug", + "test": "NODE_NO_WARNINGS=1 gulp test", + "build": "NODE_NO_WARNINGS=1 gulp build", + "clean": "NODE_NO_WARNINGS=1 gulp clean", + "debug": "NODE_NO_WARNINGS=1 gulp debug", "perf": "node ./perf/index.js", "test:integration": "node ./bin/integration.js --mode validate", "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow", @@ -19,11 +19,14 @@ "clean:all": "run-p clean clean:testdata", "clean:testdata": "gulp clean:testdata", "create:testdata": "gulp create:testdata", - "test:coverage": "gulp test -t ts --coverage", - "doc": "shx rm -rf ./doc && typedoc --mode file --out doc src/Arrow.ts", - "lint": "run-p lint:*", + "test:coverage": "gulp test -t src --coverage", + "doc": "shx rm -rf ./doc && typedoc --tsconfig tsconfig.json --target ES5 --module commonjs --mode modules --ignoreCompilerErrors --out doc src", + "lint": "run-p lint:src lint:test", + "lint:ci": "run-p lint:src:ci lint:test:ci", "lint:src": "tslint --fix --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"", "lint:test": "tslint --fix --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"", + "lint:src:ci": "tslint --project -p tsconfig.json -c tslint.json \"src/**/*.ts\"", + "lint:test:ci": "tslint --project -p test/tsconfig.json -c tslint.json \"test/**/*.ts\"", "prepublishOnly": "echo \"Error: do 'npm run release' instead of 'npm publish'\" && exit 1", "version": "npm install && npm run clean:all" }, @@ -53,99 +56,60 @@ "npm-release.sh" ], "dependencies": { - "@types/flatbuffers": "1.9.0", - "@types/node": "10.12.0", - "@types/text-encoding-utf-8": "1.0.1", + "@types/flatbuffers": "^1.9.0", + "@types/node": "^10.12.18", + "@types/text-encoding-utf-8": "^1.0.1", "command-line-args": "5.0.2", "command-line-usage": "5.0.5", - "flatbuffers": "1.10.2", + "flatbuffers": "^1.10.2", "json-bignum": "0.0.3", + "pad-left": "2.1.0", "text-encoding-utf-8": "1.0.2", - "tslib": "1.9.3" + "tslib": "^1.9.3" }, "devDependencies": { - "@std/esm": "0.26.0", + "@mattiasbuelens/web-streams-polyfill": "0.2.1", "@types/glob": "7.1.1", - "@types/jest": "23.3.5", + "@types/jest": "23.3.10", + "async-done": "1.3.1", "benchmark": "2.1.4", "coveralls": "3.0.2", "del": "3.0.0", + "esm": "3.0.84", "glob": "7.1.3", - "google-closure-compiler": "20181008.0.0", + "google-closure-compiler": "20181210.0.0", "gulp": "4.0.0", - "gulp-json-transform": "0.4.5", + 
"gulp-json-transform": "0.4.6", "gulp-rename": "1.4.0", "gulp-sourcemaps": "2.6.4", - "gulp-typescript": "5.0.0-alpha.3", - "ix": "2.3.5", + "gulp-typescript": "5.0.0", + "ix": "2.4.3", "jest": "23.6.0", "jest-environment-node-debug": "2.0.0", + "jest-silent-reporter": "0.1.1", "json": "9.0.6", - "lerna": "3.4.3", - "lint-staged": "7.3.0", - "merge2": "1.2.3", + "lerna": "3.8.0", + "memfs": "2.14.2", "mkdirp": "0.5.1", + "multistream": "2.1.1", "npm-run-all": "4.1.5", - "pump": "3.0.0", + "randomatic": "3.1.1", "rimraf": "2.6.2", "rxjs": "5.5.11", "shx": "0.3.2", "source-map-loader": "0.2.4", - "terser-webpack-plugin": "1.1.0", + "terser-webpack-plugin": "1.2.1", "trash": "4.3.0", - "ts-jest": "22.4.6", + "ts-jest": "23.10.5", "ts-node": "7.0.1", - "tslint": "5.11.0", - "typedoc": "0.12", - "typescript": "3.0.3", - "webpack": "4.23.1", + "tslint": "5.12.0", + "typedoc": "0.13.0", + "typescript": "3.2.2", + "web-stream-tools": "0.0.1", + "webpack": "4.28.3", "xml2js": "0.4.19" }, "engines": { - "node": ">=10.0" - }, - "@std/esm": { - "warnings": false - }, - "lint-staged": { - "*.@(ts)": [ - "tslint --fix", - "git add" - ] - }, - "jest": { - "verbose": false, - "testEnvironment": "node", - "globals": { - "ts-jest": { - "skipBabel": true, - "tsConfigFile": "test/tsconfig.json" - } - }, - "roots": [ - "/test/" - ], - "moduleFileExtensions": [ - "js", - "ts", - "tsx" - ], - "coverageReporters": [ - "lcov" - ], - "coveragePathIgnorePatterns": [ - "fb\\/(File|Message|Schema|Tensor)_generated\\.(js|ts)$", - "test\\/.*\\.(ts|tsx|js)$", - "/node_modules/" - ], - "transform": { - ".(ts|tsx)": "./node_modules/ts-jest/preprocessor.js", - ".(js|jsx)": "./node_modules/babel-jest/build/index.js" - }, - "transformIgnorePatterns": [ - "/node_modules/", - "/(es2015|esnext)/umd/" - ], - "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$" + "node": ">=11.0" } } diff --git a/js/perf/index.js b/js/perf/index.js index 2c07591925328..0e9c2bd689aae 100644 --- a/js/perf/index.js +++ b/js/perf/index.js @@ -16,10 +16,10 @@ // under the License. 
// Use the ES5 UMD target as perf baseline -// const { predicate, Table, read: readBatches } = require('../targets/es5/umd'); -// const { predicate, Table, read: readBatches } = require('../targets/es5/cjs'); -// const { predicate, Table, read: readBatches } = require('../targets/es2015/umd'); -const { predicate, Table, read: readBatches } = require('../targets/es2015/cjs'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es5/umd'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es5/cjs'); +// const { predicate, Table, RecordBatchReader } = require('../targets/es2015/umd'); +const { predicate, Table, RecordBatchReader } = require('../targets/es2015/cjs'); const { col } = predicate; const Benchmark = require('benchmark'); @@ -91,7 +91,7 @@ function createReadBatchesTest(name, buffers) { return { async: true, name: `readBatches\n`, - fn() { for (recordBatch of readBatches(buffers)) {} } + fn() { for (recordBatch of RecordBatchReader.from(buffers)) {} } }; } @@ -139,34 +139,36 @@ function createDataFrameDirectCountTest(table, column, test, value) { let sum, colidx = table.schema.fields.findIndex((c)=>c.name === column); if (test == 'gt') { - op = function () { + op = () => { sum = 0; - let batches = table.batches; + let batches = table.chunks; let numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx); // yield all indices - for (let index = -1; ++index < batch.length;) { + for (let index = -1, length = batch.length; ++index < length;) { sum += (vector.get(index) >= value); } } + return sum; } } else if (test == 'eq') { - op = function() { + op = () => { sum = 0; - let batches = table.batches; + let batches = table.chunks; let numBatches = batches.length; for (let batchIndex = -1; ++batchIndex < numBatches;) { // load batches const batch = batches[batchIndex]; const vector = batch.getChildAt(colidx); // yield all indices - for (let index = -1; ++index < batch.length;) { + for (let index = -1, length = batch.length; ++index < length;) { sum += (vector.get(index) === value); } } + return sum; } } else { throw new Error(`Unrecognized test "${test}"`); diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts new file mode 100644 index 0000000000000..f9178df91e782 --- /dev/null +++ b/js/src/Arrow.dom.ts @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import streamAdapters from './io/adapters'; +import { RecordBatchReader } from './ipc/reader'; +import { RecordBatchWriter } from './ipc/writer'; +import { toDOMStream } from './ipc/whatwg/iterable'; +import { recordBatchReaderThroughDOMStream } from './ipc/whatwg/reader'; +import { recordBatchWriterThroughDOMStream } from './ipc/whatwg/writer'; + +streamAdapters.toDOMStream = toDOMStream; +RecordBatchReader['throughDOM'] = recordBatchReaderThroughDOMStream; +RecordBatchWriter['throughDOM'] = recordBatchWriterThroughDOMStream; + +export { + ArrowType, DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, VectorType, + Data, + DataType, + Null, + Bool, + Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, + Float, Float16, Float32, Float64, + Utf8, + Binary, + FixedSizeBinary, + Date_, DateDay, DateMillisecond, + Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, + Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, + Decimal, + List, + Struct, + Union, DenseUnion, SparseUnion, + Dictionary, + Interval, IntervalDayTime, IntervalYearMonth, + FixedSizeList, + Map_, + Table, + Column, + Schema, Field, + Visitor, + Vector, + BaseVector, + BinaryVector, + BoolVector, + Chunked, + DateVector, DateDayVector, DateMillisecondVector, + DecimalVector, + DictionaryVector, + FixedSizeBinaryVector, + FixedSizeListVector, + FloatVector, Float16Vector, Float32Vector, Float64Vector, + IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector, + IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, + ListVector, + MapVector, + NullVector, + StructVector, + TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector, + TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector, + UnionVector, DenseUnionVector, SparseUnionVector, + Utf8Vector, + ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink, + RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader, + RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter, + MessageReader, AsyncMessageReader, JSONMessageReader, + Message, + RecordBatch, + ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions, + DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc, + predicate, + util +} from './Arrow'; diff --git a/js/src/Arrow.externs.js b/js/src/Arrow.externs.js deleted file mode 100644 index 7ad066585712e..0000000000000 --- a/js/src/Arrow.externs.js +++ /dev/null @@ -1,814 +0,0 @@ -// @ts-nocheck -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -/* tslint:disable */ - -/** - * @fileoverview Closure Compiler externs for Arrow - * @externs - * @suppress {duplicate,checkTypes} - */ -/** @type {symbol} */ -Symbol.iterator; -/** @type {symbol} */ -Symbol.asyncIterator; - -var Table = function() {}; -/** @type {?} */ -Table.from = function() {}; -/** @type {?} */ -Table.fromVectors = function() {}; -/** @type {?} */ -Table.fromAsync = function() {}; -/** @type {?} */ -Table.fromStruct = function() {}; -/** @type {?} */ -Table.empty = function() {}; -/** @type {?} */ -Table.prototype.schema; -/** @type {?} */ -Table.prototype.length; -/** @type {?} */ -Table.prototype.numCols; -/** @type {?} */ -Table.prototype.get; -/** @type {?} */ -Table.prototype.getColumn; -/** @type {?} */ -Table.prototype.getColumnAt; -/** @type {?} */ -Table.prototype.getColumnIndex; -/** @type {?} */ -Table.prototype.toArray; -/** @type {?} */ -Table.prototype.select; -/** @type {?} */ -Table.prototype.rowsToString; -/** @type {?} */ -Table.prototype.batchesUnion; -/** @type {?} */ -Table.prototype.batches; -/** @type {?} */ -Table.prototype.countBy; -/** @type {?} */ -Table.prototype.scan; -/** @type {?} */ -Table.prototype.serialize; - -var CountByResult = function() {}; -/** @type {?} */ -CountByResult.prototype.asJSON; - -var col = function () {}; -var lit = function () {}; -var and = function () {}; -var or = function () {}; -var custom = function () {}; - -var Value = function() {}; -/** @type {?} */ -Value.prototype.ge; -/** @type {?} */ -Value.prototype.le; -/** @type {?} */ -Value.prototype.eq; -/** @type {?} */ -Value.prototype.lt; -/** @type {?} */ -Value.prototype.gt; -/** @type {?} */ -Value.prototype.ne; - -var Col = function() {}; -/** @type {?} */ -Col.prototype.bind; -var CombinationPredicate = function () {}; -/** @type {?} */ -CombinationPredicate.prototype.children; -var Or = function() {}; -var And = function() {}; -var Not = function() {}; -var GTeq = function () {}; -/** @type {?} */ -GTeq.prototype.and; -/** @type {?} */ -GTeq.prototype.or; -var LTeq = function () {}; -/** @type {?} */ -LTeq.prototype.and; -/** @type {?} */ -LTeq.prototype.or; -var Equals = function () {}; -/** @type {?} */ -Equals.prototype.and; -/** @type {?} */ -Equals.prototype.or; -var Predicate = function() {}; -/** @type {?} */ -Predicate.prototype.bind; -/** @type {?} */ -Predicate.prototype.and; -/** @type {?} */ -Predicate.prototype.or; -/** @type {?} */ -Predicate.prototype.not; -/** @type {?} */ -Predicate.prototype.ands; -var Literal = function() {}; - -var PipeIterator = function() {}; -/** @type {?} */ -PipeIterator.prototype.pipe; - -var AsyncPipeIterator = function() {}; -/** @type {?} */ -AsyncPipeIterator.prototype.pipe; - -var RecordBatch = function() {}; -/** @type {?} */ -RecordBatch.from = function() {}; -/** @type {?} */ -RecordBatch.prototype.numCols; -/** @type {?} */ -RecordBatch.prototype.length; -/** @type {?} */ -RecordBatch.prototype.schema; -/** @type {?} */ -RecordBatch.prototype.columns; -/** @type {?} */ -RecordBatch.prototype.select; - -var Vector = function() {}; -/** @type {?} */ -Vector.create = function() {}; -/** @type {?} */ -Vector.prototype.data; -/** @type {?} */ -Vector.prototype.type; -/** @type {?} */ -Vector.prototype.length; -/** @type {?} */ -Vector.prototype.nullCount; -/** @type {?} */ -Vector.prototype.nullBitmap; -/** @type {?} */ -Vector.prototype.isValid; -/** @type {?} */ -Vector.prototype.get; -/** @type 
{?} */ -Vector.prototype.set; -/** @type {?} */ -Vector.prototype.toArray; -/** @type {?} */ -Vector.prototype.concat; -/** @type {?} */ -Vector.prototype.slice; -/** @type {?} */ -Vector.prototype.acceptTypeVisitor; - -var BaseInt64 = function() {}; -/** @type {?} */ -BaseInt64.prototype.lessThan; -/** @type {?} */ -BaseInt64.prototype.equals; -/** @type {?} */ -BaseInt64.prototype.greaterThan; -/** @type {?} */ -BaseInt64.prototype.hex; - -var Uint64 = function() {}; -/** @type {?} */ -Uint64.add = function() {}; -/** @type {?} */ -Uint64.multiply = function() {}; -/** @type {?} */ -Uint64.from = function() {}; -/** @type {?} */ -Uint64.fromNumber = function() {}; -/** @type {?} */ -Uint64.fromString = function() {}; -/** @type {?} */ -Uint64.prototype.times; -/** @type {?} */ -Uint64.prototype.plus - -var Int64 = function() {}; -/** @type {?} */ -Int64.add = function() {}; -/** @type {?} */ -Int64.multiply = function() {}; -/** @type {?} */ -Int64.from = function() {}; -/** @type {?} */ -Int64.fromNumber = function() {}; -/** @type {?} */ -Int64.fromString = function() {}; -/** @type {?} */ -Int64.prototype.negate -/** @type {?} */ -Int64.prototype.times -/** @type {?} */ -Int64.prototype.plus -/** @type {?} */ -Int64.prototype.lessThan - -var Int128 = function() {}; -/** @type {?} */ -Int128.add = function() {}; -/** @type {?} */ -Int128.multiply = function() {}; -/** @type {?} */ -Int128.from = function() {}; -/** @type {?} */ -Int128.fromNumber = function() {}; -/** @type {?} */ -Int128.fromString = function() {}; -/** @type {?} */ -Int128.prototype.negate -/** @type {?} */ -Int128.prototype.times -/** @type {?} */ -Int128.prototype.plus -/** @type {?} */ -Int128.prototype.hex - -var packBools = function() {}; - -var Type = function() {}; -/** @type {?} */ -Type.NONE = function() {}; -/** @type {?} */ -Type.Null = function() {}; -/** @type {?} */ -Type.Int = function() {}; -/** @type {?} */ -Type.Float = function() {}; -/** @type {?} */ -Type.FloatingPoint = function() {}; -/** @type {?} */ -Type.Binary = function() {}; -/** @type {?} */ -Type.Utf8 = function() {}; -/** @type {?} */ -Type.Bool = function() {}; -/** @type {?} */ -Type.Decimal = function() {}; -/** @type {?} */ -Type.Date = function() {}; -/** @type {?} */ -Type.Time = function() {}; -/** @type {?} */ -Type.Timestamp = function() {}; -/** @type {?} */ -Type.Interval = function() {}; -/** @type {?} */ -Type.List = function() {}; -/** @type {?} */ -Type.Struct = function() {}; -/** @type {?} */ -Type.Struct_ = function() {}; -/** @type {?} */ -Type.Union = function() {}; -/** @type {?} */ -Type.FixedSizeBinary = function() {}; -/** @type {?} */ -Type.FixedSizeList = function() {}; -/** @type {?} */ -Type.Map = function() {}; -/** @type {?} */ -Type.Dictionary = function() {}; -/** @type {?} */ -Type.DenseUnion = function() {}; -/** @type {?} */ -Type.SparseUnion = function() {}; - -var DateUnit = function() {}; -/** @type {?} */ -DateUnit.DAY = function() {}; -/** @type {?} */ -DateUnit.MILLISECOND = function() {}; -var TimeUnit = function() {}; -/** @type {?} */ -TimeUnit.SECOND = function() {}; -/** @type {?} */ -TimeUnit.MILLISECOND = function() {}; -/** @type {?} */ -TimeUnit.MICROSECOND = function() {}; -/** @type {?} */ -TimeUnit.NANOSECOND = function() {}; -var Precision = function() {}; -/** @type {?} */ -Precision.HALF = function() {}; -/** @type {?} */ -Precision.SINGLE = function() {}; -/** @type {?} */ -Precision.DOUBLE = function() {}; -var UnionMode = function() {}; -/** @type {?} */ -UnionMode.Sparse = 
function() {}; -/** @type {?} */ -UnionMode.Dense = function() {}; -var VectorType = function() {}; -/** @type {?} */ -VectorType.OFFSET = function() {}; -/** @type {?} */ -VectorType.DATA = function() {}; -/** @type {?} */ -VectorType.VALIDITY = function() {}; -/** @type {?} */ -VectorType.TYPE = function() {}; -var IntervalUnit = function() {}; -/** @type {?} */ -IntervalUnit.YEAR_MONTH = function() {}; -/** @type {?} */ -IntervalUnit.DAY_TIME = function() {}; -var MessageHeader = function() {}; -/** @type {?} */ -MessageHeader.NONE = function() {}; -/** @type {?} */ -MessageHeader.Schema = function() {}; -/** @type {?} */ -MessageHeader.DictionaryBatch = function() {}; -/** @type {?} */ -MessageHeader.RecordBatch = function() {}; -/** @type {?} */ -MessageHeader.Tensor = function() {}; -var MetadataVersion = function() {}; -/** @type {?} */ -MetadataVersion.V1 = function() {}; -/** @type {?} */ -MetadataVersion.V2 = function() {}; -/** @type {?} */ -MetadataVersion.V3 = function() {}; -/** @type {?} */ -MetadataVersion.V4 = function() {}; - -var DataType = function() {}; -/** @type {?} */ -DataType.isNull = function() {}; -/** @type {?} */ -DataType.isInt = function() {}; -/** @type {?} */ -DataType.isFloat = function() {}; -/** @type {?} */ -DataType.isBinary = function() {}; -/** @type {?} */ -DataType.isUtf8 = function() {}; -/** @type {?} */ -DataType.isBool = function() {}; -/** @type {?} */ -DataType.isDecimal = function() {}; -/** @type {?} */ -DataType.isDate = function() {}; -/** @type {?} */ -DataType.isTime = function() {}; -/** @type {?} */ -DataType.isTimestamp = function() {}; -/** @type {?} */ -DataType.isInterval = function() {}; -/** @type {?} */ -DataType.isList = function() {}; -/** @type {?} */ -DataType.isStruct = function() {}; -/** @type {?} */ -DataType.isUnion = function() {}; -/** @type {?} */ -DataType.isDenseUnion = function() {}; -/** @type {?} */ -DataType.isSparseUnion = function() {}; -/** @type {?} */ -DataType.isFixedSizeBinary = function() {}; -/** @type {?} */ -DataType.isFixedSizeList = function() {}; -/** @type {?} */ -DataType.isMap = function() {}; -/** @type {?} */ -DataType.isDictionary = function() {}; -/** @type {?} */ -DataType.prototype.ArrayType; - -var Schema = function() {}; -/** @type {?} */ -Schema.from = function() {}; -/** @type {?} */ -Schema.prototype.fields; -/** @type {?} */ -Schema.prototype.version; -/** @type {?} */ -Schema.prototype.metadata; -/** @type {?} */ -Schema.prototype.dictionaries; -/** @type {?} */ -Schema.prototype.select; -var Field = function() {}; -/** @type {?} */ -Field.prototype.name; -/** @type {?} */ -Field.prototype.type; -/** @type {?} */ -Field.prototype.nullable; -/** @type {?} */ -Field.prototype.metadata; -var Null = function() {}; -var Int8 = function() {}; -var Int16 = function() {}; -var Int32 = function() {}; -var Int64 = function() {}; -var Uint8 = function() {}; -var Uint16 = function() {}; -var Uint32 = function() {}; -var Uint64 = function() {}; -var Float16 = function() {}; -var Float32 = function() {}; -var Float64 = function() {}; -var Binary = function() {}; -var Utf8 = function() {}; -var Bool = function() {}; -var Decimal = function() {}; -var Date_ = function() {}; -var Time = function() {}; -var Timestamp = function() {}; -var Interval = function() {}; -var List = function() {}; -var Struct = function() {}; -var Union = function() {}; -var DenseUnion = function() {}; -var SparseUnion = function() {}; -var FixedSizeBinary = function() {}; -var FixedSizeList = function() {}; -var Map_ = 
function() {}; -var Dictionary = function() {}; - -var BaseData = function() {}; -/** @type {?} */ -BaseData.prototype.type; -/** @type {?} */ -BaseData.prototype.clone; -/** @type {?} */ -BaseData.prototype.slice; -/** @type {?} */ -BaseData.prototype.length; -/** @type {?} */ -BaseData.prototype.offset; -/** @type {?} */ -BaseData.prototype.typeId; -/** @type {?} */ -BaseData.prototype.childData; -/** @type {?} */ -BaseData.prototype.nullBitmap; -/** @type {?} */ -BaseData.prototype.nullCount; - -var BoolData = function() {}; -var NestedData = function() {}; -var SparseUnionData = function() {}; -var ChunkedData = function() {}; - -var FlatData = function() {}; -/** @type {?} */ -FlatData.prototype.values; - -var FlatListData = function() {}; -/** @type {?} */ -FlatListData.prototype.values; -/** @type {?} */ -FlatListData.prototype.valueOffsets; - -var DictionaryData = function() {}; -/** @type {?} */ -DictionaryData.prototype.indices; -/** @type {?} */ -DictionaryData.prototype.dictionary; - -var ListData = function() {}; -/** @type {?} */ -ListData.prototype.values; -/** @type {?} */ -ListData.prototype.valueOffsets; - -var UnionData = function() {}; -/** @type {?} */ -UnionData.prototype.typeIds; - -var DenseUnionData = function() {}; -/** @type {?} */ -DenseUnionData.prototype.valueOffsets; - -var ChunkedData = function() {}; -/** @type {?} */ -ChunkedData.computeOffsets = function() {}; - -var FlatVector = function() {}; -/** @type {?} */ -FlatVector.prototype.values; -/** @type {?} */ -FlatVector.prototype.lows; -/** @type {?} */ -FlatVector.prototype.highs; -/** @type {?} */ -FlatVector.prototype.asInt32; - -var ListVectorBase = function() {}; -/** @type {?} */ -ListVectorBase.prototype.values; -/** @type {?} */ -ListVectorBase.prototype.valueOffsets; -/** @type {?} */ -ListVectorBase.prototype.getValueOffset; -/** @type {?} */ -ListVectorBase.prototype.getValueLength; - -var NestedVector = function() {}; -/** @type {?} */ -NestedVector.prototype.childData; -/** @type {?} */ -NestedVector.prototype.getChildAt; - -var NullVector = function() {}; -var BoolVector = function() {}; -/** @type {?} */ -BoolVector.from = function() {}; -/** @type {?} */ -BoolVector.prototype.values; -var IntVector = function() {}; -/** @type {?} */ -IntVector.from = function() {}; - -var FloatVector = function() {}; -/** @type {?} */ -FloatVector.from = function() {}; - -var DateVector = function() {}; -/** @type {?} */ -DateVector.from = function() {}; -/** @type {?} */ -DateVector.prototype.asEpochMilliseconds; -var DecimalVector = function() {}; -var TimeVector = function() {}; -var TimestampVector = function() {}; -/** @type {?} */ -TimestampVector.prototype.asEpochMilliseconds; -var IntervalVector = function() {}; -var BinaryVector = function() {}; -/** @type {?} */ -BinaryVector.prototype.asUtf8; -var FixedSizeBinaryVector = function() {}; -var Utf8Vector = function() {}; -/** @type {?} */ -Utf8Vector.prototype.asBinary; -var ListVector = function() {}; -/** @type {?} */ -ListVector.prototype.getChildAt; -var FixedSizeListVector = function() {}; -/** @type {?} */ -FixedSizeListVector.prototype.getChildAt; -var MapVector = function() {}; -/** @type {?} */ -MapVector.prototype.asStruct; -var StructVector = function() {}; -/** @type {?} */ -StructVector.prototype.asMap; -var UnionVector = function() {}; - -var DictionaryVector = function() {}; -/** @type {?} */ -DictionaryVector.prototype.indices; -/** @type {?} */ -DictionaryVector.prototype.dictionary; -/** @type {?} */ 
-DictionaryVector.prototype.getKey; -/** @type {?} */ -DictionaryVector.prototype.getValue; -/** @type {?} */ -DictionaryVector.prototype.reverseLookup; - -var FlatView = function() {}; -/** @type {?} */ -FlatView.prototype.get; -/** @type {?} */ -FlatView.prototype.clone; -/** @type {?} */ -FlatView.prototype.isValid; -/** @type {?} */ -FlatView.prototype.toArray; -/** @type {?} */ -FlatView.prototype.set; - -var PrimitiveView = function() {}; -/** @type {?} */ -PrimitiveView.prototype.size; -/** @type {?} */ -PrimitiveView.prototype.clone; - -var NullView = function() {}; -/** @type {?} */ -NullView.prototype.get; -/** @type {?} */ -NullView.prototype.clone; -/** @type {?} */ -NullView.prototype.isValid; -/** @type {?} */ -NullView.prototype.toArray; -/** @type {?} */ -NullView.prototype.set; - -var BoolView = function() {}; -/** @type {?} */ -BoolView.prototype.get; -/** @type {?} */ -BoolView.prototype.clone; -/** @type {?} */ -BoolView.prototype.isValid; -/** @type {?} */ -BoolView.prototype.toArray; -/** @type {?} */ -BoolView.prototype.set; - -var ValidityView = function() {}; -/** @type {?} */ -ValidityView.prototype.get; -/** @type {?} */ -ValidityView.prototype.clone; -/** @type {?} */ -ValidityView.prototype.isValid; -/** @type {?} */ -ValidityView.prototype.toArray; -/** @type {?} */ -ValidityView.prototype.set; -/** @type {?} */ -ValidityView.prototype.size; -/** @type {?} */ -ValidityView.prototype.getChildAt; - -var DictionaryView = function() {}; -/** @type {?} */ -DictionaryView.prototype.get; -/** @type {?} */ -DictionaryView.prototype.clone; -/** @type {?} */ -DictionaryView.prototype.isValid; -/** @type {?} */ -DictionaryView.prototype.toArray; -/** @type {?} */ -DictionaryView.prototype.set; - -var ListViewBase = function() {}; -/** @type {?} */ -ListViewBase.prototype.get; -/** @type {?} */ -ListViewBase.prototype.clone; -/** @type {?} */ -ListViewBase.prototype.isValid; -/** @type {?} */ -ListViewBase.prototype.toArray; -/** @type {?} */ -ListViewBase.prototype.set; - -var NestedView = function() {}; -/** @type {?} */ -NestedView.prototype.get; -/** @type {?} */ -NestedView.prototype.clone; -/** @type {?} */ -NestedView.prototype.isValid; -/** @type {?} */ -NestedView.prototype.toArray; -/** @type {?} */ -NestedView.prototype.set; - -var ChunkedView = function() {}; -/** @type {?} */ -ChunkedView.prototype.get; -/** @type {?} */ -ChunkedView.prototype.clone; -/** @type {?} */ -ChunkedView.prototype.isValid; -/** @type {?} */ -ChunkedView.prototype.toArray; -/** @type {?} */ -ChunkedView.prototype.set; - -var ListView = function() {}; -var FixedSizeListView = function() {}; -var BinaryView = function() {}; -var Utf8View = function() {}; -var UnionView = function() {}; -var DenseUnionView = function() {}; -var StructView = function() {}; -var MapView = function() {}; -var NullView = function() {}; -var FixedSizeView = function() {}; -var Float16View = function() {}; -var DateDayView = function() {}; -var DateMillisecondView = function() {}; -var TimestampDayView = function() {}; -var TimestampSecondView = function() {}; -var TimestampMillisecondView = function() {}; -var TimestampMicrosecondView = function() {}; -var TimestampNanosecondView = function() {}; -var IntervalYearMonthView = function() {}; -var IntervalYearView = function() {}; -var IntervalMonthView = function() {}; - -var TypeVisitor = function() {}; -/** @type {?} */ -TypeVisitor.visitTypeInline = function() {}; -/** @type {?} */ -TypeVisitor.prototype.visit; -/** @type {?} */ 
-TypeVisitor.prototype.visitMany; -/** @type {?} */ -TypeVisitor.prototype.visitNull; -/** @type {?} */ -TypeVisitor.prototype.visitBool; -/** @type {?} */ -TypeVisitor.prototype.visitInt; -/** @type {?} */ -TypeVisitor.prototype.visitFloat; -/** @type {?} */ -TypeVisitor.prototype.visitUtf8; -/** @type {?} */ -TypeVisitor.prototype.visitBinary; -/** @type {?} */ -TypeVisitor.prototype.visitFixedSizeBinary; -/** @type {?} */ -TypeVisitor.prototype.visitDate; -/** @type {?} */ -TypeVisitor.prototype.visitTimestamp; -/** @type {?} */ -TypeVisitor.prototype.visitTime; -/** @type {?} */ -TypeVisitor.prototype.visitDecimal; -/** @type {?} */ -TypeVisitor.prototype.visitList; -/** @type {?} */ -TypeVisitor.prototype.visitStruct; -/** @type {?} */ -TypeVisitor.prototype.visitUnion; -/** @type {?} */ -TypeVisitor.prototype.visitDictionary; -/** @type {?} */ -TypeVisitor.prototype.visitInterval; -/** @type {?} */ -TypeVisitor.prototype.visitFixedSizeList; -/** @type {?} */ -TypeVisitor.prototype.visitMap; - -var VectorVisitor = function() {}; -/** @type {?} */ -VectorVisitor.visitTypeInline = function() {}; -/** @type {?} */ -VectorVisitor.prototype.visit; -/** @type {?} */ -VectorVisitor.prototype.visitMany; -/** @type {?} */ -VectorVisitor.prototype.visitNull; -/** @type {?} */ -VectorVisitor.prototype.visitBool; -/** @type {?} */ -VectorVisitor.prototype.visitInt; -/** @type {?} */ -VectorVisitor.prototype.visitFloat; -/** @type {?} */ -VectorVisitor.prototype.visitUtf8; -/** @type {?} */ -VectorVisitor.prototype.visitBinary; -/** @type {?} */ -VectorVisitor.prototype.visitFixedSizeBinary; -/** @type {?} */ -VectorVisitor.prototype.visitDate; -/** @type {?} */ -VectorVisitor.prototype.visitTimestamp; -/** @type {?} */ -VectorVisitor.prototype.visitTime; -/** @type {?} */ -VectorVisitor.prototype.visitDecimal; -/** @type {?} */ -VectorVisitor.prototype.visitList; -/** @type {?} */ -VectorVisitor.prototype.visitStruct; -/** @type {?} */ -VectorVisitor.prototype.visitUnion; -/** @type {?} */ -VectorVisitor.prototype.visitDictionary; -/** @type {?} */ -VectorVisitor.prototype.visitInterval; -/** @type {?} */ -VectorVisitor.prototype.visitFixedSizeList; -/** @type {?} */ -VectorVisitor.prototype.visitMap; diff --git a/js/src/Arrow.node.ts b/js/src/Arrow.node.ts new file mode 100644 index 0000000000000..da6e3df6d9b08 --- /dev/null +++ b/js/src/Arrow.node.ts @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+import streamAdapters from './io/adapters';
+import { RecordBatchReader } from './ipc/reader';
+import { RecordBatchWriter } from './ipc/writer';
+import { toNodeStream } from './ipc/node/iterable';
+import { recordBatchReaderThroughNodeStream } from './ipc/node/reader';
+import { recordBatchWriterThroughNodeStream } from './ipc/node/writer';
+
+streamAdapters.toNodeStream = toNodeStream;
+RecordBatchReader['throughNode'] = recordBatchReaderThroughNodeStream;
+RecordBatchWriter['throughNode'] = recordBatchWriterThroughNodeStream;
+
+export * from './Arrow.dom';
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index c76578b62996d..0e5a5fe3bc280 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -15,306 +15,78 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import * as type_ from './type';
-import * as data_ from './data';
-import * as vector_ from './vector';
+export { ArrowType, DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, VectorType } from './enum';
+export { Data } from './data';
+export {
+    DataType,
+    Null,
+    Bool,
+    Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
+    Float, Float16, Float32, Float64,
+    Utf8,
+    Binary,
+    FixedSizeBinary,
+    Date_, DateDay, DateMillisecond,
+    Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+    Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+    Decimal,
+    List,
+    Struct,
+    Union, DenseUnion, SparseUnion,
+    Dictionary,
+    Interval, IntervalDayTime, IntervalYearMonth,
+    FixedSizeList,
+    Map_,
+} from './type';
+
+export { Table } from './table';
+export { Column } from './column';
+export { Schema, Field } from './schema';
+export { Visitor } from './visitor';
+export {
+    Row,
+    Vector,
+    BaseVector,
+    BinaryVector,
+    BoolVector,
+    Chunked,
+    DateVector, DateDayVector, DateMillisecondVector,
+    DecimalVector,
+    DictionaryVector,
+    FixedSizeBinaryVector,
+    FixedSizeListVector,
+    FloatVector, Float16Vector, Float32Vector, Float64Vector,
+    IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector,
+    IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
+    ListVector,
+    MapVector,
+    NullVector,
+    StructVector,
+    TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector,
+    TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector,
+    UnionVector, DenseUnionVector, SparseUnionVector,
+    Utf8Vector,
+} from './vector/index';
+
+export { ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink } from './io/stream';
+export { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader } from './ipc/reader';
+export { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter } from './ipc/writer';
+export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message';
+export { Message } from './ipc/metadata/message';
+export { RecordBatch } from './recordbatch';
+export { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces';
+export { DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc } from './compute/dataframe';
+
 import * as util_int_ from './util/int';
 import * as util_bit_ from './util/bit';
-import * as util_node from './util/node';
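The rewritten Arrow.ts entry point above exports every public name at the top level instead of through nested namespaces. As a consumer-side illustration of what the flat surface looks like (a sketch, not part of the patch; the npm package name 'apache-arrow' is assumed):

```ts
// Names formerly reached as Arrow.vector.IntVector are now direct imports.
import { IntVector } from 'apache-arrow';

// IntVector.from appears in both the old externs and the new exports above.
const vec = IntVector.from(new Int32Array([1, 2, 3]));
console.log(vec.get(1)); // -> 2
```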
-import * as visitor_ from './visitor'; -import * as view_ from './vector/view'; -import * as predicate_ from './predicate'; -import { Vector } from './vector'; -import { RecordBatch } from './recordbatch'; -import { Schema, Field, Type } from './type'; -import { Table, DataFrame, NextFunc, BindFunc, CountByResult } from './table'; -import { fromReadableStream } from './ipc/reader/node'; -import { read, readAsync, readStream } from './ipc/reader/arrow'; -import { readBuffersAsync, readRecordBatchesAsync } from './ipc/reader/arrow'; -import { serializeFile, serializeStream } from './ipc/writer/binary'; - -export import View = vector_.View; -export import VectorLike = vector_.VectorLike; -export import TypedArray = type_.TypedArray; -export import IntBitWidth = type_.IntBitWidth; -export import TimeBitWidth = type_.TimeBitWidth; -export import TypedArrayConstructor = type_.TypedArrayConstructor; - -export { fromReadableStream }; -export { read, readAsync, readStream }; -export { readBuffersAsync, readRecordBatchesAsync }; -export { serializeFile, serializeStream }; -export { Table, DataFrame, NextFunc, BindFunc, CountByResult }; -export { Field, Schema, RecordBatch, Vector, Type }; - -export namespace util { - export import Uint64 = util_int_.Uint64; - export import Int64 = util_int_.Int64; - export import Int128 = util_int_.Int128; - export import packBools = util_bit_.packBools; - export import PipeIterator = util_node.PipeIterator; - export import AsyncPipeIterator = util_node.AsyncPipeIterator; -} - -export namespace data { - export import BaseData = data_.BaseData; - export import FlatData = data_.FlatData; - export import BoolData = data_.BoolData; - export import FlatListData = data_.FlatListData; - export import DictionaryData = data_.DictionaryData; - export import NestedData = data_.NestedData; - export import ListData = data_.ListData; - export import UnionData = data_.UnionData; - export import SparseUnionData = data_.SparseUnionData; - export import DenseUnionData = data_.DenseUnionData; - export import ChunkedData = data_.ChunkedData; -} - -export namespace enum_ { - export import Type = type_.ArrowType; - export import DateUnit = type_.DateUnit; - export import TimeUnit = type_.TimeUnit; - export import Precision = type_.Precision; - export import UnionMode = type_.UnionMode; - export import VectorType = type_.VectorType; - export import IntervalUnit = type_.IntervalUnit; - export import MessageHeader = type_.MessageHeader; - export import MetadataVersion = type_.MetadataVersion; -} - -export namespace type { - export import Schema = type_.Schema; - export import Field = type_.Field; - export import Null = type_.Null; - export import Int = type_.Int; - export import Int8 = type_.Int8; - export import Int16 = type_.Int16; - export import Int32 = type_.Int32; - export import Int64 = type_.Int64; - export import Uint8 = type_.Uint8; - export import Uint16 = type_.Uint16; - export import Uint32 = type_.Uint32; - export import Uint64 = type_.Uint64; - export import Float = type_.Float; - export import Float16 = type_.Float16; - export import Float32 = type_.Float32; - export import Float64 = type_.Float64; - export import Binary = type_.Binary; - export import Utf8 = type_.Utf8; - export import Bool = type_.Bool; - export import Decimal = type_.Decimal; - export import Date_ = type_.Date_; - export import Time = type_.Time; - export import Timestamp = type_.Timestamp; - export import Interval = type_.Interval; - export import List = type_.List; - export import Struct = 
type_.Struct; - export import Union = type_.Union; - export import DenseUnion = type_.DenseUnion; - export import SparseUnion = type_.SparseUnion; - export import FixedSizeBinary = type_.FixedSizeBinary; - export import FixedSizeList = type_.FixedSizeList; - export import Map_ = type_.Map_; - export import Dictionary = type_.Dictionary; -} - -export namespace vector { - export import Vector = vector_.Vector; - export import NullVector = vector_.NullVector; - export import BoolVector = vector_.BoolVector; - export import IntVector = vector_.IntVector; - export import FloatVector = vector_.FloatVector; - export import DateVector = vector_.DateVector; - export import DecimalVector = vector_.DecimalVector; - export import TimeVector = vector_.TimeVector; - export import TimestampVector = vector_.TimestampVector; - export import IntervalVector = vector_.IntervalVector; - export import BinaryVector = vector_.BinaryVector; - export import FixedSizeBinaryVector = vector_.FixedSizeBinaryVector; - export import Utf8Vector = vector_.Utf8Vector; - export import ListVector = vector_.ListVector; - export import FixedSizeListVector = vector_.FixedSizeListVector; - export import MapVector = vector_.MapVector; - export import StructVector = vector_.StructVector; - export import UnionVector = vector_.UnionVector; - export import DictionaryVector = vector_.DictionaryVector; -} - -export namespace visitor { - export import TypeVisitor = visitor_.TypeVisitor; - export import VectorVisitor = visitor_.VectorVisitor; -} - -export namespace view { - export import ChunkedView = view_.ChunkedView; - export import DictionaryView = view_.DictionaryView; - export import ListView = view_.ListView; - export import FixedSizeListView = view_.FixedSizeListView; - export import BinaryView = view_.BinaryView; - export import Utf8View = view_.Utf8View; - export import UnionView = view_.UnionView; - export import DenseUnionView = view_.DenseUnionView; - export import NestedView = view_.NestedView; - export import StructView = view_.StructView; - export import MapView = view_.MapView; - export import FlatView = view_.FlatView; - export import NullView = view_.NullView; - export import BoolView = view_.BoolView; - export import ValidityView = view_.ValidityView; - export import PrimitiveView = view_.PrimitiveView; - export import FixedSizeView = view_.FixedSizeView; - export import Float16View = view_.Float16View; - export import DateDayView = view_.DateDayView; - export import DateMillisecondView = view_.DateMillisecondView; - export import TimestampDayView = view_.TimestampDayView; - export import TimestampSecondView = view_.TimestampSecondView; - export import TimestampMillisecondView = view_.TimestampMillisecondView; - export import TimestampMicrosecondView = view_.TimestampMicrosecondView; - export import TimestampNanosecondView = view_.TimestampNanosecondView; - export import IntervalYearMonthView = view_.IntervalYearMonthView; - export import IntervalYearView = view_.IntervalYearView; - export import IntervalMonthView = view_.IntervalMonthView; -} - -export namespace predicate { - export import col = predicate_.col; - export import lit = predicate_.lit; - export import and = predicate_.and; - export import or = predicate_.or; - export import custom = predicate_.custom; - - export import Or = predicate_.Or; - export import Col = predicate_.Col; - export import And = predicate_.And; - export import Not = predicate_.Not; - export import GTeq = predicate_.GTeq; - export import LTeq = predicate_.LTeq; - export import Value = 
predicate_.Value; - export import Equals = predicate_.Equals; - export import Literal = predicate_.Literal; - export import Predicate = predicate_.Predicate; - - export import PredicateFunc = predicate_.PredicateFunc; -} - -/* These exports are needed for the closure and uglify umd targets */ -try { - let Arrow: any = eval('exports'); - if (Arrow && typeof Arrow === 'object') { - // string indexers tell closure and uglify not to rename these properties - Arrow['data'] = data; - Arrow['type'] = type; - Arrow['util'] = util; - Arrow['view'] = view; - Arrow['enum_'] = enum_; - Arrow['vector'] = vector; - Arrow['visitor'] = visitor; - Arrow['predicate'] = predicate; - - Arrow['read'] = read; - Arrow['readAsync'] = readAsync; - Arrow['readStream'] = readStream; - Arrow['fromReadableStream'] = fromReadableStream; - Arrow['readBuffersAsync'] = readBuffersAsync; - Arrow['readRecordBatchesAsync'] = readRecordBatchesAsync; - - Arrow['serializeFile'] = serializeFile; - Arrow['serializeStream'] = serializeStream; - - Arrow['Type'] = Type; - Arrow['Field'] = Field; - Arrow['Schema'] = Schema; - Arrow['Vector'] = Vector; - Arrow['RecordBatch'] = RecordBatch; - - Arrow['Table'] = Table; - Arrow['CountByResult'] = CountByResult; - } -} catch (e) { /* not the UMD bundle */ } -/* end umd exports */ - -// closure compiler erases static properties/methods: -// https://github.com/google/closure-compiler/issues/1776 -// set them via string indexers to save them from the mangler -Schema['from'] = Schema.from; -Table['from'] = Table.from; -Table['fromVectors'] = Table.fromVectors; -Table['fromAsync'] = Table.fromAsync; -Table['fromStruct'] = Table.fromStruct; -Table['empty'] = Table.empty; -Vector['create'] = Vector.create; -RecordBatch['from'] = RecordBatch.from; - -util_int_.Uint64['add'] = util_int_.Uint64.add; -util_int_.Uint64['multiply'] = util_int_.Uint64.multiply; -util_int_.Uint64['from'] = util_int_.Uint64.from; -util_int_.Uint64['fromNumber'] = util_int_.Uint64.fromNumber; -util_int_.Uint64['fromString'] = util_int_.Uint64.fromString; -util_int_.Uint64['convertArray'] = util_int_.Uint64.convertArray; - -util_int_.Int64['add'] = util_int_.Int64.add; -util_int_.Int64['multiply'] = util_int_.Int64.multiply; -util_int_.Int64['from'] = util_int_.Int64.from; -util_int_.Int64['fromNumber'] = util_int_.Int64.fromNumber; -util_int_.Int64['fromString'] = util_int_.Int64.fromString; -util_int_.Int64['convertArray'] = util_int_.Int64.convertArray; - -util_int_.Int128['add'] = util_int_.Int128.add; -util_int_.Int128['multiply'] = util_int_.Int128.multiply; -util_int_.Int128['from'] = util_int_.Int128.from; -util_int_.Int128['fromNumber'] = util_int_.Int128.fromNumber; -util_int_.Int128['fromString'] = util_int_.Int128.fromString; -util_int_.Int128['convertArray'] = util_int_.Int128.convertArray; - -data_.ChunkedData['computeOffsets'] = data_.ChunkedData.computeOffsets; - -(type_.Type as any)['NONE'] = type_.Type.NONE; -(type_.Type as any)['Null'] = type_.Type.Null; -(type_.Type as any)['Int'] = type_.Type.Int; -(type_.Type as any)['Float'] = type_.Type.Float; -(type_.Type as any)['Binary'] = type_.Type.Binary; -(type_.Type as any)['Utf8'] = type_.Type.Utf8; -(type_.Type as any)['Bool'] = type_.Type.Bool; -(type_.Type as any)['Decimal'] = type_.Type.Decimal; -(type_.Type as any)['Date'] = type_.Type.Date; -(type_.Type as any)['Time'] = type_.Type.Time; -(type_.Type as any)['Timestamp'] = type_.Type.Timestamp; -(type_.Type as any)['Interval'] = type_.Type.Interval; -(type_.Type as any)['List'] = type_.Type.List; 
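The run of string-indexer assignments being removed here worked around Closure Compiler erasing static properties and methods (see the inline comment above referencing google/closure-compiler issue 1776). A minimal sketch of the pattern, using a hypothetical `Example` class rather than any Arrow type:

```ts
// Minifiers may rename dotted property accesses, but never string-keyed
// ones, so re-assigning a static through a string indexer pins its public
// name in the mangled UMD bundle.
class Example {
    static create(): Example { return new Example(); }
}
// After mangling, `Example.create` could become `Example.a`; this saves it:
(Example as any)['create'] = Example.create;
```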
-(type_.Type as any)['Struct'] = type_.Type.Struct; -(type_.Type as any)['Union'] = type_.Type.Union; -(type_.Type as any)['FixedSizeBinary'] = type_.Type.FixedSizeBinary; -(type_.Type as any)['FixedSizeList'] = type_.Type.FixedSizeList; -(type_.Type as any)['Map'] = type_.Type.Map; -(type_.Type as any)['Dictionary'] = type_.Type.Dictionary; -(type_.Type as any)['DenseUnion'] = type_.Type.DenseUnion; -(type_.Type as any)['SparseUnion'] = type_.Type.SparseUnion; - -type_.DataType['isNull'] = type_.DataType.isNull; -type_.DataType['isInt'] = type_.DataType.isInt; -type_.DataType['isFloat'] = type_.DataType.isFloat; -type_.DataType['isBinary'] = type_.DataType.isBinary; -type_.DataType['isUtf8'] = type_.DataType.isUtf8; -type_.DataType['isBool'] = type_.DataType.isBool; -type_.DataType['isDecimal'] = type_.DataType.isDecimal; -type_.DataType['isDate'] = type_.DataType.isDate; -type_.DataType['isTime'] = type_.DataType.isTime; -type_.DataType['isTimestamp'] = type_.DataType.isTimestamp; -type_.DataType['isInterval'] = type_.DataType.isInterval; -type_.DataType['isList'] = type_.DataType.isList; -type_.DataType['isStruct'] = type_.DataType.isStruct; -type_.DataType['isUnion'] = type_.DataType.isUnion; -type_.DataType['isDenseUnion'] = type_.DataType.isDenseUnion; -type_.DataType['isSparseUnion'] = type_.DataType.isSparseUnion; -type_.DataType['isFixedSizeBinary'] = type_.DataType.isFixedSizeBinary; -type_.DataType['isFixedSizeList'] = type_.DataType.isFixedSizeList; -type_.DataType['isMap'] = type_.DataType.isMap; -type_.DataType['isDictionary'] = type_.DataType.isDictionary; - -vector_.BoolVector['from'] = vector_.BoolVector.from; -vector_.DateVector['from'] = vector_.DateVector.from; -vector_.IntVector['from'] = vector_.IntVector.from; -vector_.FloatVector['from'] = vector_.FloatVector.from; - -visitor_.TypeVisitor['visitTypeInline'] = visitor_.TypeVisitor.visitTypeInline; -visitor_.VectorVisitor['visitTypeInline'] = visitor_.VectorVisitor.visitTypeInline; \ No newline at end of file +import * as util_buffer_ from './util/buffer'; +import * as util_vector_ from './util/vector'; +import * as predicate from './compute/predicate'; + +export { predicate }; +export const util = { + ...util_int_, + ...util_bit_, + ...util_buffer_, + ...util_vector_ +}; diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts index 510f00740fed0..4ae9c0089a009 100644 --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -20,60 +20,189 @@ /* tslint:disable */ import * as fs from 'fs'; -import { promisify } from 'util'; -import { Table, readStream } from '../Arrow'; +import * as stream from 'stream'; +import { valueToString } from '../util/pretty'; +import { RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node'; -const readFile = promisify(fs.readFile); -const { parse } = require('json-bignum'); +const padLeft = require('pad-left'); +const bignumJSONParse = require('json-bignum').parse; +const pipeline = require('util').promisify(stream.pipeline); const argv = require(`command-line-args`)(cliOpts(), { partial: true }); -const files = [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); +const files = argv.help ? 
[] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean); + +const state = { ...argv, closed: false, hasRecords: false }; (async () => { - let hasRecords = false; - if (files.length > 0) { - hasRecords = true; - for (let input of files) { - printTable(await readFile(input)); - } - } else { - let rowOffset = 0; - let maxColumnWidths: number[] = []; - for await (const recordBatch of readStream(process.stdin)) { - hasRecords = true; - recordBatch.rowsToString(' | ', rowOffset, maxColumnWidths).pipe(process.stdout); - rowOffset += recordBatch.length; + + const sources = argv.help ? [] : [ + ...files.map((file) => () => fs.createReadStream(file)), + ...(process.stdin.isTTY ? [] : [() => process.stdin]) + ].filter(Boolean) as (() => NodeJS.ReadableStream)[]; + + let reader: RecordBatchReader | null; + + for (const source of sources) { + if (state.closed) { break; } + if (reader = await createRecordBatchReader(source)) { + await pipeline( + reader.toNodeStream(), + recordBatchRowsToString(state), + process.stdout + ).catch(() => state.closed = true); } + if (state.closed) { break; } } - return hasRecords ? null : print_usage(); -})().catch((e) => { console.error(e); process.exit(1); }); -function printTable(input: any) { - let table: Table; + return state.hasRecords ? 0 : print_usage(); +})() +.then((x) => +x || 0, (err) => { + if (err) { + console.error(`${err && err.stack || err}`); + } + return process.exitCode || 1; +}).then((code) => process.exit(code)); + +async function createRecordBatchReader(createSourceStream: () => NodeJS.ReadableStream) { + + let json = new AsyncByteQueue(); + let stream = new AsyncByteQueue(); + let source = createSourceStream(); + let reader: RecordBatchReader | null = null; + // tee the input source, just in case it's JSON + source.on('end', () => [stream, json].forEach((y) => y.close())) + .on('data', (x) => [stream, json].forEach((y) => y.write(x))) + .on('error', (e) => [stream, json].forEach((y) => y.abort(e))); + try { - table = Table.from(input); - } catch (e) { - table = Table.from(parse(input + '')); + reader = await (await RecordBatchReader.from(stream)).open(); + } catch (e) { reader = null; } + + if (!reader || reader.closed) { + reader = null; + await json.closed; + if (source instanceof fs.ReadStream) { source.close(); } + // If the data in the `json` ByteQueue parses to JSON, then assume it's Arrow JSON from a file or stdin + try { + reader = await (await RecordBatchReader.from(bignumJSONParse(await json.toString()))).open(); + } catch (e) { reader = null; } + } + + return (reader && !reader.closed) ? reader : null; +} + +function recordBatchRowsToString(state: { closed: boolean, schema: any, separator: string, hasRecords: boolean }) { + + let rowId = 0, maxColWidths = [15], separator = `${state.separator || ' |'} `; + + return new stream.Transform({ transform, encoding: 'utf8', writableObjectMode: true, readableObjectMode: false }); + + function transform(this: stream.Transform, batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) { + batch = !(state.schema && state.schema.length) ? 
batch : batch.select(...state.schema); + if (batch.length <= 0 || batch.numCols <= 0 || state.closed) { + state.hasRecords || (state.hasRecords = false); + return cb(undefined, null); + } + + state.hasRecords = true; + const header = ['row_id', ...batch.schema.fields.map((f) => `${f}`)].map(valueToString); + + // Pass one to convert to strings and count max column widths + const newMaxWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length))); + + // If any of the column widths changed, print the header again + if ((rowId % 350) && JSON.stringify(newMaxWidths) !== JSON.stringify(maxColWidths)) { + this.push(`\n${formatRow(header, newMaxWidths, separator)}`); + } + + maxColWidths = newMaxWidths; + + for (const row of batch) { + if (state.closed) { break; } + else if (!row) { continue; } + if (!(rowId % 350)) { this.push(`\n${formatRow(header, maxColWidths, separator)}`); } + this.push(formatRow([rowId++, ...row].map(valueToString), maxColWidths, separator)); + } + cb(); } - if (argv.schema && argv.schema.length) { - table = table.select(...argv.schema); +} + +function formatRow(row: string[] = [], maxColWidths: number[] = [], separator: string = ' |') { + return row.map((x, j) => padLeft(x, maxColWidths[j])).join(separator) + '\n'; +} + +function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) { + for (const row of batch) { + if (!row) { continue; } + maxColWidths[0] = Math.max(maxColWidths[0] || 0, (`${rowId++}`).length); + for (let val: any, j = -1, k = row.length; ++j < k;) { + if (ArrayBuffer.isView(val = row[j]) && (typeof val[Symbol.toPrimitive] !== 'function')) { + // If we're printing a column of TypedArrays, ensure the column is wide enough to accommodate + // the widest possible element for a given byte size, since JS omits leading zeroes. 
For example: + // 1 | [1137743649,2170567488,244696391,2122556476] + // 2 | null + // 3 | [637174007,2142281880,961736230,2912449282] + // 4 | [1035112265,21832886,412842672,2207710517] + // 5 | null + // 6 | null + // 7 | [2755142991,4192423256,2994359,467878370] + const elementWidth = typedArrayElementWidths.get(val.constructor)!; + + maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, + 2 + // brackets on each end + (val.length - 1) + // commas between elements + (val.length * elementWidth) // width of stringified 2^N-1 + ); + } else { + maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length); + } + } } - table.rowsToString().pipe(process.stdout); + return maxColWidths; } +// Measure the stringified representation of 2^N-1 for each TypedArray variant +const typedArrayElementWidths = (() => { + const maxElementWidth = (ArrayType: any) => { + const octets = Array.from({ length: ArrayType.BYTES_PER_ELEMENT - 1 }, _ => 255); + return `${new ArrayType(new Uint8Array([...octets, 254]).buffer)[0]}`.length; + }; + return new Map([ + [Int8Array, maxElementWidth(Int8Array)], + [Int16Array, maxElementWidth(Int16Array)], + [Int32Array, maxElementWidth(Int32Array)], + [Uint8Array, maxElementWidth(Uint8Array)], + [Uint16Array, maxElementWidth(Uint16Array)], + [Uint32Array, maxElementWidth(Uint32Array)], + [Float32Array, maxElementWidth(Float32Array)], + [Float64Array, maxElementWidth(Float64Array)], + [Uint8ClampedArray, maxElementWidth(Uint8ClampedArray)] + ]) +})(); + function cliOpts() { return [ { type: String, name: 'schema', alias: 's', optional: true, multiple: true, - typeLabel: '[underline]{columns}', + typeLabel: '{underline columns}', description: 'A space-delimited list of column names' }, { type: String, name: 'file', alias: 'f', - optional: false, multiple: true, + optional: true, multiple: true, description: 'The Arrow file to read' + }, + { + type: String, + name: 'sep', optional: true, default: '|', + description: 'The column separator character' + }, + { + type: Boolean, + name: 'help', optional: true, default: false, + description: 'Print this usage guide.' } ]; } @@ -87,34 +216,29 @@ function print_usage() { { header: 'Synopsis', content: [ - '$ arrow2csv [underline]{file.arrow} [[bold]{--schema} column_name ...]', - '$ arrow2csv [[bold]{--schema} column_name ...] [[bold]{--file} [underline]{file.arrow}]', - '$ arrow2csv [bold]{-s} column_1 [bold]{-s} column_2 [[bold]{-f} [underline]{file.arrow}]', - '$ arrow2csv [[bold]{--help}]' + '$ arrow2csv {underline file.arrow} [{bold --schema} column_name ...]', + '$ arrow2csv [{bold --schema} column_name ...] [{bold --file} {underline file.arrow}]', + '$ arrow2csv {bold -s} column_1 {bold -s} column_2 [{bold -f} {underline file.arrow}]', + '$ arrow2csv [{bold --help}]' ] }, { header: 'Options', - optionList: [ - ...cliOpts(), - { - name: 'help', - description: 'Print this usage guide.' 
- } - ] + optionList: cliOpts() }, { header: 'Example', content: [ - '$ arrow2csv --schema foo baz -f simple.arrow', - '> foo, baz', - '> 1, aa', - '> null, null', - '> 3, null', - '> 4, bbb', - '> 5, cccc', + '$ arrow2csv --schema foo baz -f simple.arrow --sep ","', + ' ', + '> "row_id", "foo: Int32", "bar: Float64", "baz: Utf8"', + '> 0, 1, 1, "aa"', + '> 1, null, null, null', + '> 2, 3, null, null', + '> 3, 4, 4, "bbb"', + '> 4, 5, 5, "cccc"', ] } ])); - process.exit(1); -} \ No newline at end of file + return 1; +} diff --git a/js/src/column.ts b/js/src/column.ts new file mode 100644 index 0000000000000..0a5bc36797bf9 --- /dev/null +++ b/js/src/column.ts @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Field } from './schema'; +import { Vector } from './vector'; +import { DataType } from './type'; +import { Clonable, Sliceable, Applicative } from './vector'; +import { Chunked, SearchContinuation } from './vector/chunked'; + +export interface Column { + typeId: T['TType']; + concat(...others: Vector[]): Column; + slice(begin?: number, end?: number): Column; + clone(chunks?: Vector[], offsets?: Uint32Array): Column; +} + +export class Column + extends Chunked + implements Clonable>, + Sliceable>, + Applicative> { + + constructor(field: Field, vectors: Vector[] = [], offsets?: Uint32Array) { + vectors = Chunked.flatten(...vectors); + super(field.type, vectors, offsets); + this._field = field; + if (vectors.length === 1 && !(this instanceof SingleChunkColumn)) { + return new SingleChunkColumn(field, vectors[0], this._chunkOffsets); + } + } + + protected _field: Field; + protected _children?: Column[]; + + public get field() { return this._field; } + public get name() { return this._field.name; } + + public clone(chunks = this._chunks) { + return new Column(this._field, chunks); + } + + public getChildAt(index: number): Column | null { + + if (index < 0 || index >= this.numChildren) { return null; } + + let columns = this._children || (this._children = []); + let column: Column, field: Field, chunks: Vector[]; + + if (column = columns[index]) { return column; } + if (field = ((this.type.children || [])[index] as Field)) { + chunks = this._chunks + .map((vector) => vector.getChildAt(index)) + .filter((vec): vec is Vector => vec != null); + if (chunks.length > 0) { + return (columns[index] = new Column(field, chunks)); + } + } + + return null; + } +} + +class SingleChunkColumn extends Column { + protected _chunk: Vector; + constructor(field: Field, vector: Vector, offsets?: Uint32Array) { + super(field, [vector], offsets); + this._chunk = vector; + } + public search(index: number): [number, number] | null; + public search>>(index: number, then?: N): ReturnType; + public search>>(index: number, then?: N) { + return 
then ? then(this, 0, index) : [0, index]; + } + public isValid(index: number): boolean { + return this._chunk.isValid(index); + } + public get(index: number): T['TValue'] | null { + return this._chunk.get(index); + } + public set(index: number, value: T['TValue'] | null): void { + this._chunk.set(index, value); + } + public indexOf(element: T['TValue'], offset?: number): number { + return this._chunk.indexOf(element, offset); + } +} diff --git a/js/src/compute/dataframe.ts b/js/src/compute/dataframe.ts new file mode 100644 index 0000000000000..01026d882f0c0 --- /dev/null +++ b/js/src/compute/dataframe.ts @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Table } from '../table'; +import { Vector } from '../vector'; +import { IntVector } from '../vector/int'; +import { Field, Schema } from '../schema'; +import { Vector as V } from '../interfaces'; +import { Predicate, Col } from './predicate'; +import { RecordBatch } from '../recordbatch'; +import { DataType, Int, Struct, Dictionary } from '../type'; + +/** @ignore */ +export type BindFunc = (batch: RecordBatch) => void; +/** @ignore */ +export type NextFunc = (idx: number, batch: RecordBatch) => void; + +Table.prototype.countBy = function(this: Table, name: Col | string) { return new DataFrame(this.chunks).countBy(name); }; +Table.prototype.scan = function(this: Table, next: NextFunc, bind?: BindFunc) { return new DataFrame(this.chunks).scan(next, bind); }; +Table.prototype.filter = function(this: Table, predicate: Predicate): FilteredDataFrame { return new DataFrame(this.chunks).filter(predicate); }; + +export class DataFrame extends Table { + public filter(predicate: Predicate): FilteredDataFrame { + return new FilteredDataFrame(this.chunks, predicate); + } + public scan(next: NextFunc, bind?: BindFunc) { + const batches = this.chunks, numBatches = batches.length; + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + if (bind) { bind(batch); } + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + next(index, batch); + } + } + } + public countBy(name: Col | string) { + const batches = this.chunks, numBatches = batches.length; + const count_by = typeof name === 'string' ? 
new Col(name) : name as Col; + // Assume that all dictionary batches are deltas, which means that the + // last record batch has the most complete dictionary + count_by.bind(batches[numBatches - 1]); + const vector = count_by.vector as V; + if (!DataType.isDictionary(vector.type)) { + throw new Error('countBy currently only supports dictionary-encoded columns'); + } + + const countByteLength = Math.ceil(Math.log(vector.dictionary.length) / Math.log(256)); + const CountsArrayType = countByteLength == 4 ? Uint32Array : + countByteLength >= 2 ? Uint16Array : Uint8Array; + + const counts = new CountsArrayType(vector.dictionary.length); + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + // rebind the countBy Col + count_by.bind(batch); + const keys = (count_by.vector as V).indices; + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + let key = keys.get(index); + if (key !== null) { counts[key]++; } + } + } + return new CountByResult(vector.dictionary, IntVector.from(counts)); + } +} + +export class CountByResult extends Table<{ values: T, counts: TCount }> { + constructor(values: Vector, counts: V) { + const schema = new Schema<{ values: T, counts: TCount }>([ + new Field('values', values.type), + new Field('counts', counts.type) + ]); + super(new RecordBatch(schema, counts.length, [values, counts])); + } + public toJSON(): Object { + const values = this.getColumnAt(0)!; + const counts = this.getColumnAt(1)!; + const result = {} as { [k: string]: number | null }; + for (let i = -1; ++i < this.length;) { + result[values.get(i)] = counts.get(i); + } + return result; + } +} + +export class FilteredDataFrame extends DataFrame { + private _predicate: Predicate; + constructor (batches: RecordBatch[], predicate: Predicate) { + super(batches); + this._predicate = predicate; + } + public scan(next: NextFunc, bind?: BindFunc) { + // inlined version of this: + // this.parent.scan((idx, columns) => { + // if (this.predicate(idx, columns)) next(idx, columns); + // }); + const batches = this._chunks; + const numBatches = batches.length; + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + // TODO: bind batches lazily + // If predicate doesn't match anything in the batch we don't need + // to bind the callback + if (bind) { bind(batch); } + const predicate = this._predicate.bind(batch); + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + if (predicate(index, batch)) { next(index, batch); } + } + } + } + public count(): number { + // inlined version of this: + // let sum = 0; + // this.parent.scan((idx, columns) => { + // if (this.predicate(idx, columns)) ++sum; + // }); + // return sum; + let sum = 0; + const batches = this._chunks; + const numBatches = batches.length; + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + const predicate = this._predicate.bind(batch); + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + if (predicate(index, batch)) { ++sum; } + } + } + return sum; + } + public *[Symbol.iterator](): IterableIterator['TValue']> { + // inlined version of this: + // this.parent.scan((idx, columns) => { + // if (this.predicate(idx, columns)) next(idx, columns); + // }); + const batches = this._chunks; + const numBatches = batches.length; + for (let batchIndex = -1; ++batchIndex < numBatches;) 
{ + // load batches + const batch = batches[batchIndex]; + // TODO: bind batches lazily + // If predicate doesn't match anything in the batch we don't need + // to bind the callback + const predicate = this._predicate.bind(batch); + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + if (predicate(index, batch)) { yield batch.get(index) as any; } + } + } + } + public filter(predicate: Predicate): FilteredDataFrame { + return new FilteredDataFrame( + this._chunks, + this._predicate.and(predicate) + ); + } + public countBy(name: Col | string) { + const batches = this._chunks, numBatches = batches.length; + const count_by = typeof name === 'string' ? new Col(name) : name as Col; + // Assume that all dictionary batches are deltas, which means that the + // last record batch has the most complete dictionary + count_by.bind(batches[numBatches - 1]); + const vector = count_by.vector as V; + if (!DataType.isDictionary(vector.type)) { + throw new Error('countBy currently only supports dictionary-encoded columns'); + } + + const countByteLength = Math.ceil(Math.log(vector.dictionary.length) / Math.log(256)); + const CountsArrayType = countByteLength == 4 ? Uint32Array : + countByteLength >= 2 ? Uint16Array : Uint8Array; + + const counts = new CountsArrayType(vector.dictionary.length); + + for (let batchIndex = -1; ++batchIndex < numBatches;) { + // load batches + const batch = batches[batchIndex]; + const predicate = this._predicate.bind(batch); + // rebind the countBy Col + count_by.bind(batch); + const keys = (count_by.vector as V).indices; + // yield all indices + for (let index = -1, numRows = batch.length; ++index < numRows;) { + let key = keys.get(index); + if (key !== null && predicate(index, batch)) { counts[key]++; } + } + } + return new CountByResult(vector.dictionary, IntVector.from(counts)); + } +} diff --git a/js/src/predicate.ts b/js/src/compute/predicate.ts similarity index 94% rename from js/src/predicate.ts rename to js/src/compute/predicate.ts index cfae73ae0af73..ec947d2670c81 100644 --- a/js/src/predicate.ts +++ b/js/src/compute/predicate.ts @@ -15,12 +15,16 @@ // specific language governing permissions and limitations // under the License. 
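The new compute/dataframe.ts above wires `scan`, `filter`, and `countBy` onto `Table`. A rough usage sketch under assumed inputs (the `table` value and its dictionary-encoded 'kind' column are illustrative, not from the patch):

```ts
import { Table, predicate } from 'apache-arrow';

// Assume a table loaded elsewhere with a dictionary-encoded 'kind' column.
declare const table: Table;

// filter() returns a FilteredDataFrame; count() walks each batch and tallies
// rows matching the bound predicate.
const matches = table.filter(predicate.col('kind').eq('science')).count();

// countBy() requires a dictionary-encoded column, per the explicit check in
// DataFrame.countBy above, and returns a CountByResult table.
const counts = table.countBy('kind').toJSON();
console.log(matches, counts);
```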
-import { RecordBatch } from './recordbatch'; -import { Vector, DictionaryVector } from './vector'; +import { Vector } from '../vector'; +import { RecordBatch } from '../recordbatch'; +import { DictionaryVector } from '../vector/dictionary'; +/** @ignore */ export type ValueFunc = (idx: number, cols: RecordBatch) => T | null; +/** @ignore */ export type PredicateFunc = (idx: number, cols: RecordBatch) => boolean; +/** @ignore */ export abstract class Value { eq(other: Value | T): Predicate { if (!(other instanceof Value)) { other = new Literal(other); } @@ -45,10 +49,12 @@ export abstract class Value { } } +/** @ignore */ export class Literal extends Value { constructor(public v: T) { super(); } } +/** @ignore */ export class Col extends Value { // @ts-ignore public vector: Vector; @@ -56,7 +62,7 @@ export class Col extends Value { public colidx: number; constructor(public name: string) { super(); } - bind(batch: RecordBatch) { + bind(batch: RecordBatch): (idx: number, batch?: RecordBatch) => any { if (!this.colidx) { // Assume column index doesn't change between calls to bind //this.colidx = cols.findIndex(v => v.name.indexOf(this.name) != -1); @@ -70,11 +76,13 @@ export class Col extends Value { } if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); } } - this.vector = batch.getChildAt(this.colidx)!; - return this.vector.get.bind(this.vector); + + const vec = this.vector = batch.getChildAt(this.colidx)!; + return (idx: number) => vec.get(idx); } } +/** @ignore */ export abstract class Predicate { abstract bind(batch: RecordBatch): PredicateFunc; and(...expr: Predicate[]): And { return new And(this, ...expr); } @@ -82,6 +90,7 @@ export abstract class Predicate { not(): Predicate { return new Not(this); } } +/** @ignore */ export abstract class ComparisonPredicate extends Predicate { constructor(public readonly left: Value, public readonly right: Value) { super(); @@ -110,8 +119,9 @@ export abstract class ComparisonPredicate extends Predicate { protected abstract _bindLitCol(batch: RecordBatch, lit: Literal, col: Col): PredicateFunc; } +/** @ignore */ export abstract class CombinationPredicate extends Predicate { - readonly children: Predicate[] + readonly children: Predicate[]; constructor(...children: Predicate[]) { super(); this.children = children; @@ -120,12 +130,13 @@ export abstract class CombinationPredicate extends Predicate { // add children to protoype so it doesn't get mangled in es2015/umd ( CombinationPredicate.prototype).children = Object.freeze([]); // freeze for safety +/** @ignore */ export class And extends CombinationPredicate { constructor(...children: Predicate[]) { // Flatten any Ands children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof And ? p.children : p) - }, []) + return accum.concat(p instanceof And ? p.children : p); + }, []); super(...children); } bind(batch: RecordBatch) { @@ -134,12 +145,13 @@ export class And extends CombinationPredicate { } } +/** @ignore */ export class Or extends CombinationPredicate { constructor(...children: Predicate[]) { // Flatten any Ors children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => { - return accum.concat(p instanceof Or ? p.children : p) - }, []) + return accum.concat(p instanceof Or ? 
p.children : p); + }, []); super(...children); } bind(batch: RecordBatch) { @@ -148,6 +160,7 @@ export class Or extends CombinationPredicate { } } +/** @ignore */ export class Equals extends ComparisonPredicate { // Helpers used to cache dictionary reverse lookups between calls to bind private lastDictionary: Vector|undefined; @@ -200,6 +213,7 @@ export class Equals extends ComparisonPredicate { } } +/** @ignore */ export class LTeq extends ComparisonPredicate { protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { const rtrn: boolean = left.v <= right.v; @@ -223,6 +237,7 @@ export class LTeq extends ComparisonPredicate { } } +/** @ignore */ export class GTeq extends ComparisonPredicate { protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc { const rtrn: boolean = left.v >= right.v; @@ -246,6 +261,7 @@ export class GTeq extends ComparisonPredicate { } } +/** @ignore */ export class Not extends Predicate { constructor(public readonly child: Predicate) { super(); @@ -257,6 +273,7 @@ export class Not extends Predicate { } } +/** @ignore */ export class CustomPredicate extends Predicate { constructor(private next: PredicateFunc, private bind_: (batch: RecordBatch) => void) { super(); diff --git a/js/src/data.ts b/js/src/data.ts index 5a117594bc89e..b55321bf98ec2 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -15,317 +15,231 @@ // specific language governing permissions and limitations // under the License. +import { Vector } from './vector'; import { popcnt_bit_range } from './util/bit'; -import { VectorLike, Vector } from './vector'; -import { Int, Bool, FlatListType, List, Struct, Map_ } from './type'; -import { VectorType, TypedArray, TypedArrayConstructor, Dictionary } from './type'; -import { DataType, FlatType, ListType, NestedType, SingleNestedType, DenseUnion, SparseUnion } from './type'; +import { toArrayBufferView } from './util/buffer'; +import { DataType, SparseUnion, DenseUnion } from './type'; +import { VectorType as BufferType, UnionMode, Type } from './enum'; +import { + Dictionary, + Null, Int, Float, + Binary, Bool, Utf8, Decimal, + Date_, Time, Timestamp, Interval, + List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, +} from './type'; -export function toTypedArray(ArrayType: TypedArrayConstructor, values?: T | ArrayLike | Iterable | null): T { - if (!ArrayType && ArrayBuffer.isView(values)) { return values; } - return values instanceof ArrayType ? values - : !values || !ArrayBuffer.isView(values) ? 
ArrayType.from(values || []) - : new ArrayType(values.buffer, values.byteOffset, values.byteLength / ArrayType.BYTES_PER_ELEMENT); -} - -export type Data = DataTypes[T['TType']] & BaseData; -export interface DataTypes { -/* [Type.NONE]*/ 0: BaseData; -/* [Type.Null]*/ 1: FlatData; -/* [Type.Int]*/ 2: FlatData; -/* [Type.Float]*/ 3: FlatData; -/* [Type.Binary]*/ 4: FlatListData; -/* [Type.Utf8]*/ 5: FlatListData; -/* [Type.Bool]*/ 6: BoolData; -/* [Type.Decimal]*/ 7: FlatData; -/* [Type.Date]*/ 8: FlatData; -/* [Type.Time]*/ 9: FlatData; -/* [Type.Timestamp]*/ 10: FlatData; -/* [Type.Interval]*/ 11: FlatData; -/* [Type.List]*/ 12: ListData>; -/* [Type.Struct]*/ 13: NestedData; -/* [Type.Union]*/ 14: UnionData; -/* [Type.FixedSizeBinary]*/ 15: FlatData; -/* [Type.FixedSizeList]*/ 16: SingleNestedData; -/* [Type.Map]*/ 17: NestedData; -/* [Type.DenseUnion]*/ DenseUnion: DenseUnionData; -/*[Type.SparseUnion]*/ SparseUnion: SparseUnionData; -/*[ Type.Dictionary]*/ Dictionary: DictionaryData; -} // When slicing, we do not know the null count of the sliced range without // doing some computation. To avoid doing this eagerly, we set the null count -// to -1 (any negative number will do). When Array::null_count is called the +// to -1 (any negative number will do). When Vector.nullCount is called the // first time, the null count will be computed. See ARROW-33 -export type kUnknownNullCount = -1; -export const kUnknownNullCount = -1; +/** @ignore */ export type kUnknownNullCount = -1; +/** @ignore */ export const kUnknownNullCount = -1; -export class BaseData implements VectorLike { - public type: T; - public length: number; - public offset: number; - // @ts-ignore - public childData: Data[]; - protected _nullCount: number | kUnknownNullCount; - protected /* [VectorType.OFFSET]:*/ 0?: Int32Array; - protected /* [VectorType.DATA]:*/ 1?: T['TArray']; - protected /*[VectorType.VALIDITY]:*/ 2?: Uint8Array; - protected /* [VectorType.TYPE]:*/ 3?: Int8Array; - constructor(type: T, length: number, offset?: number, nullCount?: number) { - this.type = type; - this.length = Math.floor(Math.max(length || 0, 0)); - this.offset = Math.floor(Math.max(offset || 0, 0)); - this._nullCount = Math.floor(Math.max(nullCount || 0, -1)); - } - public get typeId() { return this.type.TType; } - public get nullBitmap() { return this[VectorType.VALIDITY]; } - public get nullCount() { - let nullCount = this._nullCount; - let nullBitmap: Uint8Array | undefined; - if (nullCount === -1 && (nullBitmap = this[VectorType.VALIDITY])) { - this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); - } - return nullCount; - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new BaseData(type, length, offset, nullCount) as any; - } - public slice(offset: number, length: number) { - return length <= 0 ? 
this : this.sliceInternal(this.clone( - this.type, length, this.offset + offset, +(this._nullCount === 0) - 1 - ) as any, offset, length); - } - protected sliceInternal(clone: this, offset: number, length: number) { - let arr: any; - // If typeIds exist, slice the typeIds buffer - (arr = this[VectorType.TYPE]) && (clone[VectorType.TYPE] = this.sliceData(arr, offset, length)); - // If offsets exist, only slice the offsets buffer - (arr = this[VectorType.OFFSET]) && (clone[VectorType.OFFSET] = this.sliceOffsets(arr, offset, length)) || - // Otherwise if no offsets, slice the data buffer - (arr = this[VectorType.DATA]) && (clone[VectorType.DATA] = this.sliceData(arr, offset, length)); - return clone; - } - protected sliceData(data: T['TArray'] & TypedArray, offset: number, length: number) { - return data.subarray(offset, offset + length); - } - protected sliceOffsets(valueOffsets: Int32Array, offset: number, length: number) { - return valueOffsets.subarray(offset, offset + length + 1); - } -} +/** @ignore */ export type NullBuffer = Uint8Array | null | undefined; +/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike | Iterable; +/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike | Iterable; +/** @ignore */ export type DataBuffer = T['TArray'] | ArrayLike | Iterable; -export class FlatData extends BaseData { - public /* [VectorType.DATA]:*/ 1: T['TArray']; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get values() { return this[VectorType.DATA]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, data: Iterable, offset?: number, nullCount?: number) { - super(type, length, offset, nullCount); - this[VectorType.DATA] = toTypedArray(this.ArrayType, data); - this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); - } - public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) { - return new (this.constructor as any)(type, length, this[VectorType.VALIDITY], this[VectorType.DATA], offset, nullCount) as FlatData; - } +/** @ignore */ +export interface Buffers { + [BufferType.OFFSET]: Int32Array; + [BufferType.DATA]: T['TArray']; + [BufferType.VALIDITY]: Uint8Array; + [BufferType.TYPE]: T['TArray']; } -export class BoolData extends FlatData { - protected sliceData(data: Uint8Array) { return data; } +/** @ignore */ +export interface Data { + readonly TType: T['TType']; + readonly TArray: T['TArray']; + readonly TValue: T['TValue']; } -export class FlatListData extends FlatData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public /* [VectorType.DATA]:*/ 1: T['TArray']; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get values() { return this[VectorType.DATA]; } - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable, data: T['TArray'], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, data, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) { - return new FlatListData(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this[VectorType.DATA], offset, nullCount) as FlatListData; - } -} +/** @ignore */ +export class Data { -export class DictionaryData extends BaseData> { - protected _dictionary: Vector; - 
protected _indices: Data>; - public get indices() { return this._indices; } - public get dictionary() { return this._dictionary; } - constructor(type: Dictionary, dictionary: Vector, indices: Data>) { - super(type, indices.length, indices.offset, (indices as any)._nullCount); - this._indices = indices; - this._dictionary = dictionary; - } - public get nullCount() { return this._indices.nullCount; } - public get nullBitmap() { return this._indices.nullBitmap; } - public clone>(type: R, length = this.length, offset = this.offset) { - const data = this._dictionary.data.clone(type.dictionary as any); - return new DictionaryData( - this.type as any, - this._dictionary.clone(data) as any, - this._indices.slice(offset - this.offset, length) - ) as any; - } - protected sliceInternal(clone: this, _offset: number, _length: number) { - clone.length = clone._indices.length; - clone._nullCount = (clone._indices as any)._nullCount; - return clone; - } -} + public readonly type: T; + public readonly length: number; + public readonly offset: number; + public readonly stride: number; + public readonly childData: Data[]; + public readonly values: Buffers[BufferType.DATA]; + public readonly typeIds: Buffers[BufferType.TYPE]; + // @ts-ignore + public readonly nullBitmap: Buffers[BufferType.VALIDITY]; + // @ts-ignore + public readonly valueOffsets: Buffers[BufferType.OFFSET]; -export class NestedData extends BaseData { - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, offset, nullCount); - this.childData = childData; - this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new NestedData(type, length, this[VectorType.VALIDITY], this.childData, offset, nullCount) as any; + public get ArrayType() { return this.type.ArrayType; } + public get typeId(): T['TType'] { return this.type.typeId; } + public get buffers() { + return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers; } - protected sliceInternal(clone: this, offset: number, length: number) { - if (!this[VectorType.OFFSET]) { - clone.childData = this.childData.map((child) => child.slice(offset, length)); + + protected _nullCount: number | kUnknownNullCount; + + public get nullCount() { + let nullCount = this._nullCount; + let nullBitmap: Uint8Array | undefined; + if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) { + this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length); } - return super.sliceInternal(clone, offset, length); + return nullCount; } -} -export class SingleNestedData extends NestedData { - protected _valuesData: Data; - public get values() { return this._valuesData; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueChildData: Data, offset?: number, nullCount?: number) { - super(type, length, nullBitmap, [valueChildData], offset, nullCount); - this._valuesData = valueChildData; + constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial> | Data, childData?: (Data | Vector)[]) { + this.type = type; + this.offset = Math.floor(Math.max(offset || 0, 0)); + this.length = Math.floor(Math.max(length || 0, 0)); + this._nullCount = Math.floor(Math.max(nullCount || 0, -1)); + this.childData = 
(childData || []).map((x) => x instanceof Data ? x : x.data) as Data[]; + let buffer: Buffers[keyof Buffers]; + if (buffers instanceof Data) { + this.stride = buffers.stride; + this.values = buffers.values; + this.typeIds = buffers.typeIds; + this.nullBitmap = buffers.nullBitmap; + this.valueOffsets = buffers.valueOffsets; + } else { + if (buffers) { + (buffer = (buffers as Buffers)[0]) && (this.valueOffsets = buffer); + (buffer = (buffers as Buffers)[1]) && (this.values = buffer); + (buffer = (buffers as Buffers)[2]) && (this.nullBitmap = buffer); + (buffer = (buffers as Buffers)[3]) && (this.typeIds = buffer); + } + const t: any = type; + switch (type.typeId) { + case Type.Decimal: this.stride = 4; break; + case Type.Timestamp: this.stride = 2; break; + case Type.Date: this.stride = 1 + (t as Date_).unit; break; + case Type.Interval: this.stride = 1 + (t as Interval).unit; break; + case Type.Int: this.stride = 1 + +((t as Int).bitWidth > 32); break; + case Type.Time: this.stride = 1 + +((t as Time).bitWidth > 32); break; + case Type.FixedSizeList: this.stride = (t as FixedSizeList).listSize; break; + case Type.FixedSizeBinary: this.stride = (t as FixedSizeBinary).byteWidth; break; + default: this.stride = 1; + } + } } -} -export class ListData extends SingleNestedData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable, valueChildData: Data, offset?: number, nullCount?: number) { - super(type, length, nullBitmap, valueChildData, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); + public clone(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers = this, childData: (Data | Vector)[] = this.childData) { + return new Data(type, offset, length, nullCount, buffers, childData); } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new ListData(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this._valuesData as any, offset, nullCount) as any; - } -} -export class UnionData extends NestedData { - public /* [VectorType.TYPE]:*/ 3: T['TArray']; - public get typeIds() { return this[VectorType.TYPE]; } - public readonly typeIdToChildIndex: { [key: number]: number }; - constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, childData, offset, nullCount); - this[VectorType.TYPE] = toTypedArray(Int8Array, typeIds); - this.typeIdToChildIndex = type.typeIds.reduce((typeIdToChildIndex, typeId, idx) => { - return (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex; - }, Object.create(null) as { [key: number]: number }); + public slice(offset: number, length: number): Data { + // +true === 1, +false === 0, so this means + // we keep nullCount at 0 if it's already 0, + // otherwise set to the invalidated flag -1 + const { stride, typeId, childData } = this; + const nullCount = +(this._nullCount === 0) - 1; + const childStride = typeId === 16 /* FixedSizeList */ ? 
stride : 1; + const buffers = this._sliceBuffers(offset, length, stride, typeId); + return this.clone(this.type, this.offset + offset, length, nullCount, buffers, + // Don't slice children if we have value offsets (the variable-width types) + (!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length)); } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new UnionData(type, length, this[VectorType.VALIDITY], this[VectorType.TYPE], this.childData, offset, nullCount) as any; - } -} -export class SparseUnionData extends UnionData { - constructor(type: SparseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, typeIds, childData, offset, nullCount); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new SparseUnionData( - type, - length, - this[VectorType.VALIDITY], - this[VectorType.TYPE], - this.childData, - offset, nullCount - ) as any; + protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers { + let arr: any, { buffers } = this; + // If typeIds exist, slice the typeIds buffer + (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length)); + // If offsets exist, only slice the offsets buffer + (arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) || + // Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes + (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? 
arr : arr.subarray(stride * offset, stride * (offset + length))); + return buffers; } -} -export class DenseUnionData extends UnionData { - public /* [VectorType.OFFSET]:*/ 0: Int32Array; - public get valueOffsets() { return this[VectorType.OFFSET]; } - constructor(type: DenseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable, valueOffsets: Iterable, childData: Data[], offset?: number, nullCount?: number) { - super(type, length, nullBitmap, typeIds, childData, offset, nullCount); - this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new DenseUnionData( - type, - length, - this[VectorType.VALIDITY], - this[VectorType.TYPE], - this[VectorType.OFFSET], - this.childData, - offset, nullCount - ) as any; + protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] { + return childData.map((child) => child.slice(offset, length)); } -} -export class ChunkedData extends BaseData { - // @ts-ignore - protected _chunkData: Data[]; - protected _chunkVectors: Vector[]; - protected _chunkOffsets: Uint32Array; - public get chunkVectors() { return this._chunkVectors; } - public get chunkOffsets() { return this._chunkOffsets; } - public get chunkData() { - return this._chunkData || ( - this._chunkData = this._chunkVectors.map(({ data }) => data)); - } - constructor(type: T, length: number, chunkVectors: Vector[], offset?: number, nullCount?: number, chunkOffsets?: Uint32Array) { - super(type, length, offset, nullCount); - this._chunkVectors = chunkVectors; - this._chunkOffsets = chunkOffsets || ChunkedData.computeOffsets(chunkVectors); - } - public get nullCount() { - let nullCount = this._nullCount; - if (nullCount === -1) { - this._nullCount = nullCount = this._chunkVectors.reduce((x, c) => x + c.nullCount, 0); + // + // Convenience methods for creating Data instances for each of the Arrow Vector types + // + /** @nocollapse */ + public static Null(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Int(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Dictionary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.indices.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Float(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Bool(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Decimal(type: T, offset: number, length: 
number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Date(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Time(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Timestamp(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Interval(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static FixedSizeBinary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer) { + return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Binary(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static Utf8(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]); + } + /** @nocollapse */ + public static List(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data | Vector) { + return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]); + } + /** @nocollapse */ + public static FixedSizeList(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]); + } + /** @nocollapse */ + public static Struct(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) { + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children); + } + /** @nocollapse */ + public static Map(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) 
{ + return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children); + } + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[]): Data; + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data; + /** @nocollapse */ + public static Union(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) { + const buffers = [ + undefined, undefined, + toArrayBufferView(Uint8Array, nullBitmap), + toArrayBufferView(type.ArrayType, typeIds) + ] as Partial>; + if (type.mode === UnionMode.Sparse) { + return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]); } - return nullCount; - } - public clone(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount): Data { - return new ChunkedData( - type, length, - this._chunkVectors.map((vec) => vec.clone(vec.data.clone(type))) as any, - offset, nullCount, this._chunkOffsets - ) as any; - } - protected sliceInternal(clone: this, offset: number, length: number) { - const chunks = this._chunkVectors; - const offsets = this._chunkOffsets; - const chunkSlices: Vector[] = []; - for (let childIndex = -1, numChildren = chunks.length; ++childIndex < numChildren;) { - const child = chunks[childIndex]; - const childLength = child.length; - const childOffset = offsets[childIndex]; - // If the child is to the right of the slice boundary, exclude - if (childOffset >= offset + length) { continue; } - // If the child is to the left of of the slice boundary, exclude - if (offset >= childOffset + childLength) { continue; } - // If the child is between both left and right boundaries, include w/o slicing - if (childOffset >= offset && (childOffset + childLength) <= offset + length) { - chunkSlices.push(child); - continue; - } - // If the child overlaps one of the slice boundaries, include that slice - const begin = Math.max(0, offset - childOffset); - const end = begin + Math.min(childLength - begin, (offset + length) - childOffset); - chunkSlices.push(child.slice(begin, end)); - } - clone._chunkVectors = chunkSlices; - clone._chunkOffsets = ChunkedData.computeOffsets(chunkSlices); - return clone; - } - static computeOffsets(childVectors: Vector[]) { - const childOffsets = new Uint32Array(childVectors.length + 1); - for (let index = 0, length = childOffsets.length, childOffset = childOffsets[0] = 0; ++index < length;) { - childOffsets[index] = (childOffset += childVectors[index - 1].length); - } - return childOffsets; + buffers[BufferType.OFFSET] = toArrayBufferView(Int32Array, valueOffsetsOrChildren); + return new Data(type, offset, length, nullCount, buffers, children); } } + +((Data.prototype as any).childData = Object.freeze([])); diff --git a/js/src/enum.ts b/js/src/enum.ts new file mode 100644 index 0000000000000..0be6a4ed2938e --- /dev/null +++ b/js/src/enum.ts @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Schema_ from './fb/Schema';
+import * as Message_ from './fb/Message';
+
+export import ArrowType = Schema_.org.apache.arrow.flatbuf.Type;
+export import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit;
+export import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit;
+export import Precision = Schema_.org.apache.arrow.flatbuf.Precision;
+export import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode;
+export import VectorType = Schema_.org.apache.arrow.flatbuf.VectorType;
+export import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit;
+export import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader;
+export import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion;
+
+/**
+ * Main data type enumeration:
+ *
+ * Data types in this library are all *logical*. They can be expressed as
+ * either a primitive physical type (bytes or bits of some fixed size), a
+ * nested type consisting of other data types, or another data type (e.g. a
+ * timestamp encoded as an int64)
+ */
+export enum Type {
+    NONE = 0,             // The default placeholder type
+    Null = 1,             // A NULL type having no physical storage
+    Int = 2,              // Signed or unsigned 8, 16, 32, or 64-bit little-endian integer
+    Float = 3,            // 2, 4, or 8-byte floating point value
+    Binary = 4,           // Variable-length bytes (no guarantee of UTF8-ness)
+    Utf8 = 5,             // UTF8 variable-length string as List
+    Bool = 6,             // Boolean as 1 bit, LSB bit-packed ordering
+    Decimal = 7,          // Precision-and-scale-based decimal type. Storage type depends on the parameters.
+    Date = 8,             // int32_t days or int64_t milliseconds since the UNIX epoch
+    Time = 9,             // Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight
+    Timestamp = 10,       // Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond)
+    Interval = 11,        // YEAR_MONTH or DAY_TIME interval in SQL style
+    List = 12,            // A list of some logical data type
+    Struct = 13,          // Struct of logical types
+    Union = 14,           // Union of logical types
+    FixedSizeBinary = 15, // Fixed-size binary. Each value occupies the same number of bytes
+    FixedSizeList = 16,   // Fixed-size list. Each value occupies the same number of bytes
+    Map = 17,             // Map of named logical types
+
+    // These enum values are here so that TypeScript can narrow the type signatures further
+    // beyond the base Arrow types. The base Arrow types include metadata like bitWidths that
+    // impact the type signatures of the values we return. For example, the Int8Vector reads
+    // 1-byte numbers from an Int8Array, an Int32Vector reads a 4-byte number from an Int32Array,
+    // and an Int64Vector reads a pair of 4-byte lo, hi int32s, and returns them as a zero-copy
+    // slice from an underlying Int32Array.
Library consumers benefit by doing this type narrowing, + // since we can ensure the types across all public methods are propagated and never bail to `any`. + // These values are _never_ actually used at runtime, and they will _never_ be written into the + // flatbuffers metadata of serialized Arrow IPC payloads. + Dictionary = -1, // Dictionary aka Category type + Int8 = -2, + Int16 = -3, + Int32 = -4, + Int64 = -5, + Uint8 = -6, + Uint16 = -7, + Uint32 = -8, + Uint64 = -9, + Float16 = -10, + Float32 = -11, + Float64 = -12, + DateDay = -13, + DateMillisecond = -14, + TimestampSecond = -15, + TimestampMillisecond = -16, + TimestampMicrosecond = -17, + TimestampNanosecond = -18, + TimeSecond = -19, + TimeMillisecond = -20, + TimeMicrosecond = -21, + TimeNanosecond = -22, + DenseUnion = -23, + SparseUnion = -24, + IntervalDayTime = -25, + IntervalYearMonth = -26, +} diff --git a/js/src/fb/Schema.ts b/js/src/fb/Schema.ts index 4a4aeb65599be..e9829d9d8348a 100644 --- a/js/src/fb/Schema.ts +++ b/js/src/fb/Schema.ts @@ -588,7 +588,7 @@ export namespace org.apache.arrow.flatbuf { * @param {Array.} data * @returns {flatbuffers.Offset} */ - static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Uint8Array): flatbuffers.Offset { + static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Int32Array): flatbuffers.Offset { builder.startVector(4, data.length, 4); for (let i = data.length - 1; i >= 0; i--) { builder.addInt32(data[i]); diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts new file mode 100644 index 0000000000000..ae38d4e5be333 --- /dev/null +++ b/js/src/interfaces.ts @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Data } from './data'; +import { Type } from './enum'; +import * as type from './type'; +import { DataType } from './type'; +import * as vecs from './vector/index'; + +/** @ignore */ +export interface ArrayBufferViewConstructor { + readonly prototype: T; + new(length: number): T; + new(arrayOrArrayBuffer: ArrayLike | ArrayBufferLike): T; + new(buffer: ArrayBufferLike, byteOffset: number, length?: number): T; + /** + * The size in bytes of each element in the array. + */ + readonly BYTES_PER_ELEMENT: number; + /** + * Returns a new array from a set of elements. + * @param items A set of elements to include in the new array object. + */ + of(...items: number[]): T; + /** + * Creates an array from an array-like or iterable object. + * @param arrayLike An array-like or iterable object to convert to an array. + * @param mapfn A mapping function to call on every element of the array. + * @param thisArg Value of 'this' used to invoke the mapfn. 
+ */ + from(arrayLike: ArrayLike, mapfn?: (v: number, k: number) => number, thisArg?: any): T; +} + +/** @ignore */ +export type VectorCtorArgs< + T extends Vector, + R extends DataType = any, + TArgs extends any[] = any[], + TCtor extends new (data: Data, ...args: TArgs) => T = + new (data: Data, ...args: TArgs) => T +> = TCtor extends new (data: Data, ...args: infer TArgs) => T ? TArgs : never; + +/** + * Obtain the constructor function of an instance type + * @ignore + */ +export type ConstructorType< + T, + TCtor extends new (...args: any[]) => T = + new (...args: any[]) => T +> = TCtor extends new (...args: any[]) => T ? TCtor : never; + +/** @ignore */ +export type VectorCtorType< + T extends Vector, + R extends DataType = any, + TCtor extends new (data: Data, ...args: VectorCtorArgs) => T = + new (data: Data, ...args: VectorCtorArgs) => T +> = TCtor extends new (data: Data, ...args: VectorCtorArgs) => T ? TCtor : never; + +/** @ignore */ +export type Vector = + T extends Type ? TypeToVector : + T extends DataType ? DataTypeToVector : + never + ; + +/** @ignore */ +export type VectorCtor = + T extends Vector ? VectorCtorType : + T extends Type ? VectorCtorType> : + T extends DataType ? VectorCtorType> : + VectorCtorType + ; + +/** @ignore */ +export type DataTypeCtor = + T extends DataType ? ConstructorType : + T extends Vector ? ConstructorType : + T extends Type ? ConstructorType> : + never + ; + +/** @ignore */ +type TypeToVector = + T extends Type.Null ? vecs.NullVector : + T extends Type.Bool ? vecs.BoolVector : + T extends Type.Int8 ? vecs.Int8Vector : + T extends Type.Int16 ? vecs.Int16Vector : + T extends Type.Int32 ? vecs.Int32Vector : + T extends Type.Int64 ? vecs.Int64Vector : + T extends Type.Uint8 ? vecs.Uint8Vector : + T extends Type.Uint16 ? vecs.Uint16Vector : + T extends Type.Uint32 ? vecs.Uint32Vector : + T extends Type.Uint64 ? vecs.Uint64Vector : + T extends Type.Int ? vecs.IntVector : + T extends Type.Float16 ? vecs.Float16Vector : + T extends Type.Float32 ? vecs.Float32Vector : + T extends Type.Float64 ? vecs.Float64Vector : + T extends Type.Float ? vecs.FloatVector : + T extends Type.Utf8 ? vecs.Utf8Vector : + T extends Type.Binary ? vecs.BinaryVector : + T extends Type.FixedSizeBinary ? vecs.FixedSizeBinaryVector : + T extends Type.Date ? vecs.DateVector : + T extends Type.DateDay ? vecs.DateDayVector : + T extends Type.DateMillisecond ? vecs.DateMillisecondVector : + T extends Type.Timestamp ? vecs.TimestampVector : + T extends Type.TimestampSecond ? vecs.TimestampSecondVector : + T extends Type.TimestampMillisecond ? vecs.TimestampMillisecondVector : + T extends Type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector : + T extends Type.TimestampNanosecond ? vecs.TimestampNanosecondVector : + T extends Type.Time ? vecs.TimeVector : + T extends Type.TimeSecond ? vecs.TimeSecondVector : + T extends Type.TimeMillisecond ? vecs.TimeMillisecondVector : + T extends Type.TimeMicrosecond ? vecs.TimeMicrosecondVector : + T extends Type.TimeNanosecond ? vecs.TimeNanosecondVector : + T extends Type.Decimal ? vecs.DecimalVector : + T extends Type.Union ? vecs.UnionVector : + T extends Type.DenseUnion ? vecs.DenseUnionVector : + T extends Type.SparseUnion ? vecs.SparseUnionVector : + T extends Type.Interval ? vecs.IntervalVector : + T extends Type.IntervalDayTime ? vecs.IntervalDayTimeVector : + T extends Type.IntervalYearMonth ? vecs.IntervalYearMonthVector : + T extends Type.Map ? vecs.MapVector : + T extends Type.List ? vecs.ListVector : + T extends Type.Struct ? 
vecs.StructVector : + T extends Type.Dictionary ? vecs.DictionaryVector : + T extends Type.FixedSizeList ? vecs.FixedSizeListVector : + vecs.BaseVector + ; + +/** @ignore */ +type DataTypeToVector = + T extends type.Null ? vecs.NullVector : + T extends type.Bool ? vecs.BoolVector : + T extends type.Int8 ? vecs.Int8Vector : + T extends type.Int16 ? vecs.Int16Vector : + T extends type.Int32 ? vecs.Int32Vector : + T extends type.Int64 ? vecs.Int64Vector : + T extends type.Uint8 ? vecs.Uint8Vector : + T extends type.Uint16 ? vecs.Uint16Vector : + T extends type.Uint32 ? vecs.Uint32Vector : + T extends type.Uint64 ? vecs.Uint64Vector : + T extends type.Int ? vecs.IntVector : + T extends type.Float16 ? vecs.Float16Vector : + T extends type.Float32 ? vecs.Float32Vector : + T extends type.Float64 ? vecs.Float64Vector : + T extends type.Float ? vecs.FloatVector : + T extends type.Utf8 ? vecs.Utf8Vector : + T extends type.Binary ? vecs.BinaryVector : + T extends type.FixedSizeBinary ? vecs.FixedSizeBinaryVector : + T extends type.Date_ ? vecs.DateVector : + T extends type.DateDay ? vecs.DateDayVector : + T extends type.DateMillisecond ? vecs.DateMillisecondVector : + T extends type.Timestamp ? vecs.TimestampVector : + T extends type.TimestampSecond ? vecs.TimestampSecondVector : + T extends type.TimestampMillisecond ? vecs.TimestampMillisecondVector : + T extends type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector : + T extends type.TimestampNanosecond ? vecs.TimestampNanosecondVector : + T extends type.Time ? vecs.TimeVector : + T extends type.TimeSecond ? vecs.TimeSecondVector : + T extends type.TimeMillisecond ? vecs.TimeMillisecondVector : + T extends type.TimeMicrosecond ? vecs.TimeMicrosecondVector : + T extends type.TimeNanosecond ? vecs.TimeNanosecondVector : + T extends type.Decimal ? vecs.DecimalVector : + T extends type.Union ? vecs.UnionVector : + T extends type.DenseUnion ? vecs.DenseUnionVector : + T extends type.SparseUnion ? vecs.SparseUnionVector : + T extends type.Interval ? vecs.IntervalVector : + T extends type.IntervalDayTime ? vecs.IntervalDayTimeVector : + T extends type.IntervalYearMonth ? vecs.IntervalYearMonthVector : + T extends type.Map_ ? vecs.MapVector : + T extends type.List ? vecs.ListVector : + T extends type.Struct ? vecs.StructVector : + T extends type.Dictionary ? vecs.DictionaryVector : + T extends type.FixedSizeList ? vecs.FixedSizeListVector : + vecs.BaseVector + ; + +/** @ignore */ +type TypeToDataType = + T extends Type.Null ? type.Null + : T extends Type.Bool ? type.Bool + : T extends Type.Int ? type.Int + : T extends Type.Int16 ? type.Int16 + : T extends Type.Int32 ? type.Int32 + : T extends Type.Int64 ? type.Int64 + : T extends Type.Uint8 ? type.Uint8 + : T extends Type.Uint16 ? type.Uint16 + : T extends Type.Uint32 ? type.Uint32 + : T extends Type.Uint64 ? type.Uint64 + : T extends Type.Int8 ? type.Int8 + : T extends Type.Float16 ? type.Float16 + : T extends Type.Float32 ? type.Float32 + : T extends Type.Float64 ? type.Float64 + : T extends Type.Float ? type.Float + : T extends Type.Utf8 ? type.Utf8 + : T extends Type.Binary ? type.Binary + : T extends Type.FixedSizeBinary ? type.FixedSizeBinary + : T extends Type.Date ? type.Date_ + : T extends Type.DateDay ? type.DateDay + : T extends Type.DateMillisecond ? type.DateMillisecond + : T extends Type.Timestamp ? type.Timestamp + : T extends Type.TimestampSecond ? type.TimestampSecond + : T extends Type.TimestampMillisecond ? type.TimestampMillisecond + : T extends Type.TimestampMicrosecond ? 
type.TimestampMicrosecond + : T extends Type.TimestampNanosecond ? type.TimestampNanosecond + : T extends Type.Time ? type.Time + : T extends Type.TimeSecond ? type.TimeSecond + : T extends Type.TimeMillisecond ? type.TimeMillisecond + : T extends Type.TimeMicrosecond ? type.TimeMicrosecond + : T extends Type.TimeNanosecond ? type.TimeNanosecond + : T extends Type.Decimal ? type.Decimal + : T extends Type.Union ? type.Union + : T extends Type.DenseUnion ? type.DenseUnion + : T extends Type.SparseUnion ? type.SparseUnion + : T extends Type.Interval ? type.Interval + : T extends Type.IntervalDayTime ? type.IntervalDayTime + : T extends Type.IntervalYearMonth ? type.IntervalYearMonth + : T extends Type.Map ? type.Map_ + : T extends Type.List ? type.List + : T extends Type.Struct ? type.Struct + : T extends Type.Dictionary ? type.Dictionary + : T extends Type.FixedSizeList ? type.FixedSizeList + : DataType + ; diff --git a/js/src/io/adapters.ts b/js/src/io/adapters.ts new file mode 100644 index 0000000000000..427fc29ab2228 --- /dev/null +++ b/js/src/io/adapters.ts @@ -0,0 +1,386 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
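The adapters in this module share a pull-based protocol: each generator yields once up front so the caller can inject a `{ cmd, size }` command before the underlying source is opened; `cmd: 'peek'` returns the next `size` bytes while leaving them queued, and `cmd: 'read'` dequeues them. A minimal sketch of driving one of these primed iterators by hand, assuming the default export has been imported as `streamAdapters` (as the io modules below do); the caller code is hypothetical, and the `as any` cast sidesteps the iterator's default `next()` typing:

    const it = streamAdapters.fromIterable(new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]));
    // Inspect the first 4 bytes without consuming them...
    const peeked = it.next({ cmd: 'peek', size: 4 } as any).value;
    // ...then consume those same 4 bytes for real.
    const read = it.next({ cmd: 'read', size: 4 } as any).value;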
+ +import { + toUint8Array, + joinUint8Arrays, + ArrayBufferViewInput, + toUint8ArrayIterator, + toUint8ArrayAsyncIterator +} from '../util/buffer'; + +import { ReadableDOMStreamOptions } from './interfaces'; + +/** @ignore */ +export default { + fromIterable(source: Iterable | T): IterableIterator { + return pump(fromIterable(source)); + }, + fromAsyncIterable(source: AsyncIterable | PromiseLike): AsyncIterableIterator { + return pump(fromAsyncIterable(source)); + }, + fromDOMStream(source: ReadableStream): AsyncIterableIterator { + return pump(fromDOMStream(source)); + }, + fromNodeStream(stream: NodeJS.ReadableStream): AsyncIterableIterator { + return pump(fromNodeStream(stream)); + }, + // @ts-ignore + toDOMStream(source: Iterable | AsyncIterable, options?: ReadableDOMStreamOptions): ReadableStream { + throw new Error(`"toDOMStream" not available in this environment`); + }, + // @ts-ignore + toNodeStream(source: Iterable | AsyncIterable, options?: import('stream').ReadableOptions): import('stream').Readable { + throw new Error(`"toNodeStream" not available in this environment`); + }, +}; + +/** @ignore */ +const pump = | AsyncIterator>(iterator: T) => { iterator.next(); return iterator; }; + +/** @ignore */ +function* fromIterable(source: Iterable | T): IterableIterator { + + let done: boolean, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before creating the source Iterator + ({ cmd, size } = yield null); + + // initialize the iterator + let it = toUint8ArrayIterator(source)[Symbol.iterator](); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) ? + it.next(undefined) : it.next(size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (typeof it.throw === 'function') && (it.throw(e)); + } finally { + (threw === false) && (typeof it.return === 'function') && (it.return()); + } +} + +/** @ignore */ +async function* fromAsyncIterable(source: AsyncIterable | PromiseLike): AsyncIterableIterator { + + let done: boolean, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before creating the source AsyncIterator + ({ cmd, size } = yield null); + + // initialize the iterator + let it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator](); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) + ? 
await it.next(undefined) + : await it.next(size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (typeof it.throw === 'function') && (await it.throw(e)); + } finally { + (threw === false) && (typeof it.return === 'function') && (await it.return()); + } +} + +// All this manual Uint8Array chunk management can be avoided if/when engines +// add support for ArrayBuffer.transfer() or ArrayBuffer.prototype.realloc(): +// https://github.com/domenic/proposal-arraybuffer-transfer +/** @ignore */ +async function* fromDOMStream(source: ReadableStream): AsyncIterableIterator { + + let done = false, threw = false; + let buffers: Uint8Array[] = [], buffer: Uint8Array; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before we establish the ReadableStream lock + ({ cmd, size } = yield null); + + // initialize the reader and lock the stream + let it = new AdaptiveByteReader(source); + + try { + do { + // read the next value + ({ done, value: buffer } = isNaN(size - bufferLength) + ? await it['read'](undefined) + : await it['read'](size - bufferLength)); + // if chunk is not null or empty, push it onto the queue + if (!done && buffer.byteLength > 0) { + buffers.push(toUint8Array(buffer)); + bufferLength += buffer.byteLength; + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } catch (e) { + (threw = true) && (await it['cancel'](e)); + } finally { + (threw === false) ? (await it['cancel']()) + : source['locked'] && it.releaseLock(); + } +} + +/** @ignore */ +class AdaptiveByteReader { + + private supportsBYOB: boolean; + private byobReader: ReadableStreamBYOBReader | null = null; + private defaultReader: ReadableStreamDefaultReader | null = null; + private reader: ReadableStreamBYOBReader | ReadableStreamDefaultReader | null; + + constructor(private source: ReadableStream) { + try { + this.supportsBYOB = !!(this.reader = this.getBYOBReader()); + } catch (e) { + this.supportsBYOB = !!!(this.reader = this.getDefaultReader()); + } + } + + get closed(): Promise { + return this.reader ? this.reader['closed'].catch(() => {}) : Promise.resolve(); + } + + releaseLock(): void { + if (this.reader) { + this.reader.releaseLock(); + } + this.reader = this.byobReader = this.defaultReader = null; + } + + async cancel(reason?: any): Promise { + const { reader, source } = this; + reader && (await reader['cancel'](reason)); + source && (source['locked'] && this.releaseLock()); + } + + async read(size?: number): Promise> { + if (size === 0) { + return { done: this.reader == null, value: new Uint8Array(0) }; + } + const result = !this.supportsBYOB || typeof size !== 'number' + ? 
await this.getDefaultReader().read() + : await this.readFromBYOBReader(size); + !result.done && (result.value = toUint8Array(result as ReadableStreamReadResult)); + return result as ReadableStreamReadResult; + } + + private getDefaultReader() { + if (this.byobReader) { this.releaseLock(); } + if (!this.defaultReader) { + this.defaultReader = this.source['getReader'](); + // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions + // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified + // about why these errors are raised, but I'm sure there's some important spec reason that + // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the + // only solution in this case :/ + this.defaultReader['closed'].catch(() => {}); + } + return (this.reader = this.defaultReader); + } + + private getBYOBReader() { + if (this.defaultReader) { this.releaseLock(); } + if (!this.byobReader) { + this.byobReader = this.source['getReader']({ mode: 'byob' }); + // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions + // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified + // about why these errors are raised, but I'm sure there's some important spec reason that + // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the + // only solution in this case :/ + this.byobReader['closed'].catch(() => {}); + } + return (this.reader = this.byobReader); + } + + // This strategy plucked from the example in the streams spec: + // https://streams.spec.whatwg.org/#example-manual-read-bytes + private async readFromBYOBReader(size: number) { + return await readInto(this.getBYOBReader(), new ArrayBuffer(size), 0, size); + } +} + +/** @ignore */ +async function readInto(reader: ReadableStreamBYOBReader, buffer: ArrayBufferLike, offset: number, size: number): Promise> { + if (offset >= size) { + return { done: false, value: new Uint8Array(buffer, 0, size) }; + } + const { done, value } = await reader.read(new Uint8Array(buffer, offset, size - offset)); + if (((offset += value.byteLength) < size) && !done) { + return await readInto(reader, value.buffer, offset, size); + } + return { done, value: new Uint8Array(value.buffer, 0, offset) }; +} + +/** @ignore */ +type EventName = 'end' | 'error' | 'readable'; +/** @ignore */ +type Event = [EventName, (_: any) => void, Promise<[EventName, Error | null]>]; +/** @ignore */ +const onEvent = (stream: NodeJS.ReadableStream, event: T) => { + let handler = (_: any) => resolve([event, _]); + let resolve: (value?: [T, any] | PromiseLike<[T, any]>) => void; + return [event, handler, new Promise<[T, any]>( + (r) => (resolve = r) && stream['once'](event, handler) + )] as Event; +}; + +/** @ignore */ +async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncIterableIterator { + + let events: Event[] = []; + let event: EventName = 'error'; + let done = false, err: Error | null = null; + let cmd: 'peek' | 'read', size: number, bufferLength = 0; + let buffers: Uint8Array[] = [], buffer: Uint8Array | Buffer | string; + + function byteRange() { + if (cmd === 'peek') { + return joinUint8Arrays(buffers, size)[0]; + } + [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size); + return buffer; + } + + // Yield so the caller can inject the read command before we + // add the listener for the source stream's 'readable' event. 
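    // A sketch of the command protocol this generator expects, assuming a
    // hypothetical caller that has already primed it via `pump`:
    //   it.next({ cmd: 'peek', size: 8 })  // look at up to 8 bytes, leave them queued
    //   it.next({ cmd: 'read', size: 8 })  // dequeue those bytes
    // A non-finite size requests whatever the stream currently has buffered.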
+ ({ cmd, size } = yield null); + + // ignore stdin if it's a TTY + if ((stream as any)['isTTY']) { return yield new Uint8Array(0); } + + try { + // initialize the stream event handlers + events[0] = onEvent(stream, 'end'); + events[1] = onEvent(stream, 'error'); + + do { + events[2] = onEvent(stream, 'readable'); + + // wait on the first message event from the stream + [event, err] = await Promise.race(events.map((x) => x[2])); + + // if the stream emitted an Error, rethrow it + if (event === 'error') { break; } + if (!(done = event === 'end')) { + // If the size is NaN, request to read everything in the stream's internal buffer + if (!isFinite(size - bufferLength)) { + buffer = toUint8Array(stream['read'](undefined)); + } else { + buffer = toUint8Array(stream['read'](size - bufferLength)); + // If the byteLength is 0, then the requested amount is more than the stream has + // in its internal buffer. In this case the stream needs a "kick" to tell it to + // continue emitting readable events, so request to read everything the stream + // has in its internal buffer right now. + if (buffer.byteLength < (size - bufferLength)) { + buffer = toUint8Array(stream['read'](undefined)); + } + } + // if chunk is not null or empty, push it onto the queue + if (buffer.byteLength > 0) { + buffers.push(buffer); + bufferLength += buffer.byteLength; + } + } + // If we have enough bytes in our buffer, yield chunks until we don't + if (done || size <= bufferLength) { + do { + ({ cmd, size } = yield byteRange()); + } while (size < bufferLength); + } + } while (!done); + } finally { + await cleanup(events, event === 'error' ? err : null); + } + + function cleanup(events: Event[], err?: T) { + buffer = buffers = null; + return new Promise(async (resolve, reject) => { + for (const [evt, fn] of events) { + stream['off'](evt, fn); + } + try { + // Some stream implementations don't call the destroy callback, + // because it's really a node-internal API. Just calling `destroy` + // here should be enough to conform to the ReadableStream contract + const destroy = (stream as any)['destroy']; + destroy && destroy.call(stream, err); + err = undefined; + } catch (e) { err = e || err; } finally { + err != null ? reject(err) : resolve(); + } + }); + } +} diff --git a/js/src/io/file.ts b/js/src/io/file.ts new file mode 100644 index 0000000000000..d88bc5f6f4e56 --- /dev/null +++ b/js/src/io/file.ts @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
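The RandomAccessFile class defined just below wraps a fully-buffered byte range with a file-like cursor API, and AsyncRandomAccessFile does the same over a promises-API FileHandle. A minimal usage sketch for the synchronous flavor (the 16-byte buffer is hypothetical):

    const file = new RandomAccessFile(new Uint8Array(16));
    file.seek(4);                    // position the cursor; returns false at or past EOF
    const chunk = file.read(8);      // Uint8Array of up to 8 bytes, or null at EOF
    const magic = file.readInt32(0); // little-endian int32 read at an absolute position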
+ +import { FileHandle } from './interfaces'; +import { ByteStream, AsyncByteStream } from './stream'; +import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; + +/** @ignore */ +export class RandomAccessFile extends ByteStream { + public size: number; + public position: number = 0; + protected buffer: Uint8Array | null; + constructor(buffer: ArrayBufferViewInput, byteLength?: number) { + super(); + this.buffer = toUint8Array(buffer); + this.size = typeof byteLength === 'undefined' ? this.buffer.byteLength : byteLength; + } + public readInt32(position: number) { + const { buffer, byteOffset } = this.readAt(position, 4); + return new DataView(buffer, byteOffset).getInt32(0, true); + } + public seek(position: number) { + this.position = Math.min(position, this.size); + return position < this.size; + } + public read(nBytes?: number | null) { + const { buffer, size, position } = this; + if (buffer && position < size) { + if (typeof nBytes !== 'number') { nBytes = Infinity; } + this.position = Math.min(size, + position + Math.min(size - position, nBytes)); + return buffer.subarray(position, this.position); + } + return null; + } + public readAt(position: number, nBytes: number) { + const buf = this.buffer; + const end = Math.min(this.size, position + nBytes); + return buf ? buf.subarray(position, end) : new Uint8Array(nBytes); + } + public close() { this.buffer && (this.buffer = null); } + public throw(value?: any) { this.close(); return { done: true, value }; } + public return(value?: any) { this.close(); return { done: true, value }; } +} + +/** @ignore */ +export class AsyncRandomAccessFile extends AsyncByteStream { + // @ts-ignore + public size: number; + public position: number = 0; + public _pending?: Promise; + protected _handle: FileHandle | null; + constructor(file: FileHandle, byteLength?: number) { + super(); + this._handle = file; + if (typeof byteLength === 'number') { + this.size = byteLength; + } else { + this._pending = (async () => { + delete this._pending; + this.size = (await file.stat()).size; + })(); + } + } + public async readInt32(position: number) { + const { buffer, byteOffset } = await this.readAt(position, 4); + return new DataView(buffer, byteOffset).getInt32(0, true); + } + public async seek(position: number) { + this._pending && await this._pending; + this.position = Math.min(position, this.size); + return position < this.size; + } + public async read(nBytes?: number | null) { + this._pending && await this._pending; + const { _handle: file, size, position } = this; + if (file && position < size) { + if (typeof nBytes !== 'number') { nBytes = Infinity; } + let pos = position, offset = 0, bytesRead = 0; + let end = Math.min(size, pos + Math.min(size - pos, nBytes)); + let buffer = new Uint8Array(Math.max(0, (this.position = end) - pos)); + while ((pos += bytesRead) < end && (offset += bytesRead) < buffer.byteLength) { + ({ bytesRead } = await file.read(buffer, offset, buffer.byteLength - offset, pos)); + } + return buffer; + } + return null; + } + public async readAt(position: number, nBytes: number) { + this._pending && await this._pending; + const { _handle: file, size } = this; + if (file && (position + nBytes) < size) { + const end = Math.min(size, position + nBytes); + const buffer = new Uint8Array(end - position); + return (await file.read(buffer, 0, nBytes, position)).buffer; + } + return new Uint8Array(nBytes); + } + public async close() { const f = this._handle; this._handle = null; f && await f.close(); } + public async throw(value?: any) { 
await this.close(); return { done: true, value }; }
+    public async return(value?: any) { await this.close(); return { done: true, value }; }
+}
diff --git a/js/src/io/interfaces.ts b/js/src/io/interfaces.ts
new file mode 100644
index 0000000000000..9892562e0c0ec
--- /dev/null
+++ b/js/src/io/interfaces.ts
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import streamAdapters from './adapters';
+
+/** @ignore */
+export const ITERATOR_DONE: any = Object.freeze({ done: true, value: void (0) });
+
+/** @ignore */
+export type FileHandle = import('fs').promises.FileHandle;
+/** @ignore */
+export type ArrowJSONLike = { schema: any; batches?: any[]; dictionaries?: any[]; };
+/** @ignore */
+export type ReadableDOMStreamOptions = { type: 'bytes' | undefined, autoAllocateChunkSize?: number, highWaterMark?: number };
+
+/** @ignore */
+export class ArrowJSON {
+    // @ts-ignore
+    constructor(private _json: ArrowJSONLike) {}
+    public get schema(): any { return this._json['schema']; }
+    public get batches(): any[] { return (this._json['batches'] || []) as any[]; }
+    public get dictionaries(): any[] { return (this._json['dictionaries'] || []) as any[]; }
+}
+
+/** @ignore */
+export interface Readable<T> {
+
+    readonly closed: Promise<void>;
+    cancel(reason?: any): Promise<void>;
+
+    read(size?: number | null): Promise<T | null>;
+    peek(size?: number | null): Promise<T | null>;
+    throw(value?: any): Promise<IteratorResult<any>>;
+    return(value?: any): Promise<IteratorResult<any>>;
+    next(size?: number | null): Promise<IteratorResult<T>>;
+}
+
+/** @ignore */
+export interface Writable<T> {
+    readonly closed: Promise<void>;
+    close(): void;
+    write(chunk: T): void;
+    abort(reason?: any): void;
+}
+
+/** @ignore */
+export interface ReadableWritable<TReadable, TWritable> extends Readable<TReadable>, Writable<TWritable> {
+    [Symbol.asyncIterator](): AsyncIterableIterator<TReadable>;
+    toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream<TReadable>;
+    toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable;
+}
+
+/** @ignore */
+export abstract class ReadableInterop<T> {
+
+    public abstract toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream<T>;
+    public abstract toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable;
+
+    public tee(): [ReadableStream<T>, ReadableStream<T>] {
+        return this._getDOMStream().tee();
+    }
+    public pipe<R extends NodeJS.WritableStream>(writable: R, options?: { end?: boolean; }) {
+        return this._getNodeStream().pipe(writable, options);
+    }
+    public pipeTo(writable: WritableStream<T>, options?: PipeOptions) { return this._getDOMStream().pipeTo(writable, options); }
+    public pipeThrough<R extends ReadableStream<any>>(duplex: { writable: WritableStream<T>, readable: R }, options?: PipeOptions) {
+        return this._getDOMStream().pipeThrough(duplex, options);
+    }
+
+    private _DOMStream?: ReadableStream<T>;
+    private _getDOMStream() {
+        return this._DOMStream || (this._DOMStream =
this.toDOMStream()); + } + + private _nodeStream?: import('stream').Readable; + private _getNodeStream() { + return this._nodeStream || (this._nodeStream = this.toNodeStream()); + } +} + +/** @ignore */ +type Resolution = { resolve: (value?: T | PromiseLike) => void; reject: (reason?: any) => void; }; + +/** @ignore */ +export class AsyncQueue extends ReadableInterop + implements AsyncIterableIterator, ReadableWritable { + + protected _values: TWritable[] = []; + protected _error?: { error: any; }; + protected _closedPromise: Promise; + protected _closedPromiseResolve?: (value?: any) => void; + protected resolvers: Resolution>[] = []; + + constructor() { + super(); + this._closedPromise = new Promise((r) => this._closedPromiseResolve = r); + } + + public get closed(): Promise { return this._closedPromise; } + public async cancel(reason?: any) { await this.return(reason); } + public write(value: TWritable) { + if (this._ensureOpen()) { + this.resolvers.length <= 0 + ? (this._values.push(value)) + : (this.resolvers.shift()!.resolve({ done: false, value } as any)); + } + } + public abort(value?: any) { + if (this._closedPromiseResolve) { + this.resolvers.length <= 0 + ? (this._error = { error: value }) + : (this.resolvers.shift()!.reject({ done: true, value })); + } + } + public close() { + if (this._closedPromiseResolve) { + const { resolvers } = this; + while (resolvers.length > 0) { + resolvers.shift()!.resolve(ITERATOR_DONE); + } + this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + } + } + + public [Symbol.asyncIterator]() { return this; } + public toDOMStream(options?: ReadableDOMStreamOptions) { + return streamAdapters.toDOMStream( + (this._closedPromiseResolve || this._error) + ? (this as AsyncIterable) + : (this._values as any) as Iterable, + options); + } + public toNodeStream(options?: import('stream').ReadableOptions) { + return streamAdapters.toNodeStream( + (this._closedPromiseResolve || this._error) + ? (this as AsyncIterable) + : (this._values as any) as Iterable, + options); + } + public async throw(_?: any) { await this.abort(_); return ITERATOR_DONE; } + public async return(_?: any) { await this.close(); return ITERATOR_DONE; } + + public async read(size?: number | null): Promise { return (await this.next(size, 'read')).value; } + public async peek(size?: number | null): Promise { return (await this.next(size, 'peek')).value; } + public next(..._args: any[]): Promise> { + if (this._values.length > 0) { + return Promise.resolve({ done: false, value: this._values.shift()! } as any); + } else if (this._error) { + return Promise.reject({ done: true, value: this._error.error }); + } else if (!this._closedPromiseResolve) { + return Promise.resolve(ITERATOR_DONE); + } else { + return new Promise>((resolve, reject) => { + this.resolvers.push({ resolve, reject }); + }); + } + } + + protected _ensureOpen() { + if (this._closedPromiseResolve) { + return true; + } + throw new Error(`${this} is closed`); + } +} diff --git a/js/src/io/stream.ts b/js/src/io/stream.ts new file mode 100644 index 0000000000000..2fe686532a5e5 --- /dev/null +++ b/js/src/io/stream.ts @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import streamAdapters from './adapters'; +import { decodeUtf8 } from '../util/utf8'; +import { ITERATOR_DONE, Readable, Writable, AsyncQueue } from './interfaces'; +import { toUint8Array, joinUint8Arrays, ArrayBufferViewInput } from '../util/buffer'; + +import { + isPromise, isFetchResponse, + isIterable, isAsyncIterable, + isReadableDOMStream, isReadableNodeStream +} from '../util/compat'; + +/** @ignore */ +export type WritableSink = Writable | WritableStream | NodeJS.WritableStream | null; +/** @ignore */ +export type ReadableSource = Readable | PromiseLike | AsyncIterable | ReadableStream | NodeJS.ReadableStream | null; + +/** @ignore */ +export class AsyncByteQueue extends AsyncQueue { + public write(value: ArrayBufferViewInput | Uint8Array) { + if ((value = toUint8Array(value)).byteLength > 0) { + return super.write(value as T); + } + } + public toString(sync: true): string; + public toString(sync?: false): Promise; + public toString(sync = false) { + return sync + ? decodeUtf8(this.toUint8Array(true)) + : this.toUint8Array(false).then(decodeUtf8); + } + public toUint8Array(sync: true): Uint8Array; + public toUint8Array(sync?: false): Promise; + public toUint8Array(sync = false) { + return sync ? joinUint8Arrays(this._values as any[])[0] : (async () => { + let buffers = [], byteLength = 0; + for await (const chunk of this) { + buffers.push(chunk); + byteLength += chunk.byteLength; + } + return joinUint8Arrays(buffers, byteLength)[0]; + })(); + } +} + +/** @ignore */ +export class ByteStream implements IterableIterator { + // @ts-ignore + private source: ByteStreamSource; + constructor(source?: Iterable | ArrayBufferViewInput) { + if (source) { + this.source = new ByteStreamSource(streamAdapters.fromIterable(source)); + } + } + [Symbol.iterator]() { return this; } + public next(value?: any) { return this.source.next(value); } + public throw(value?: any) { return this.source.throw(value); } + public return(value?: any) { return this.source.return(value); } + public peek(size?: number | null) { return this.source.peek(size); } + public read(size?: number | null) { return this.source.read(size); } +} + +/** @ignore */ +export class AsyncByteStream implements Readable, AsyncIterableIterator { + // @ts-ignore + private source: AsyncByteStreamSource; + constructor(source?: PromiseLike | Response | ReadableStream | NodeJS.ReadableStream | AsyncIterable | Iterable) { + if (source instanceof AsyncByteStream) { + this.source = (source as AsyncByteStream).source; + } else if (source instanceof AsyncByteQueue) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + } else if (isReadableNodeStream(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromNodeStream(source)); + } else if (isFetchResponse(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source.body!)); + } else if (isIterable(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromIterable(source)); + } else if (isPromise(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + 
} else if (isAsyncIterable(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source)); + } else if (isReadableDOMStream(source)) { + this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source)); + } + } + [Symbol.asyncIterator]() { return this; } + public next(value?: any) { return this.source.next(value); } + public throw(value?: any) { return this.source.throw(value); } + public return(value?: any) { return this.source.return(value); } + public get closed(): Promise { return this.source.closed; } + public cancel(reason?: any) { return this.source.cancel(reason); } + public peek(size?: number | null) { return this.source.peek(size); } + public read(size?: number | null) { return this.source.read(size); } +} + +/** @ignore */ +interface ByteStreamSourceIterator extends IterableIterator { + next(value?: { cmd: 'peek' | 'read', size?: number | null }): IteratorResult; +} + +/** @ignore */ +interface AsyncByteStreamSourceIterator extends AsyncIterableIterator { + next(value?: { cmd: 'peek' | 'read', size?: number | null }): Promise>; +} + +/** @ignore */ +class ByteStreamSource { + constructor(protected source: ByteStreamSourceIterator) {} + public cancel(reason?: any) { this.return(reason); } + public peek(size?: number | null): T | null { return this.next(size, 'peek').value; } + public read(size?: number | null): T | null { return this.next(size, 'read').value; } + public next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return this.source.next({ cmd, size }); } + public throw(value?: any) { return Object.create((this.source.throw && this.source.throw(value)) || ITERATOR_DONE); } + public return(value?: any) { return Object.create((this.source.return && this.source.return(value)) || ITERATOR_DONE); } +} + +/** @ignore */ +class AsyncByteStreamSource implements Readable { + + private _closedPromise: Promise; + private _closedPromiseResolve?: (value?: any) => void; + constructor (protected source: ByteStreamSourceIterator | AsyncByteStreamSourceIterator) { + this._closedPromise = new Promise((r) => this._closedPromiseResolve = r); + } + public async cancel(reason?: any) { await this.return(reason); } + public get closed(): Promise { return this._closedPromise; } + public async read(size?: number | null): Promise { return (await this.next(size, 'read')).value; } + public async peek(size?: number | null): Promise { return (await this.next(size, 'peek')).value; } + public async next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return (await this.source.next({ cmd, size })); } + public async throw(value?: any) { + const result = (this.source.throw && await this.source.throw(value)) || ITERATOR_DONE; + this._closedPromiseResolve && this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + return Object.create(result); + } + public async return(value?: any) { + const result = (this.source.return && await this.source.return(value)) || ITERATOR_DONE; + this._closedPromiseResolve && this._closedPromiseResolve(); + this._closedPromiseResolve = undefined; + return Object.create(result); + } +} diff --git a/js/src/ipc/magic.ts b/js/src/ipc/magic.ts deleted file mode 100644 index 0688d1a2d1e19..0000000000000 --- a/js/src/ipc/magic.ts +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { flatbuffers } from 'flatbuffers'; -import ByteBuffer = flatbuffers.ByteBuffer; - -export const PADDING = 4; -export const MAGIC_STR = 'ARROW1'; -export const MAGIC = new Uint8Array(MAGIC_STR.length); - -for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { - MAGIC[i] = MAGIC_STR.charCodeAt(i); -} - -export function checkForMagicArrowString(buffer: Uint8Array, index = 0) { - for (let i = -1, n = MAGIC.length; ++i < n;) { - if (MAGIC[i] !== buffer[index + i]) { - return false; - } - } - return true; -} - -export function isValidArrowFile(bb: ByteBuffer) { - let fileLength = bb.capacity(), footerLength: number, lengthOffset: number; - if ((fileLength < magicX2AndPadding /* Arrow buffer too small */) || - (!checkForMagicArrowString(bb.bytes(), 0) /* Missing magic start */) || - (!checkForMagicArrowString(bb.bytes(), fileLength - magicLength) /* Missing magic end */) || - (/* Invalid footer length */ - (footerLength = bb.readInt32(lengthOffset = fileLength - magicAndPadding)) < 1 && - (footerLength + lengthOffset > fileLength))) { - return false; - } - return true; -} - -export const magicLength = MAGIC.length; -export const magicAndPadding = magicLength + PADDING; -export const magicX2AndPadding = magicLength * 2 + PADDING; diff --git a/js/src/ipc/message.ts b/js/src/ipc/message.ts new file mode 100644 index 0000000000000..194e4ac7f679d --- /dev/null +++ b/js/src/ipc/message.ts @@ -0,0 +1,249 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
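The magic-string helpers from the deleted magic.ts reappear at the bottom of this new file, and the reader classes it introduces walk the IPC stream framing sketched below (a simplified sketch, not part of the patch; `frames` is a hypothetical name, and body bytes are skipped here because their length lives inside the flatbuffer metadata):

    // Stream layout: [int32 metadataLength][flatbuffer Message metadata][body bytes]...
    function* frames(buf: Uint8Array): IterableIterator<Uint8Array> {
        const view = new DataView(buf.buffer, buf.byteOffset);
        let offset = 0;
        while (offset + 4 <= buf.byteLength) {
            const metadataLength = view.getInt32(offset, true);
            if (metadataLength <= 0) { break; } // a zero length terminates the stream
            yield buf.subarray(offset + 4, offset + 4 + metadataLength);
            offset += 4 + metadataLength;
        }
    }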
+
+import { MessageHeader } from '../enum';
+import { flatbuffers } from 'flatbuffers';
+import ByteBuffer = flatbuffers.ByteBuffer;
+import { Message } from './metadata/message';
+import { isFileHandle } from '../util/compat';
+import { AsyncRandomAccessFile } from '../io/file';
+import { toUint8Array, ArrayBufferViewInput } from '../util/buffer';
+import { ByteStream, ReadableSource, AsyncByteStream } from '../io/stream';
+import { ArrowJSON, ArrowJSONLike, ITERATOR_DONE, FileHandle } from '../io/interfaces';
+
+/** @ignore */ const invalidMessageType = (type: MessageHeader) => `Expected ${MessageHeader[type]} Message in stream, but was null or length 0.`;
+/** @ignore */ const nullMessage = (type: MessageHeader) => `Header pointer of flatbuffer-encoded ${MessageHeader[type]} Message is null or length 0.`;
+/** @ignore */ const invalidMessageMetadata = (expected: number, actual: number) => `Expected to read ${expected} metadata bytes, but only read ${actual}.`;
+/** @ignore */ const invalidMessageBodyLength = (expected: number, actual: number) => `Expected to read ${expected} bytes for message body, but only read ${actual}.`;
+
+/** @ignore */
+export class MessageReader implements IterableIterator<Message> {
+    protected source: ByteStream;
+    constructor(source: ByteStream | ArrayBufferViewInput | Iterable<ArrayBufferViewInput>) {
+        this.source = source instanceof ByteStream ? source : new ByteStream(source);
+    }
+    public [Symbol.iterator](): IterableIterator<Message> { return this as IterableIterator<Message>; }
+    public next(): IteratorResult<Message> {
+        let r;
+        if ((r = this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        if ((r = this.readMetadata(r.value)).done) { return ITERATOR_DONE; }
+        return (<any> r) as IteratorResult<Message>;
+    }
+    public throw(value?: any) { return this.source.throw(value); }
+    public return(value?: any) { return this.source.return(value); }
+    public readMessage<T extends MessageHeader>(type?: T | null) {
+        let r: IteratorResult<Message<T>>;
+        if ((r = this.next()).done) { return null; }
+        if ((type != null) && r.value.headerType !== type) {
+            throw new Error(invalidMessageType(type));
+        }
+        return r.value;
+    }
+    public readMessageBody(bodyLength: number): Uint8Array {
+        if (bodyLength <= 0) { return new Uint8Array(0); }
+        const buf = toUint8Array(this.source.read(bodyLength));
+        if (buf.byteLength < bodyLength) {
+            throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength));
+        }
+        // 1. Work around bugs in fs.ReadStream's internal Buffer pooling, see: https://github.com/nodejs/node/issues/24817
+        // 2. Work around https://github.com/whatwg/streams/blob/0ebe4b042e467d9876d80ae045de3843092ad797/reference-implementation/lib/helpers.js#L126
+        return /* 1. */ (buf.byteOffset % 8 === 0) &&
+               /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ?
buf : buf.slice();
+    }
+    public readSchema(throwIfNull = false) {
+        const type = MessageHeader.Schema;
+        const message = this.readMessage(type);
+        const schema = message && message.header();
+        if (throwIfNull && !schema) {
+            throw new Error(nullMessage(type));
+        }
+        return schema;
+    }
+    protected readMetadataLength(): IteratorResult<number> {
+        const buf = this.source.read(PADDING);
+        const bb = buf && new ByteBuffer(buf);
+        const len = +(bb && bb.readInt32(0))!;
+        return { done: len <= 0, value: len };
+    }
+    protected readMetadata(metadataLength: number): IteratorResult<Message> {
+        const buf = this.source.read(metadataLength);
+        if (!buf) { return ITERATOR_DONE; }
+        if (buf.byteLength < metadataLength) {
+            throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength));
+        }
+        return { done: false, value: Message.decode(buf) };
+    }
+}
+
+/** @ignore */
+export class AsyncMessageReader implements AsyncIterableIterator<Message> {
+    protected source: AsyncByteStream;
+    constructor(source: ReadableSource<Uint8Array>);
+    constructor(source: FileHandle, byteLength?: number);
+    constructor(source: any, byteLength?: number) {
+        this.source = source instanceof AsyncByteStream ? source
+            : isFileHandle(source)
+            ? new AsyncRandomAccessFile(source, byteLength!)
+            : new AsyncByteStream(source);
+    }
+    public [Symbol.asyncIterator](): AsyncIterableIterator<Message> { return this as AsyncIterableIterator<Message>; }
+    public async next(): Promise<IteratorResult<Message>> {
+        let r;
+        if ((r = await this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        if ((r = await this.readMetadata(r.value)).done) { return ITERATOR_DONE; }
+        return (<any> r) as IteratorResult<Message>;
+    }
+    public async throw(value?: any) { return await this.source.throw(value); }
+    public async return(value?: any) { return await this.source.return(value); }
+    public async readMessage<T extends MessageHeader>(type?: T | null) {
+        let r: IteratorResult<Message<T>>;
+        if ((r = await this.next()).done) { return null; }
+        if ((type != null) && r.value.headerType !== type) {
+            throw new Error(invalidMessageType(type));
+        }
+        return r.value;
+    }
+    public async readMessageBody(bodyLength: number): Promise<Uint8Array> {
+        if (bodyLength <= 0) { return new Uint8Array(0); }
+        const buf = toUint8Array(await this.source.read(bodyLength));
+        if (buf.byteLength < bodyLength) {
+            throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength));
+        }
+        // 1. Work around bugs in fs.ReadStream's internal Buffer pooling, see: https://github.com/nodejs/node/issues/24817
+        // 2. Work around https://github.com/whatwg/streams/blob/0ebe4b042e467d9876d80ae045de3843092ad797/reference-implementation/lib/helpers.js#L126
+        return /* 1. */ (buf.byteOffset % 8 === 0) &&
+               /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ?
buf : buf.slice(); + } + public async readSchema(throwIfNull = false) { + const type = MessageHeader.Schema; + const message = await this.readMessage(type); + const schema = message && message.header(); + if (throwIfNull && !schema) { + throw new Error(nullMessage(type)); + } + return schema; + } + protected async readMetadataLength(): Promise> { + const buf = await this.source.read(PADDING); + const bb = buf && new ByteBuffer(buf); + const len = +(bb && bb.readInt32(0))!; + return { done: len <= 0, value: len }; + } + protected async readMetadata(metadataLength: number): Promise> { + const buf = await this.source.read(metadataLength); + if (!buf) { return ITERATOR_DONE; } + if (buf.byteLength < metadataLength) { + throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength)); + } + return { done: false, value: Message.decode(buf) }; + } +} + +/** @ignore */ +export class JSONMessageReader extends MessageReader { + private _schema = false; + private _json: ArrowJSON; + private _body: any[] = []; + private _batchIndex = 0; + private _dictionaryIndex = 0; + constructor(source: ArrowJSON | ArrowJSONLike) { + super(new Uint8Array(0)); + this._json = source instanceof ArrowJSON ? source : new ArrowJSON(source); + } + public next() { + const { _json, _batchIndex, _dictionaryIndex } = this; + const numBatches = _json.batches.length; + const numDictionaries = _json.dictionaries.length; + if (!this._schema) { + this._schema = true; + const message = Message.fromJSON(_json.schema, MessageHeader.Schema); + return { value: message, done: _batchIndex >= numBatches && _dictionaryIndex >= numDictionaries }; + } + if (_dictionaryIndex < numDictionaries) { + const batch = _json.dictionaries[this._dictionaryIndex++]; + this._body = batch['data']['columns']; + const message = Message.fromJSON(batch, MessageHeader.DictionaryBatch); + return { done: false, value: message }; + } + if (_batchIndex < numBatches) { + const batch = _json.batches[this._batchIndex++]; + this._body = batch['columns']; + const message = Message.fromJSON(batch, MessageHeader.RecordBatch); + return { done: false, value: message }; + } + this._body = []; + return ITERATOR_DONE; + } + public readMessageBody(_bodyLength?: number) { + return flattenDataSources(this._body) as any; + function flattenDataSources(xs: any[]): any[][] { + return (xs || []).reduce((buffers, column: any) => [ + ...buffers, + ...(column['VALIDITY'] && [column['VALIDITY']] || []), + ...(column['TYPE'] && [column['TYPE']] || []), + ...(column['OFFSET'] && [column['OFFSET']] || []), + ...(column['DATA'] && [column['DATA']] || []), + ...flattenDataSources(column['children']) + ], [] as any[][]); + } + } + public readMessage(type?: T | null) { + let r: IteratorResult>; + if ((r = this.next()).done) { return null; } + if ((type != null) && r.value.headerType !== type) { + throw new Error(invalidMessageType(type)); + } + return r.value; + } + public readSchema() { + const type = MessageHeader.Schema; + const message = this.readMessage(type); + const schema = message && message.header(); + if (!message || !schema) { + throw new Error(nullMessage(type)); + } + return schema; + } +} + +/** @ignore */ +export const PADDING = 4; +/** @ignore */ +export const MAGIC_STR = 'ARROW1'; +/** @ignore */ +export const MAGIC = new Uint8Array(MAGIC_STR.length); + +for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { + MAGIC[i] = MAGIC_STR.charCodeAt(i); +} + +/** @ignore */ +export function checkForMagicArrowString(buffer: Uint8Array, index = 0) { + for (let i = -1, n = 
MAGIC.length; ++i < n;) { + if (MAGIC[i] !== buffer[index + i]) { + return false; + } + } + return true; +} + +/** @ignore */ +export const magicLength = MAGIC.length; +/** @ignore */ +export const magicAndPadding = magicLength + PADDING; +/** @ignore */ +export const magicX2AndPadding = magicLength * 2 + PADDING; diff --git a/js/src/ipc/metadata.ts b/js/src/ipc/metadata.ts deleted file mode 100644 index 025b051734295..0000000000000 --- a/js/src/ipc/metadata.ts +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/* tslint:disable:class-name */ - -import { Schema, Long, MessageHeader, MetadataVersion } from '../type'; - -export class Footer { - constructor(public dictionaryBatches: FileBlock[], public recordBatches: FileBlock[], public schema: Schema) {} -} - -export class FileBlock { - public offset: number; - public bodyLength: number; - constructor(public metaDataLength: number, bodyLength: Long | number, offset: Long | number) { - this.offset = typeof offset === 'number' ? offset : offset.low; - this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; - } -} - -export class Message { - public bodyLength: number; - public version: MetadataVersion; - public headerType: MessageHeader; - constructor(version: MetadataVersion, bodyLength: Long | number, headerType: MessageHeader) { - this.version = version; - this.headerType = headerType; - this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; - } - static isSchema(m: Message): m is Schema { return m.headerType === MessageHeader.Schema; } - static isRecordBatch(m: Message): m is RecordBatchMetadata { return m.headerType === MessageHeader.RecordBatch; } - static isDictionaryBatch(m: Message): m is DictionaryBatch { return m.headerType === MessageHeader.DictionaryBatch; } -} - -export class RecordBatchMetadata extends Message { - public length: number; - public nodes: FieldMetadata[]; - public buffers: BufferMetadata[]; - constructor(version: MetadataVersion, length: Long | number, nodes: FieldMetadata[], buffers: BufferMetadata[], bodyLength?: Long | number) { - if (bodyLength === void(0)) { - bodyLength = buffers.reduce((bodyLength, buffer) => bodyLength + buffer.length, 0); - } - super(version, bodyLength, MessageHeader.RecordBatch); - this.nodes = nodes; - this.buffers = buffers; - this.length = typeof length === 'number' ? 
length : length.low; - } -} - -export class DictionaryBatch extends Message { - public id: number; - public isDelta: boolean; - public data: RecordBatchMetadata; - constructor(version: MetadataVersion, data: RecordBatchMetadata, id: Long | number, isDelta: boolean = false) { - super(version, data.bodyLength, MessageHeader.DictionaryBatch); - this.isDelta = isDelta; - this.data = data; - this.id = typeof id === 'number' ? id : id.low; - } - private static atomicDictionaryId = 0; - public static getId() { return DictionaryBatch.atomicDictionaryId++; } - public get nodes(): FieldMetadata[] { return this.data.nodes; } - public get buffers(): BufferMetadata[] { return this.data.buffers; } -} - -export class BufferMetadata { - public offset: number; - public length: number; - constructor(offset: Long | number, length: Long | number) { - this.offset = typeof offset === 'number' ? offset : offset.low; - this.length = typeof length === 'number' ? length : length.low; - } -} - -export class FieldMetadata { - public length: number; - public nullCount: number; - constructor(length: Long | number, nullCount: Long | number) { - this.length = typeof length === 'number' ? length : length.low; - this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low; - } -} diff --git a/js/src/ipc/metadata/file.ts b/js/src/ipc/metadata/file.ts new file mode 100644 index 0000000000000..d7786fbbf9324 --- /dev/null +++ b/js/src/ipc/metadata/file.ts @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
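The Footer and FileBlock classes below describe the trailer of an Arrow file. For orientation, the footer is located from the end of the file like this (a sketch derived from the magic/padding constants in message.ts and the deleted isValidArrowFile above; `footerRange` is a hypothetical helper, not part of the patch):

    // File tail: [footer bytes][int32 footerLength]['ARROW1']
    function footerRange(file: Uint8Array): [number, number] {
        const magicLength = 6, padding = 4; // 'ARROW1'.length and PADDING
        const lengthOffset = file.byteLength - (magicLength + padding);
        const footerLength = new DataView(file.buffer, file.byteOffset + lengthOffset).getInt32(0, true);
        return [lengthOffset - footerLength, lengthOffset]; // the slice Footer.decode receives
    }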
+ +/* tslint:disable:class-name */ + +import * as File_ from '../../fb/File'; +import { flatbuffers } from 'flatbuffers'; + +import Long = flatbuffers.Long; +import Builder = flatbuffers.Builder; +import ByteBuffer = flatbuffers.ByteBuffer; +import _Block = File_.org.apache.arrow.flatbuf.Block; +import _Footer = File_.org.apache.arrow.flatbuf.Footer; + +import { Schema } from '../../schema'; +import { MetadataVersion } from '../../enum'; +import { toUint8Array } from '../../util/buffer'; +import { ArrayBufferViewInput } from '../../util/buffer'; + +/** @ignore */ +class Footer_ { + + /** @nocollapse */ + public static decode(buf: ArrayBufferViewInput) { + buf = new ByteBuffer(toUint8Array(buf)); + const footer = _Footer.getRootAsFooter(buf); + const schema = Schema.decode(footer.schema()!); + return new OffHeapFooter(schema, footer) as Footer_; + } + + /** @nocollapse */ + public static encode(footer: Footer_) { + + const b: Builder = new Builder(); + const schemaOffset = Schema.encode(b, footer.schema); + + _Footer.startRecordBatchesVector(b, footer.numRecordBatches); + [...footer.recordBatches()].slice().reverse().forEach((rb) => FileBlock.encode(b, rb)); + const recordBatchesOffset = b.endVector(); + + _Footer.startDictionariesVector(b, footer.numDictionaries); + [...footer.dictionaryBatches()].slice().reverse().forEach((db) => FileBlock.encode(b, db)); + + const dictionaryBatchesOffset = b.endVector(); + + _Footer.startFooter(b); + _Footer.addSchema(b, schemaOffset); + _Footer.addVersion(b, MetadataVersion.V4); + _Footer.addRecordBatches(b, recordBatchesOffset); + _Footer.addDictionaries(b, dictionaryBatchesOffset); + _Footer.finishFooterBuffer(b, _Footer.endFooter(b)); + + return b.asUint8Array(); + } + + // @ts-ignore + protected _recordBatches: FileBlock[]; + // @ts-ignore + protected _dictionaryBatches: FileBlock[]; + public get numRecordBatches() { return this._recordBatches.length; } + public get numDictionaries() { return this._dictionaryBatches.length; } + + constructor(public schema: Schema, + public version: MetadataVersion = MetadataVersion.V4, + recordBatches?: FileBlock[], dictionaryBatches?: FileBlock[]) { + recordBatches && (this._recordBatches = recordBatches); + dictionaryBatches && (this._dictionaryBatches = dictionaryBatches); + } + + public *recordBatches(): Iterable { + for (let block, i = -1, n = this.numRecordBatches; ++i < n;) { + if (block = this.getRecordBatch(i)) { yield block; } + } + } + + public *dictionaryBatches(): Iterable { + for (let block, i = -1, n = this.numDictionaries; ++i < n;) { + if (block = this.getDictionaryBatch(i)) { yield block; } + } + } + + public getRecordBatch(index: number) { + return index >= 0 + && index < this.numRecordBatches + && this._recordBatches[index] || null; + } + + public getDictionaryBatch(index: number) { + return index >= 0 + && index < this.numDictionaries + && this._dictionaryBatches[index] || null; + } +} + +export { Footer_ as Footer }; + +/** @ignore */ +class OffHeapFooter extends Footer_ { + + public get numRecordBatches() { return this._footer.recordBatchesLength(); } + public get numDictionaries() { return this._footer.dictionariesLength(); } + + constructor(schema: Schema, protected _footer: _Footer) { + super(schema, _footer.version()); + } + + public getRecordBatch(index: number) { + if (index >= 0 && index < this.numRecordBatches) { + const fileBlock = this._footer.recordBatches(index); + if (fileBlock) { return FileBlock.decode(fileBlock); } + } + return null; + } + + public getDictionaryBatch(index: 
number) { + if (index >= 0 && index < this.numDictionaries) { + const fileBlock = this._footer.dictionaries(index); + if (fileBlock) { return FileBlock.decode(fileBlock); } + } + return null; + } +} + +/** @ignore */ +export class FileBlock { + + /** @nocollapse */ + public static decode(block: _Block) { + return new FileBlock(block.metaDataLength(), block.bodyLength(), block.offset()); + } + + /** @nocollapse */ + public static encode(b: Builder, fileBlock: FileBlock) { + const { metaDataLength } = fileBlock; + const offset = new Long(fileBlock.offset, 0); + const bodyLength = new Long(fileBlock.bodyLength, 0); + return _Block.createBlock(b, offset, metaDataLength, bodyLength); + } + + public offset: number; + public bodyLength: number; + public metaDataLength: number; + + constructor(metaDataLength: number, bodyLength: Long | number, offset: Long | number) { + this.metaDataLength = metaDataLength; + this.offset = typeof offset === 'number' ? offset : offset.low; + this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; + } +} diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts new file mode 100644 index 0000000000000..fa219b3e7853b --- /dev/null +++ b/js/src/ipc/metadata/json.ts @@ -0,0 +1,208 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
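The fromJSON helpers below parse the Arrow integration-test JSON format. Judging from the field accesses in this file, the shape they consume looks roughly like this (a hand-written sketch, not a normative example):

    const json: ArrowJSONLike = {
        schema: { fields: [{ name: 'x', nullable: true, children: [],
                             type: { name: 'int', bitWidth: 32, isSigned: true } }] },
        batches: [{ count: 2, columns: [{ name: 'x', count: 2, VALIDITY: [1, 1], DATA: [1, 2] }] }]
    };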
+ +import { Schema, Field } from '../../schema'; +import { + DataType, Dictionary, TimeBitWidth, + Utf8, Binary, Decimal, FixedSizeBinary, + List, FixedSizeList, Map_, Struct, Union, + Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, +} from '../../type'; + +import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message'; +import { TimeUnit, Precision, IntervalUnit, UnionMode, DateUnit } from '../../enum'; + +/** @ignore */ +export function schemaFromJSON(_schema: any, dictionaries: Map = new Map(), dictionaryFields: Map[]> = new Map()) { + return new Schema( + schemaFieldsFromJSON(_schema, dictionaries, dictionaryFields), + customMetadataFromJSON(_schema['customMetadata']), + dictionaries, dictionaryFields + ); +} + +/** @ignore */ +export function recordBatchFromJSON(b: any) { + return new RecordBatch( + b['count'], + fieldNodesFromJSON(b['columns']), + buffersFromJSON(b['columns']) + ); +} + +/** @ignore */ +export function dictionaryBatchFromJSON(b: any) { + return new DictionaryBatch( + recordBatchFromJSON(b['data']), + b['id'], b['isDelta'] + ); +} + +/** @ignore */ +function schemaFieldsFromJSON(_schema: any, dictionaries?: Map, dictionaryFields?: Map[]>) { + return (_schema['fields'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries, dictionaryFields)); +} + +/** @ignore */ +function fieldChildrenFromJSON(_field: any, dictionaries?: Map, dictionaryFields?: Map[]>): Field[] { + return (_field['children'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries, dictionaryFields)); +} + +/** @ignore */ +function fieldNodesFromJSON(xs: any[]): FieldNode[] { + return (xs || []).reduce((fieldNodes, column: any) => [ + ...fieldNodes, + new FieldNode( + column['count'], + nullCountFromJSON(column['VALIDITY']) + ), + ...fieldNodesFromJSON(column['children']) + ], [] as FieldNode[]); +} + +/** @ignore */ +function buffersFromJSON(xs: any[], buffers: BufferRegion[] = []): BufferRegion[] { + for (let i = -1, n = (xs || []).length; ++i < n;) { + const column = xs[i]; + column['VALIDITY'] && buffers.push(new BufferRegion(buffers.length, column['VALIDITY'].length)); + column['TYPE'] && buffers.push(new BufferRegion(buffers.length, column['TYPE'].length)); + column['OFFSET'] && buffers.push(new BufferRegion(buffers.length, column['OFFSET'].length)); + column['DATA'] && buffers.push(new BufferRegion(buffers.length, column['DATA'].length)); + buffers = buffersFromJSON(column['children'], buffers); + } + return buffers; +} + +/** @ignore */ +function nullCountFromJSON(validity: number[]) { + return (validity || []).reduce((sum, val) => sum + +(val === 0), 0); +} + +/** @ignore */ +export function fieldFromJSON(_field: any, dictionaries?: Map, dictionaryFields?: Map[]>) { + + let id: number; + let keys: TKeys | null; + let field: Field | void; + let dictMeta: any; + let type: DataType; + let dictType: Dictionary; + let dictField: Field; + + // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field + if (!dictionaries || !dictionaryFields || !(dictMeta = _field['dictionary'])) { + type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields)); + field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + } + // tslint:disable + // If dictionary encoded and the first time we've seen this dictionary id, decode + // the data type and child fields, then wrap in a Dictionary type and insert the + 
// data type into the dictionary types map. + else if (!dictionaries.has(id = dictMeta['id'])) { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32(); + dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field))); + dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']); + dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + dictionaryFields.set(id, [field = dictField]); + } + // If dictionary encoded, and have already seen this dictionary Id in the schema, then reuse the + // data type and wrap in a new Dictionary type and field. + else { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32(); + dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta['isOrdered']); + dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata'])); + dictionaryFields.get(id)!.push(field = dictField); + } + return field || null; +} + +/** @ignore */ +function customMetadataFromJSON(_metadata?: object) { + return new Map(Object.entries(_metadata || {})); +} + +/** @ignore */ +function indexTypeFromJSON(_type: any) { + return new Int(_type['isSigned'], _type['bitWidth']); +} + +/** @ignore */ +function typeFromJSON(f: any, children?: Field[]): DataType { + + const typeId = f['type']['name']; + + switch (typeId) { + case 'NONE': return new DataType(); + case 'null': return new Null(); + case 'binary': return new Binary(); + case 'utf8': return new Utf8(); + case 'bool': return new Bool(); + case 'list': return new List((children || [])[0]); + case 'struct': return new Struct(children || []); + case 'struct_': return new Struct(children || []); + } + + switch (typeId) { + case 'int': { + const t = f['type']; + return new Int(t['isSigned'], t['bitWidth'] as IntBitWidth); + } + case 'floatingpoint': { + const t = f['type']; + return new Float(Precision[t['precision']] as any); + } + case 'decimal': { + const t = f['type']; + return new Decimal(t['scale'], t['precision']); + } + case 'date': { + const t = f['type']; + return new Date_(DateUnit[t['unit']] as any); + } + case 'time': { + const t = f['type']; + return new Time(TimeUnit[t['unit']] as any, t['bitWidth'] as TimeBitWidth); + } + case 'timestamp': { + const t = f['type']; + return new Timestamp(TimeUnit[t['unit']] as any, t['timezone']); + } + case 'interval': { + const t = f['type']; + return new Interval(IntervalUnit[t['unit']] as any); + } + case 'union': { + const t = f['type']; + return new Union(UnionMode[t['mode']] as any, (t['typeIds'] || []), children || []); + } + case 'fixedsizebinary': { + const t = f['type']; + return new FixedSizeBinary(t['byteWidth']); + } + case 'fixedsizelist': { + const t = f['type']; + return new FixedSizeList(t['listSize'], (children || [])[0]); + } + case 'map': { + const t = f['type']; + return new Map_(children || [], t['keysSorted']); + } + } + throw new Error(`Unrecognized type: "${typeId}"`); +} diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts new file mode 100644 index 0000000000000..794ece9101e52 --- /dev/null +++ b/js/src/ipc/metadata/message.ts @@ -0,0 +1,593 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { flatbuffers } from 'flatbuffers'; +import * as Schema_ from '../../fb/Schema'; +import * as Message_ from '../../fb/Message'; + +import { Schema, Field } from '../../schema'; +import { toUint8Array } from '../../util/buffer'; +import { ArrayBufferViewInput } from '../../util/buffer'; +import { MessageHeader, MetadataVersion } from '../../enum'; +import { instance as typeAssembler } from '../../visitor/typeassembler'; +import { fieldFromJSON, schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json'; + +import Long = flatbuffers.Long; +import Builder = flatbuffers.Builder; +import ByteBuffer = flatbuffers.ByteBuffer; +import _Int = Schema_.org.apache.arrow.flatbuf.Int; +import Type = Schema_.org.apache.arrow.flatbuf.Type; +import _Field = Schema_.org.apache.arrow.flatbuf.Field; +import _Schema = Schema_.org.apache.arrow.flatbuf.Schema; +import _Buffer = Schema_.org.apache.arrow.flatbuf.Buffer; +import _Message = Message_.org.apache.arrow.flatbuf.Message; +import _KeyValue = Schema_.org.apache.arrow.flatbuf.KeyValue; +import _FieldNode = Message_.org.apache.arrow.flatbuf.FieldNode; +import _Endianness = Schema_.org.apache.arrow.flatbuf.Endianness; +import _RecordBatch = Message_.org.apache.arrow.flatbuf.RecordBatch; +import _DictionaryBatch = Message_.org.apache.arrow.flatbuf.DictionaryBatch; +import _DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; + +import { + DataType, Dictionary, TimeBitWidth, + Utf8, Binary, Decimal, FixedSizeBinary, + List, FixedSizeList, Map_, Struct, Union, + Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, +} from '../../type'; + +/** @ignore */ +export class Message { + + /** @nocollapse */ + public static fromJSON(msg: any, headerType: T): Message { + const message = new Message(0, MetadataVersion.V4, headerType); + message._createHeader = messageHeaderFromJSON(msg, headerType); + return message; + } + + /** @nocollapse */ + public static decode(buf: ArrayBufferViewInput) { + buf = new ByteBuffer(toUint8Array(buf)); + const _message = _Message.getRootAsMessage(buf); + const bodyLength: Long = _message.bodyLength()!; + const version: MetadataVersion = _message.version(); + const headerType: MessageHeader = _message.headerType(); + const message = new Message(bodyLength, version, headerType); + message._createHeader = decodeMessageHeader(_message, headerType); + return message; + } + + /** @nocollapse */ + public static encode(message: Message) { + let b = new Builder(), headerOffset = -1; + if (message.isSchema()) { + headerOffset = Schema.encode(b, message.header() as Schema); + } else if (message.isRecordBatch()) { + headerOffset = RecordBatch.encode(b, message.header() as RecordBatch); + } else if (message.isDictionaryBatch()) { + headerOffset = DictionaryBatch.encode(b, 
message.header() as DictionaryBatch); + } + _Message.startMessage(b); + _Message.addVersion(b, MetadataVersion.V4); + _Message.addHeader(b, headerOffset); + _Message.addHeaderType(b, message.headerType); + _Message.addBodyLength(b, new Long(message.bodyLength, 0)); + _Message.finishMessageBuffer(b, _Message.endMessage(b)); + return b.asUint8Array(); + } + + /** @nocollapse */ + public static from(header: Schema | RecordBatch | DictionaryBatch, bodyLength = 0) { + if (header instanceof Schema) { + return new Message(0, MetadataVersion.V4, MessageHeader.Schema, header); + } + if (header instanceof RecordBatch) { + return new Message(bodyLength, MetadataVersion.V4, MessageHeader.RecordBatch, header); + } + if (header instanceof DictionaryBatch) { + return new Message(bodyLength, MetadataVersion.V4, MessageHeader.DictionaryBatch, header); + } + throw new Error(`Unrecognized Message header: ${header}`); + } + + // @ts-ignore + public body: Uint8Array; + protected _headerType: T; + protected _bodyLength: number; + protected _version: MetadataVersion; + public get type() { return this.headerType; } + public get version() { return this._version; } + public get headerType() { return this._headerType; } + public get bodyLength() { return this._bodyLength; } + // @ts-ignore + protected _createHeader: MessageHeaderDecoder; + public header() { return this._createHeader(); } + public isSchema(): this is Message { return this.headerType === MessageHeader.Schema; } + public isRecordBatch(): this is Message { return this.headerType === MessageHeader.RecordBatch; } + public isDictionaryBatch(): this is Message { return this.headerType === MessageHeader.DictionaryBatch; } + + constructor(bodyLength: Long | number, version: MetadataVersion, headerType: T, header?: any) { + this._version = version; + this._headerType = headerType; + this.body = new Uint8Array(0); + header && (this._createHeader = () => header); + this._bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; + } +} + +/** @ignore */ +export class RecordBatch { + protected _length: number; + protected _nodes: FieldNode[]; + protected _buffers: BufferRegion[]; + public get nodes() { return this._nodes; } + public get length() { return this._length; } + public get buffers() { return this._buffers; } + constructor(length: Long | number, nodes: FieldNode[], buffers: BufferRegion[]) { + this._nodes = nodes; + this._buffers = buffers; + this._length = typeof length === 'number' ? length : length.low; + } +} + +/** @ignore */ +export class DictionaryBatch { + + protected _id: number; + protected _isDelta: boolean; + protected _data: RecordBatch; + public get id() { return this._id; } + public get data() { return this._data; } + public get isDelta() { return this._isDelta; } + public get length(): number { return this.data.length; } + public get nodes(): FieldNode[] { return this.data.nodes; } + public get buffers(): BufferRegion[] { return this.data.buffers; } + + constructor(data: RecordBatch, id: Long | number, isDelta: boolean = false) { + this._data = data; + this._isDelta = isDelta; + this._id = typeof id === 'number' ? id : id.low; + } +} + +/** @ignore */ +export class BufferRegion { + public offset: number; + public length: number; + constructor(offset: Long | number, length: Long | number) { + this.offset = typeof offset === 'number' ? offset : offset.low; + this.length = typeof length === 'number' ? 
length : length.low; + } +} + +/** @ignore */ +export class FieldNode { + public length: number; + public nullCount: number; + constructor(length: Long | number, nullCount: Long | number) { + this.length = typeof length === 'number' ? length : length.low; + this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low; + } +} + +function messageHeaderFromJSON(message: any, type: MessageHeader) { + return (() => { + switch (type) { + case MessageHeader.Schema: return Schema.fromJSON(message); + case MessageHeader.RecordBatch: return RecordBatch.fromJSON(message); + case MessageHeader.DictionaryBatch: return DictionaryBatch.fromJSON(message); + } + throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`); + }) as MessageHeaderDecoder; +} + +function decodeMessageHeader(message: _Message, type: MessageHeader) { + return (() => { + switch (type) { + case MessageHeader.Schema: return Schema.decode(message.header(new _Schema())!); + case MessageHeader.RecordBatch: return RecordBatch.decode(message.header(new _RecordBatch())!, message.version()); + case MessageHeader.DictionaryBatch: return DictionaryBatch.decode(message.header(new _DictionaryBatch())!, message.version()); + } + throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`); + }) as MessageHeaderDecoder; +} + +Field['encode'] = encodeField; +Field['decode'] = decodeField; +Field['fromJSON'] = fieldFromJSON; + +Schema['encode'] = encodeSchema; +Schema['decode'] = decodeSchema; +Schema['fromJSON'] = schemaFromJSON; + +RecordBatch['encode'] = encodeRecordBatch; +RecordBatch['decode'] = decodeRecordBatch; +RecordBatch['fromJSON'] = recordBatchFromJSON; + +DictionaryBatch['encode'] = encodeDictionaryBatch; +DictionaryBatch['decode'] = decodeDictionaryBatch; +DictionaryBatch['fromJSON'] = dictionaryBatchFromJSON; + +FieldNode['encode'] = encodeFieldNode; +FieldNode['decode'] = decodeFieldNode; + +BufferRegion['encode'] = encodeBufferRegion; +BufferRegion['decode'] = decodeBufferRegion; + +declare module '../../schema' { + namespace Field { + export { encodeField as encode }; + export { decodeField as decode }; + export { fieldFromJSON as fromJSON }; + } + namespace Schema { + export { encodeSchema as encode }; + export { decodeSchema as decode }; + export { schemaFromJSON as fromJSON }; + } +} + +declare module './message' { + namespace RecordBatch { + export { encodeRecordBatch as encode }; + export { decodeRecordBatch as decode }; + export { recordBatchFromJSON as fromJSON }; + } + namespace DictionaryBatch { + export { encodeDictionaryBatch as encode }; + export { decodeDictionaryBatch as decode }; + export { dictionaryBatchFromJSON as fromJSON }; + } + namespace FieldNode { + export { encodeFieldNode as encode }; + export { decodeFieldNode as decode }; + } + namespace BufferRegion { + export { encodeBufferRegion as encode }; + export { decodeBufferRegion as decode }; + } +} + +/** @ignore */ +function decodeSchema(_schema: _Schema, dictionaries: Map = new Map(), dictionaryFields: Map[]> = new Map()) { + const fields = decodeSchemaFields(_schema, dictionaries, dictionaryFields); + return new Schema(fields, decodeCustomMetadata(_schema), dictionaries, dictionaryFields); +} + +/** @ignore */ +function decodeRecordBatch(batch: _RecordBatch, version = MetadataVersion.V4) { + return new RecordBatch(batch.length(), decodeFieldNodes(batch), decodeBuffers(batch, version)); +} + +/** @ignore */ +function decodeDictionaryBatch(batch: _DictionaryBatch, version = 
MetadataVersion.V4) { + return new DictionaryBatch(RecordBatch.decode(batch.data()!, version), batch.id(), batch.isDelta()); +} + +/** @ignore */ +function decodeBufferRegion(b: _Buffer) { + return new BufferRegion(b.offset(), b.length()); +} + +/** @ignore */ +function decodeFieldNode(f: _FieldNode) { + return new FieldNode(f.length(), f.nullCount()); +} + +/** @ignore */ +function decodeFieldNodes(batch: _RecordBatch) { + const nodes = [] as FieldNode[]; + for (let f, i = -1, j = -1, n = batch.nodesLength(); ++i < n;) { + if (f = batch.nodes(i)) { + nodes[++j] = FieldNode.decode(f); + } + } + return nodes; +} + +/** @ignore */ +function decodeBuffers(batch: _RecordBatch, version: MetadataVersion) { + const bufferRegions = [] as BufferRegion[]; + for (let b, i = -1, j = -1, n = batch.buffersLength(); ++i < n;) { + if (b = batch.buffers(i)) { + // If this Arrow buffer was written before version 4, + // advance the buffer's bb_pos 8 bytes to skip past + // the now-removed page_id field + if (version < MetadataVersion.V4) { + b.bb_pos += (8 * (i + 1)); + } + bufferRegions[++j] = BufferRegion.decode(b); + } + } + return bufferRegions; +} + +/** @ignore */ +function decodeSchemaFields(schema: _Schema, dictionaries?: Map, dictionaryFields?: Map[]>) { + const fields = [] as Field[]; + for (let f, i = -1, j = -1, n = schema.fieldsLength(); ++i < n;) { + if (f = schema.fields(i)) { + fields[++j] = Field.decode(f, dictionaries, dictionaryFields); + } + } + return fields; +} + +/** @ignore */ +function decodeFieldChildren(field: _Field, dictionaries?: Map, dictionaryFields?: Map[]>): Field[] { + const children = [] as Field[]; + for (let f, i = -1, j = -1, n = field.childrenLength(); ++i < n;) { + if (f = field.children(i)) { + children[++j] = Field.decode(f, dictionaries, dictionaryFields); + } + } + return children; +} + +/** @ignore */ +function decodeField(f: _Field, dictionaries?: Map, dictionaryFields?: Map[]>) { + + let id: number; + let field: Field | void; + let type: DataType; + let keys: _Int | TKeys | null; + let dictType: Dictionary; + let dictMeta: _DictionaryEncoding | null; + let dictField: Field; + + // If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field + if (!dictionaries || !dictionaryFields || !(dictMeta = f.dictionary())) { + type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields)); + field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f)); + } + // tslint:disable + // If dictionary encoded and the first time we've seen this dictionary id, decode + // the data type and child fields, then wrap in a Dictionary type and insert the + // data type into the dictionary types map. + else if (!dictionaries.has(id = dictMeta.id().low)) { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32(); + dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f))); + dictType = new Dictionary(type, keys, id, dictMeta.isOrdered()); + dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f)); + dictionaryFields.set(id, [field = dictField]); + } + // If dictionary encoded, and have already seen this dictionary Id in the schema, then reuse the + // data type and wrap in a new Dictionary type and field. + else { + // a dictionary index defaults to signed 32 bit int if unspecified + keys = (keys = dictMeta.indexType()) ? 
decodeIndexType(keys) as TKeys : new Int32(); + dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta.isOrdered()); + dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f)); + dictionaryFields.get(id)!.push(field = dictField); + } + return field || null; +} + +/** @ignore */ +function decodeCustomMetadata(parent?: _Schema | _Field | null) { + const data = new Map(); + if (parent) { + for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { + if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { + data.set(key, entry.value()!); + } + } + } + return data; +} + +/** @ignore */ +function decodeIndexType(_type: _Int) { + return new Int(_type.isSigned(), _type.bitWidth() as IntBitWidth); +} + +/** @ignore */ +function decodeFieldType(f: _Field, children?: Field[]): DataType { + + const typeId = f.typeType(); + + switch (typeId) { + case Type.NONE: return new DataType(); + case Type.Null: return new Null(); + case Type.Binary: return new Binary(); + case Type.Utf8: return new Utf8(); + case Type.Bool: return new Bool(); + case Type.List: return new List((children || [])[0]); + case Type.Struct_: return new Struct(children || []); + } + + switch (typeId) { + case Type.Int: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Int())!; + return new Int(t.isSigned(), t.bitWidth()); + } + case Type.FloatingPoint: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FloatingPoint())!; + return new Float(t.precision()); + } + case Type.Decimal: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Decimal())!; + return new Decimal(t.scale(), t.precision()); + } + case Type.Date: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Date())!; + return new Date_(t.unit()); + } + case Type.Time: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Time())!; + return new Time(t.unit(), t.bitWidth() as TimeBitWidth); + } + case Type.Timestamp: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Timestamp())!; + return new Timestamp(t.unit(), t.timezone()); + } + case Type.Interval: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Interval())!; + return new Interval(t.unit()); + } + case Type.Union: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Union())!; + return new Union(t.mode(), t.typeIdsArray() || [], children || []); + } + case Type.FixedSizeBinary: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FixedSizeBinary())!; + return new FixedSizeBinary(t.byteWidth()); + } + case Type.FixedSizeList: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.FixedSizeList())!; + return new FixedSizeList(t.listSize(), (children || [])[0]); + } + case Type.Map: { + const t = f.type(new Schema_.org.apache.arrow.flatbuf.Map())!; + return new Map_(children || [], t.keysSorted()); + } + } + throw new Error(`Unrecognized type: "${Type[typeId]}" (${typeId})`); +} + +/** @ignore */ +function encodeSchema(b: Builder, schema: Schema) { + + const fieldOffsets = schema.fields.map((f) => Field.encode(b, f)); + + _Schema.startFieldsVector(b, fieldOffsets.length); + + const fieldsVectorOffset = _Schema.createFieldsVector(b, fieldOffsets); + + const metadataOffset = !(schema.metadata && schema.metadata.size > 0) ? 
-1 : + _Schema.createCustomMetadataVector(b, [...schema.metadata].map(([k, v]) => { + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); + _KeyValue.startKeyValue(b); + _KeyValue.addKey(b, key); + _KeyValue.addValue(b, val); + return _KeyValue.endKeyValue(b); + })); + + _Schema.startSchema(b); + _Schema.addFields(b, fieldsVectorOffset); + _Schema.addEndianness(b, platformIsLittleEndian ? _Endianness.Little : _Endianness.Big); + + if (metadataOffset !== -1) { _Schema.addCustomMetadata(b, metadataOffset); } + + return _Schema.endSchema(b); +} + +/** @ignore */ +function encodeField(b: Builder, field: Field) { + + let nameOffset = -1; + let typeOffset = -1; + let dictionaryOffset = -1; + + let type = field.type; + let typeId: Type = field.typeId; + + if (!DataType.isDictionary(type)) { + typeOffset = typeAssembler.visit(type, b)!; + } else { + typeId = type.dictionary.typeId; + dictionaryOffset = typeAssembler.visit(type, b)!; + typeOffset = typeAssembler.visit(type.dictionary, b)!; + } + + const childOffsets = (type.children || []).map((f: Field) => Field.encode(b, f)); + const childrenVectorOffset = _Field.createChildrenVector(b, childOffsets); + + const metadataOffset = !(field.metadata && field.metadata.size > 0) ? -1 : + _Field.createCustomMetadataVector(b, [...field.metadata].map(([k, v]) => { + const key = b.createString(`${k}`); + const val = b.createString(`${v}`); + _KeyValue.startKeyValue(b); + _KeyValue.addKey(b, key); + _KeyValue.addValue(b, val); + return _KeyValue.endKeyValue(b); + })); + + if (field.name) { + nameOffset = b.createString(field.name); + } + + _Field.startField(b); + _Field.addType(b, typeOffset); + _Field.addTypeType(b, typeId); + _Field.addChildren(b, childrenVectorOffset); + _Field.addNullable(b, !!field.nullable); + + if (nameOffset !== -1) { _Field.addName(b, nameOffset); } + if (dictionaryOffset !== -1) { _Field.addDictionary(b, dictionaryOffset); } + if (metadataOffset !== -1) { _Field.addCustomMetadata(b, metadataOffset); } + + return _Field.endField(b); +} + +/** @ignore */ +function encodeRecordBatch(b: Builder, recordBatch: RecordBatch) { + + const nodes = recordBatch.nodes || []; + const buffers = recordBatch.buffers || []; + + _RecordBatch.startNodesVector(b, nodes.length); + nodes.slice().reverse().forEach((n) => FieldNode.encode(b, n)); + + const nodesVectorOffset = b.endVector(); + + _RecordBatch.startBuffersVector(b, buffers.length); + buffers.slice().reverse().forEach((b_) => BufferRegion.encode(b, b_)); + + const buffersVectorOffset = b.endVector(); + + _RecordBatch.startRecordBatch(b); + _RecordBatch.addLength(b, new Long(recordBatch.length, 0)); + _RecordBatch.addNodes(b, nodesVectorOffset); + _RecordBatch.addBuffers(b, buffersVectorOffset); + return _RecordBatch.endRecordBatch(b); +} + +/** @ignore */ +function encodeDictionaryBatch(b: Builder, dictionaryBatch: DictionaryBatch) { + const dataOffset = RecordBatch.encode(b, dictionaryBatch.data); + _DictionaryBatch.startDictionaryBatch(b); + _DictionaryBatch.addId(b, new Long(dictionaryBatch.id, 0)); + _DictionaryBatch.addIsDelta(b, dictionaryBatch.isDelta); + _DictionaryBatch.addData(b, dataOffset); + return _DictionaryBatch.endDictionaryBatch(b); +} + +/** @ignore */ +function encodeFieldNode(b: Builder, node: FieldNode) { + return _FieldNode.createFieldNode(b, new Long(node.length, 0), new Long(node.nullCount, 0)); +} + +/** @ignore */ +function encodeBufferRegion(b: Builder, node: BufferRegion) { + return _Buffer.createBuffer(b, new Long(node.offset, 0), new 
Long(node.length, 0));
+}
+
+/** @ignore */
+const platformIsLittleEndian = (function() {
+    const buffer = new ArrayBuffer(2);
+    new DataView(buffer).setInt16(0, 256, true /* littleEndian */);
+    // Int16Array uses the platform's endianness.
+    return new Int16Array(buffer)[0] === 256;
+})();
+
+/** @ignore */
+type MessageHeaderDecoder = <T extends MessageHeader>() => T extends MessageHeader.Schema ? Schema
+    : T extends MessageHeader.RecordBatch ? RecordBatch
+    : T extends MessageHeader.DictionaryBatch ? DictionaryBatch : never;
diff --git a/js/src/ipc/node/iterable.ts b/js/src/ipc/node/iterable.ts
new file mode 100644
index 0000000000000..a5e558e01662e
--- /dev/null
+++ b/js/src/ipc/node/iterable.ts
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Readable } from 'stream';
+import { isIterable, isAsyncIterable } from '../../util/compat';
+
+type ReadableOptions = import('stream').ReadableOptions;
+
+/** @ignore */
+export function toNodeStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: ReadableOptions): Readable {
+    if (isAsyncIterable<T>(source)) { return new AsyncIterableReadable(source[Symbol.asyncIterator](), options); }
+    if (isIterable<T>(source)) { return new IterableReadable(source[Symbol.iterator](), options); }
+    /* istanbul ignore next */
+    throw new Error(`toNodeStream() must be called with an Iterable or AsyncIterable`);
+}
+
+/** @ignore */
+class IterableReadable<T extends Uint8Array | any> extends Readable {
+    private _pulling: boolean;
+    private _bytesMode: boolean;
+    private _iterator: Iterator<T>;
+    constructor(it: Iterator<T>, options?: ReadableOptions) {
+        super(options);
+        this._iterator = it;
+        this._pulling = false;
+        this._bytesMode = !options || !options.objectMode;
+    }
+    _read(size: number) {
+        const it = this._iterator;
+        if (it && !this._pulling && (this._pulling = true)) {
+            this._pulling = this._pull(size, it);
+        }
+    }
+    _destroy(e: Error | null, cb: (e: Error | null) => void) {
+        let it = this._iterator, fn: any;
+        it && (fn = e != null && it.throw || it.return);
+        fn && fn.call(it, e);
+        cb && cb(null);
+    }
+    private _pull(size: number, it: Iterator<T>) {
+        const bm = this._bytesMode;
+        let r: IteratorResult<T> | null = null;
+        while (this.readable && !(r = it.next(bm ? size : null)).done) {
+            if (size != null) {
+                size -= (bm && ArrayBuffer.isView(r.value) ?
r.value.byteLength : 1); + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable) && (this.push(null) || true)) { + it.return && it.return(); + } + return !this.readable; + } +} + +/** @ignore */ +class AsyncIterableReadable extends Readable { + private _pulling: boolean; + private _bytesMode: boolean; + private _iterator: AsyncIterator; + constructor(it: AsyncIterator, options?: ReadableOptions) { + super(options); + this._iterator = it; + this._pulling = false; + this._bytesMode = !options || !options.objectMode; + } + _read(size: number) { + const it = this._iterator; + if (it && !this._pulling && (this._pulling = true)) { + (async () => this._pulling = await this._pull(size, it))(); + } + } + _destroy(e: Error | null, cb: (e: Error | null) => void) { + let it = this._iterator, fn: any; + it && (fn = e != null && it.throw || it.return); + fn && fn.call(it, e).then(() => cb && cb(null)) || (cb && cb(null)); + } + private async _pull(size: number, it: AsyncIterator) { + const bm = this._bytesMode; + let r: IteratorResult | null = null; + while (this.readable && !(r = await it.next(bm ? size : null)).done) { + if (size != null) { + size -= (bm && ArrayBuffer.isView(r.value) ? r.value.byteLength : 1); + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable) && (this.push(null) || true)) { + it.return && it.return(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/node/reader.ts b/js/src/ipc/node/reader.ts new file mode 100644 index 0000000000000..aeb8688d211c9 --- /dev/null +++ b/js/src/ipc/node/reader.ts @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Duplex, DuplexOptions } from 'stream'; +import { DataType } from '../../type'; +import { RecordBatch } from '../../recordbatch'; +import { AsyncByteQueue } from '../../io/stream'; +import { RecordBatchReader } from '../../ipc/reader'; + +/** @ignore */ +export function recordBatchReaderThroughNodeStream(options?: DuplexOptions & { autoDestroy: boolean }) { + return new RecordBatchReaderDuplex(options); +} + +type CB = (error?: Error | null | undefined) => void; + +/** @ignore */ +class RecordBatchReaderDuplex extends Duplex { + private _pulling: boolean = false; + private _autoDestroy: boolean = true; + private _reader: RecordBatchReader | null; + private _asyncQueue: AsyncByteQueue | null; + constructor(options?: DuplexOptions & { autoDestroy: boolean }) { + super({ allowHalfOpen: false, ...options, readableObjectMode: true, writableObjectMode: false }); + this._reader = null; + this._pulling = false; + this._asyncQueue = new AsyncByteQueue(); + this._autoDestroy = options && (typeof options.autoDestroy === 'boolean') ? 
options.autoDestroy : true; + } + _final(cb?: CB) { + const aq = this._asyncQueue; + aq && aq.close(); + cb && cb(); + } + _write(x: any, _: string, cb: CB) { + const aq = this._asyncQueue; + aq && aq.write(x); + cb && cb(); + return true; + } + _read(size: number) { + const aq = this._asyncQueue; + if (aq && !this._pulling && (this._pulling = true)) { + (async () => { + if (!this._reader) { + this._reader = await this._open(aq); + } + this._pulling = await this._pull(size, this._reader); + })(); + } + } + _destroy(err: Error | null, cb: (error: Error | null) => void) { + const aq = this._asyncQueue; + if (aq) { err ? aq.abort(err) : aq.close(); } + cb(this._asyncQueue = this._reader = null); + } + async _open(source: AsyncByteQueue) { + return await (await RecordBatchReader.from(source)).open({ autoDestroy: this._autoDestroy }); + } + async _pull(size: number, reader: RecordBatchReader) { + let r: IteratorResult> | null = null; + while (this.readable && !(r = await reader.next()).done) { + if (!this.push(r.value) || (size != null && --size <= 0)) { break; } + } + if ((r && r.done || !this.readable)) { + this.push(null); + await reader.cancel(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/node/writer.ts b/js/src/ipc/node/writer.ts new file mode 100644 index 0000000000000..673050791c67c --- /dev/null +++ b/js/src/ipc/node/writer.ts @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
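+
+// A rough usage sketch: the duplex produced by `recordBatchWriterThroughNodeStream()`
+// below accepts RecordBatches on its writable side and emits the serialized IPC bytes
+// on its readable side. Assuming it is bound as the writers' `throughNode()` static,
+// and that `reader` is an open RecordBatchReader (`RecordBatchStreamWriter` and the
+// output path are assumptions for illustration):
+//
+//     import * as fs from 'fs';
+//     reader.toNodeStream()
+//         .pipe(RecordBatchStreamWriter.throughNode())
+//         .pipe(fs.createWriteStream('out.arrow'));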
+ +import { Duplex, DuplexOptions } from 'stream'; +import { DataType } from '../../type'; +import { AsyncByteStream } from '../../io/stream'; +import { RecordBatchWriter } from '../../ipc/writer'; + +/** @ignore */ +export function recordBatchWriterThroughNodeStream(this: typeof RecordBatchWriter, options?: DuplexOptions & { autoDestroy: boolean }) { + return new RecordBatchWriterDuplex(new this(options)); +} + +type CB = (error?: Error | null | undefined) => void; + +/** @ignore */ +class RecordBatchWriterDuplex extends Duplex { + private _pulling: boolean = false; + private _reader: AsyncByteStream | null; + private _writer: RecordBatchWriter | null; + constructor(writer: RecordBatchWriter, options?: DuplexOptions) { + super({ allowHalfOpen: false, ...options, writableObjectMode: true, readableObjectMode: false }); + this._writer = writer; + this._reader = new AsyncByteStream(writer); + } + _final(cb?: CB) { + const writer = this._writer; + writer && writer.close(); + cb && cb(); + } + _write(x: any, _: string, cb: CB) { + const writer = this._writer; + writer && writer.write(x); + cb && cb(); + return true; + } + _read(size: number) { + const it = this._reader; + if (it && !this._pulling && (this._pulling = true)) { + (async () => this._pulling = await this._pull(size, it))(); + } + } + _destroy(err: Error | null, cb: (error: Error | null) => void) { + const writer = this._writer; + if (writer) { err ? writer.abort(err) : writer.close(); } + cb(this._reader = this._writer = null); + } + async _pull(size: number, reader: AsyncByteStream) { + let r: IteratorResult | null = null; + while (this.readable && !(r = await reader.next(size || null)).done) { + if (size != null && r.value) { + size -= r.value.byteLength; + } + if (!this.push(r.value) || size <= 0) { break; } + } + if ((r && r.done || !this.readable)) { + this.push(null); + await reader.cancel(); + } + return !this.readable; + } +} diff --git a/js/src/ipc/reader.ts b/js/src/ipc/reader.ts new file mode 100644 index 0000000000000..91990afb35b17 --- /dev/null +++ b/js/src/ipc/reader.ts @@ -0,0 +1,737 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
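+
+// A rough usage sketch of the reader implemented below: `RecordBatchReader.from()`
+// sniffs its input and returns a file, stream, or JSON reader. Assuming `arrowBytes`
+// is a hypothetical Uint8Array holding serialized IPC data (a synchronous source):
+//
+//     const reader = RecordBatchReader.from(arrowBytes);
+//     for (const batch of reader.open()) {
+//         console.log(batch.length);    // each `batch` is a RecordBatch
+//     }
+//
+// Async sources (Node/DOM streams, file handles, fetch responses) yield Promises of
+// readers instead, and are consumed with `for await`.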
+ +import { DataType } from '../type'; +import { Vector } from '../vector'; +import { MessageHeader } from '../enum'; +import { Footer } from './metadata/file'; +import { Schema, Field } from '../schema'; +import streamAdapters from '../io/adapters'; +import { Message } from './metadata/message'; +import { RecordBatch } from '../recordbatch'; +import * as metadata from './metadata/message'; +import { ArrayBufferViewInput } from '../util/buffer'; +import { ByteStream, AsyncByteStream } from '../io/stream'; +import { RandomAccessFile, AsyncRandomAccessFile } from '../io/file'; +import { VectorLoader, JSONVectorLoader } from '../visitor/vectorloader'; +import { + FileHandle, + ArrowJSONLike, + ITERATOR_DONE, + ReadableInterop, +} from '../io/interfaces'; +import { + MessageReader, AsyncMessageReader, JSONMessageReader, + checkForMagicArrowString, magicLength, magicAndPadding, magicX2AndPadding +} from './message'; +import { + isPromise, + isIterable, isAsyncIterable, + isIteratorResult, isArrowJSON, + isFileHandle, isFetchResponse, + isReadableDOMStream, isReadableNodeStream +} from '../util/compat'; + +/** @ignore */ export type FromArg0 = ArrowJSONLike; +/** @ignore */ export type FromArg1 = PromiseLike; +/** @ignore */ export type FromArg2 = Iterable | ArrayBufferViewInput; +/** @ignore */ export type FromArg3 = PromiseLike | ArrayBufferViewInput>; +/** @ignore */ export type FromArg4 = Response | NodeJS.ReadableStream | ReadableStream | AsyncIterable; +/** @ignore */ export type FromArg5 = FileHandle | PromiseLike | PromiseLike; +/** @ignore */ export type FromArgs = FromArg0 | FromArg1 | FromArg2 | FromArg3 | FromArg4 | FromArg5; + +/** @ignore */ type OpenOptions = { autoDestroy?: boolean; }; +/** @ignore */ type RecordBatchReaders = RecordBatchFileReader | RecordBatchStreamReader; +/** @ignore */ type AsyncRecordBatchReaders = AsyncRecordBatchFileReader | AsyncRecordBatchStreamReader; +/** @ignore */ type RecordBatchFileReaders = RecordBatchFileReader | AsyncRecordBatchFileReader; +/** @ignore */ type RecordBatchStreamReaders = RecordBatchStreamReader | AsyncRecordBatchStreamReader; + +export class RecordBatchReader extends ReadableInterop> { + + protected _impl: RecordBatchReaderImpls; + protected constructor(impl: RecordBatchReaderImpls) { + super(); + this._impl = impl; + } + + public get closed() { return this._impl.closed; } + public get schema() { return this._impl.schema; } + public get autoDestroy() { return this._impl.autoDestroy; } + public get dictionaries() { return this._impl.dictionaries; } + public get numDictionaries() { return this._impl.numDictionaries; } + public get numRecordBatches() { return this._impl.numRecordBatches; } + public get footer() { return this._impl.isFile() ? 
this._impl.footer : null; } + + public isSync(): this is RecordBatchReaders { return this._impl.isSync(); } + public isAsync(): this is AsyncRecordBatchReaders { return this._impl.isAsync(); } + public isFile(): this is RecordBatchFileReaders { return this._impl.isFile(); } + public isStream(): this is RecordBatchStreamReaders { return this._impl.isStream(); } + + public next() { + return this._impl.next(); + } + public throw(value?: any) { + return this._impl.throw(value); + } + public return(value?: any) { + return this._impl.return(value); + } + public cancel() { + return this._impl.cancel(); + } + public reset(schema?: Schema | null): this { + this._impl.reset(schema); + return this; + } + public open(options?: OpenOptions) { + const opening = this._impl.open(options); + return isPromise(opening) ? opening.then(() => this) : this; + } + public readRecordBatch(index: number): RecordBatch | null | Promise | null> { + return this._impl.isFile() ? this._impl.readRecordBatch(index) : null; + } + public [Symbol.iterator](): IterableIterator> { + return (>> this._impl)[Symbol.iterator](); + } + public [Symbol.asyncIterator](): AsyncIterableIterator> { + return (>> this._impl)[Symbol.asyncIterator](); + } + public toDOMStream() { + return streamAdapters.toDOMStream>( + (this.isSync() + ? { [Symbol.iterator]: () => this } as Iterable> + : { [Symbol.asyncIterator]: () => this } as AsyncIterable>)); + } + public toNodeStream() { + return streamAdapters.toNodeStream>( + (this.isSync() + ? { [Symbol.iterator]: () => this } as Iterable> + : { [Symbol.asyncIterator]: () => this } as AsyncIterable>), + { objectMode: true }); + } + + /** @nocollapse */ + // @ts-ignore + public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex { + throw new Error(`"throughNode" not available in this environment`); + } + /** @nocollapse */ + public static throughDOM( + // @ts-ignore + writableStrategy?: ByteLengthQueuingStrategy, + // @ts-ignore + readableStrategy?: { autoDestroy: boolean } + ): { writable: WritableStream, readable: ReadableStream> } { + throw new Error(`"throughDOM" not available in this environment`); + } + + public static from(source: T): T; + public static from(source: FromArg0): RecordBatchStreamReader; + public static from(source: FromArg1): Promise>; + public static from(source: FromArg2): RecordBatchFileReader | RecordBatchStreamReader; + public static from(source: FromArg3): Promise | RecordBatchStreamReader>; + public static from(source: FromArg4): Promise | AsyncRecordBatchReaders>; + public static from(source: FromArg5): Promise | AsyncRecordBatchStreamReader>; + /** @nocollapse */ + public static from(source: any) { + if (source instanceof RecordBatchReader) { + return source; + } else if (isArrowJSON(source)) { + return fromArrowJSON(source); + } else if (isFileHandle(source)) { + return fromFileHandle(source); + } else if (isPromise(source)) { + return (async () => await RecordBatchReader.from(await source))(); + } else if (isFetchResponse(source) || isReadableDOMStream(source) || isReadableNodeStream(source) || isAsyncIterable(source)) { + return fromAsyncByteStream(new AsyncByteStream(source)); + } + return fromByteStream(new ByteStream(source)); + } + + public static readAll(source: T): T extends RecordBatchReaders ? 
IterableIterator : AsyncIterableIterator; + public static readAll(source: FromArg0): IterableIterator>; + public static readAll(source: FromArg1): AsyncIterableIterator>; + public static readAll(source: FromArg2): IterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg3): AsyncIterableIterator | RecordBatchStreamReader>; + public static readAll(source: FromArg4): AsyncIterableIterator | AsyncRecordBatchReaders>; + public static readAll(source: FromArg5): AsyncIterableIterator | AsyncRecordBatchStreamReader>; + /** @nocollapse */ + public static readAll(source: any) { + if (source instanceof RecordBatchReader) { + return source.isSync() ? readAllSync(source) : readAllAsync(source as AsyncRecordBatchReaders); + } else if (isArrowJSON(source) || ArrayBuffer.isView(source) || isIterable(source) || isIteratorResult(source)) { + return readAllSync(source) as IterableIterator>; + } + return readAllAsync(source) as AsyncIterableIterator | AsyncRecordBatchReaders>; + } +} + +// +// Since TS is a structural type system, we define the following subclass stubs +// so that concrete types exist to associate with with the interfaces below. +// +// The implementation for each RecordBatchReader is hidden away in the set of +// `RecordBatchReaderImpl` classes in the second half of this file. This allows +// us to export a single RecordBatchReader class, and swap out the impl based +// on the io primitives or underlying arrow (JSON, file, or stream) at runtime. +// +// Async/await makes our job a bit harder, since it forces everything to be +// either fully sync or fully async. This is why the logic for the reader impls +// has been duplicated into both sync and async variants. Since the RBR +// delegates to its impl, an RBR with an AsyncRecordBatchFileReaderImpl for +// example will return async/await-friendly Promises, but one with a (sync) +// RecordBatchStreamReaderImpl will always return values. Nothing should be +// different about their logic, aside from the async handling. This is also why +// this code looks highly structured, as it should be nearly identical and easy +// to follow. 
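+//
+// Concretely, the same call site differs only in whether it awaits. A sync reader's
+// `next()` returns an IteratorResult directly, while an async reader's `next()`
+// returns a Promise of the same result:
+//
+//     reader.next()          // RecordBatchStreamReader: IteratorResult value
+//     await reader.next()    // AsyncRecordBatchStreamReader: Promise of the result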
+// + +/** @ignore */ +export class RecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: RecordBatchStreamReaderImpl) { super (_impl); } + public [Symbol.iterator]() { return (this._impl as IterableIterator>)[Symbol.iterator](); } + public async *[Symbol.asyncIterator](): AsyncIterableIterator> { yield* this[Symbol.iterator](); } +} +/** @ignore */ +export class AsyncRecordBatchStreamReader extends RecordBatchReader { + constructor(protected _impl: AsyncRecordBatchStreamReaderImpl) { super (_impl); } + public [Symbol.iterator](): IterableIterator> { throw new Error(`AsyncRecordBatchStreamReader is not Iterable`); } + public [Symbol.asyncIterator]() { return (this._impl as AsyncIterableIterator>)[Symbol.asyncIterator](); } +} +/** @ignore */ +export class RecordBatchFileReader extends RecordBatchStreamReader { + constructor(protected _impl: RecordBatchFileReaderImpl) { super (_impl); } +} +/** @ignore */ +export class AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { + constructor(protected _impl: AsyncRecordBatchFileReaderImpl) { super (_impl); } +} + +// +// Now override the return types for each sync/async RecordBatchReader variant +// + +/** @ignore */ +export interface RecordBatchStreamReader extends RecordBatchReader { + open(options?: OpenOptions | undefined): this; + cancel(): void; + throw(value?: any): IteratorResult; + return(value?: any): IteratorResult; + next(value?: any): IteratorResult>; +} + +/** @ignore */ +export interface AsyncRecordBatchStreamReader extends RecordBatchReader { + open(options?: OpenOptions | undefined): Promise; + cancel(): Promise; + throw(value?: any): Promise>; + return(value?: any): Promise>; + next(value?: any): Promise>>; +} + +/** @ignore */ +export interface RecordBatchFileReader extends RecordBatchStreamReader { + footer: Footer; + readRecordBatch(index: number): RecordBatch | null; +} + +/** @ignore */ +export interface AsyncRecordBatchFileReader extends AsyncRecordBatchStreamReader { + footer: Footer; + readRecordBatch(index: number): Promise | null>; +} + +/** @ignore */ +type RecordBatchReaderImpls = + RecordBatchJSONReaderImpl | + RecordBatchFileReaderImpl | + RecordBatchStreamReaderImpl | + AsyncRecordBatchFileReaderImpl | + AsyncRecordBatchStreamReaderImpl; + +/** @ignore */ +interface RecordBatchReaderImpl { + + closed: boolean; + schema: Schema; + autoDestroy: boolean; + dictionaries: Map; + + isFile(): this is RecordBatchFileReaders; + isStream(): this is RecordBatchStreamReaders; + isSync(): this is RecordBatchReaders; + isAsync(): this is AsyncRecordBatchReaders; + + reset(schema?: Schema | null): this; +} + +/** @ignore */ +interface RecordBatchStreamReaderImpl extends RecordBatchReaderImpl { + + open(options?: OpenOptions): this; + cancel(): void; + + throw(value?: any): IteratorResult; + return(value?: any): IteratorResult; + next(value?: any): IteratorResult>; + + [Symbol.iterator](): IterableIterator>; +} + +/** @ignore */ +interface AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl { + + open(options?: OpenOptions): Promise; + cancel(): Promise; + + throw(value?: any): Promise>; + return(value?: any): Promise>; + next(value?: any): Promise>>; + + [Symbol.asyncIterator](): AsyncIterableIterator>; +} + +/** @ignore */ +interface RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { + readRecordBatch(index: number): RecordBatch | null; +} + +/** @ignore */ +interface AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl { + 
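+    // Random access by record-batch index; resolves to null when the reader is
+    // closed or no batch exists at `index`.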
readRecordBatch(index: number): Promise | null>; +} + +/** @ignore */ +abstract class RecordBatchReaderImpl implements RecordBatchReaderImpl { + + // @ts-ignore + public schema: Schema; + public closed = false; + public autoDestroy = true; + public dictionaries: Map; + + protected _dictionaryIndex = 0; + protected _recordBatchIndex = 0; + public get numDictionaries() { return this._dictionaryIndex; } + public get numRecordBatches() { return this._recordBatchIndex; } + + constructor(dictionaries = new Map()) { + this.dictionaries = dictionaries; + } + + public isSync(): this is RecordBatchReaders { return false; } + public isAsync(): this is AsyncRecordBatchReaders { return false; } + public isFile(): this is RecordBatchFileReaders { return false; } + public isStream(): this is RecordBatchStreamReaders { return false; } + + public reset(schema?: Schema | null) { + this._dictionaryIndex = 0; + this._recordBatchIndex = 0; + this.schema = schema; + this.dictionaries = new Map(); + return this; + } + + protected _loadRecordBatch(header: metadata.RecordBatch, body: any) { + return new RecordBatch(this.schema, header.length, this._loadVectors(header, body, this.schema.fields)); + } + protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body: any) { + const { id, isDelta, data } = header; + const { dictionaries, schema } = this; + if (isDelta || !dictionaries.get(id)) { + + const type = schema.dictionaries.get(id)!; + const vector = (isDelta ? dictionaries.get(id)!.concat( + Vector.new(this._loadVectors(data, body, [type])[0])) : + Vector.new(this._loadVectors(data, body, [type])[0])) as Vector; + + (schema.dictionaryFields.get(id) || []).forEach(({ type }) => type.dictionaryVector = vector); + + return vector; + } + return dictionaries.get(id)!; + } + protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { + return new VectorLoader(body, header.nodes, header.buffers).visitMany(types); + } +} + +/** @ignore */ +class RecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements IterableIterator> { + + protected _reader: MessageReader; + protected _handle: ByteStream | ArrowJSONLike; + + constructor(source: ByteStream | ArrowJSONLike, dictionaries?: Map) { + super(dictionaries); + this._reader = !isArrowJSON(source) + ? 
new MessageReader(this._handle = source) + : new JSONMessageReader(this._handle = source); + } + + public isSync(): this is RecordBatchReaders { return true; } + public isStream(): this is RecordBatchStreamReaders { return true; } + public [Symbol.iterator](): IterableIterator> { + return this as IterableIterator>; + } + public cancel() { + if (!this.closed && (this.closed = true)) { + this.reset()._reader.return(); + this._reader = null; + this.dictionaries = null; + } + } + public open(options?: OpenOptions) { + if (!this.closed) { + this.autoDestroy = shouldAutoDestroy(this, options); + if (!(this.schema || (this.schema = this._reader.readSchema()!))) { + this.cancel(); + } + } + return this; + } + public throw(value?: any): IteratorResult { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return this.reset()._reader.throw(value); + } + return ITERATOR_DONE; + } + public return(value?: any): IteratorResult { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return this.reset()._reader.return(value); + } + return ITERATOR_DONE; + } + public next(): IteratorResult> { + if (this.closed) { return ITERATOR_DONE; } + let message: Message | null, { _reader: reader } = this; + while (message = this._readNextMessageAndValidate()) { + if (message.isSchema()) { + this.reset(message.header()); + } else if (message.isRecordBatch()) { + this._recordBatchIndex++; + const header = message.header(); + const buffer = reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return { done: false, value: recordBatch }; + } else if (message.isDictionaryBatch()) { + this._dictionaryIndex++; + const header = message.header(); + const buffer = reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + return this.return(); + } + protected _readNextMessageAndValidate(type?: T | null) { + return this._reader.readMessage(type); + } +} + +/** @ignore */ +class AsyncRecordBatchStreamReaderImpl extends RecordBatchReaderImpl implements AsyncIterableIterator> { + + protected _handle: AsyncByteStream; + protected _reader: AsyncMessageReader; + + constructor(source: AsyncByteStream, dictionaries?: Map) { + super(dictionaries); + this._reader = new AsyncMessageReader(this._handle = source); + } + public isAsync(): this is AsyncRecordBatchReaders { return true; } + public isStream(): this is RecordBatchStreamReaders { return true; } + public [Symbol.asyncIterator](): AsyncIterableIterator> { + return this as AsyncIterableIterator>; + } + public async cancel() { + if (!this.closed && (this.closed = true)) { + await this.reset()._reader.return(); + this._reader = null; + this.dictionaries = null; + } + } + public async open(options?: OpenOptions) { + if (!this.closed) { + this.autoDestroy = shouldAutoDestroy(this, options); + if (!(this.schema || (this.schema = (await this._reader.readSchema())!))) { + await this.cancel(); + } + } + return this; + } + public async throw(value?: any): Promise> { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return await this.reset()._reader.throw(value); + } + return ITERATOR_DONE; + } + public async return(value?: any): Promise> { + if (!this.closed && this.autoDestroy && (this.closed = true)) { + return await this.reset()._reader.return(value); + } + return ITERATOR_DONE; + } + public async next() { + if (this.closed) { return ITERATOR_DONE; } + let message: Message | null, { _reader: 
reader } = this; + while (message = await this._readNextMessageAndValidate()) { + if (message.isSchema()) { + await this.reset(message.header()); + } else if (message.isRecordBatch()) { + this._recordBatchIndex++; + const header = message.header(); + const buffer = await reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return { done: false, value: recordBatch }; + } else if (message.isDictionaryBatch()) { + this._dictionaryIndex++; + const header = message.header(); + const buffer = await reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + return await this.return(); + } + protected async _readNextMessageAndValidate(type?: T | null) { + return await this._reader.readMessage(type); + } +} + +/** @ignore */ +class RecordBatchFileReaderImpl extends RecordBatchStreamReaderImpl { + + // @ts-ignore + protected _footer?: Footer; + // @ts-ignore + protected _handle: RandomAccessFile; + public get footer() { return this._footer!; } + public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; } + public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } + + constructor(source: RandomAccessFile | ArrayBufferViewInput, dictionaries?: Map) { + super(source instanceof RandomAccessFile ? source : new RandomAccessFile(source), dictionaries); + } + public isSync(): this is RecordBatchReaders { return true; } + public isFile(): this is RecordBatchFileReaders { return true; } + public open(options?: OpenOptions) { + if (!this.closed && !this._footer) { + this.schema = (this._footer = this._readFooter()).schema; + for (const block of this._footer.dictionaryBatches()) { + block && this._readDictionaryBatch(this._dictionaryIndex++); + } + } + return super.open(options); + } + public readRecordBatch(index: number) { + if (this.closed) { return null; } + if (!this._footer) { this.open(); } + const block = this._footer && this._footer.getRecordBatch(index); + if (block && this._handle.seek(block.offset)) { + const message = this._reader.readMessage(MessageHeader.RecordBatch); + if (message && message.isRecordBatch()) { + const header = message.header(); + const buffer = this._reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return recordBatch; + } + } + return null; + } + protected _readDictionaryBatch(index: number) { + const block = this._footer && this._footer.getDictionaryBatch(index); + if (block && this._handle.seek(block.offset)) { + const message = this._reader.readMessage(MessageHeader.DictionaryBatch); + if (message && message.isDictionaryBatch()) { + const header = message.header(); + const buffer = this._reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + } + protected _readFooter() { + const { _handle } = this; + const offset = _handle.size - magicAndPadding; + const length = _handle.readInt32(offset); + const buffer = _handle.readAt(offset - length, length); + return Footer.decode(buffer); + } + protected _readNextMessageAndValidate(type?: T | null): Message | null { + if (!this._footer) { this.open(); } + if (this._footer && this._recordBatchIndex < this.numRecordBatches) { + const block = this._footer && this._footer.getRecordBatch(this._recordBatchIndex); + if (block && this._handle.seek(block.offset)) { + return 
this._reader.readMessage(type); + } + } + return null; + } +} + +/** @ignore */ +class AsyncRecordBatchFileReaderImpl extends AsyncRecordBatchStreamReaderImpl + implements AsyncRecordBatchFileReaderImpl { + + protected _footer?: Footer; + // @ts-ignore + protected _handle: AsyncRandomAccessFile; + public get footer() { return this._footer!; } + public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; } + public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; } + + constructor(source: FileHandle, byteLength?: number, dictionaries?: Map); + constructor(source: FileHandle | AsyncRandomAccessFile, dictionaries?: Map); + constructor(source: FileHandle | AsyncRandomAccessFile, ...rest: any[]) { + const byteLength = typeof rest[0] !== 'number' ? rest.shift() : undefined; + const dictionaries = rest[0] instanceof Map ? > rest.shift() : undefined; + super(source instanceof AsyncRandomAccessFile ? source : new AsyncRandomAccessFile(source, byteLength), dictionaries); + } + public isFile(): this is RecordBatchFileReaders { return true; } + public isAsync(): this is AsyncRecordBatchReaders { return true; } + public async open(options?: OpenOptions) { + if (!this.closed && !this._footer) { + this.schema = (this._footer = await this._readFooter()).schema; + for (const block of this._footer.dictionaryBatches()) { + block && await this._readDictionaryBatch(this._dictionaryIndex++); + } + } + return await super.open(options); + } + public async readRecordBatch(index: number) { + if (this.closed) { return null; } + if (!this._footer) { await this.open(); } + const block = this._footer && this._footer.getRecordBatch(index); + if (block && (await this._handle.seek(block.offset))) { + const message = await this._reader.readMessage(MessageHeader.RecordBatch); + if (message && message.isRecordBatch()) { + const header = message.header(); + const buffer = await this._reader.readMessageBody(message.bodyLength); + const recordBatch = this._loadRecordBatch(header, buffer); + return recordBatch; + } + } + return null; + } + protected async _readDictionaryBatch(index: number) { + const block = this._footer && this._footer.getDictionaryBatch(index); + if (block && (await this._handle.seek(block.offset))) { + const message = await this._reader.readMessage(MessageHeader.DictionaryBatch); + if (message && message.isDictionaryBatch()) { + const header = message.header(); + const buffer = await this._reader.readMessageBody(message.bodyLength); + const vector = this._loadDictionaryBatch(header, buffer); + this.dictionaries.set(header.id, vector); + } + } + } + protected async _readFooter() { + const { _handle } = this; + _handle._pending && await _handle._pending; + const offset = _handle.size - magicAndPadding; + const length = await _handle.readInt32(offset); + const buffer = await _handle.readAt(offset - length, length); + return Footer.decode(buffer); + } + protected async _readNextMessageAndValidate(type?: T | null): Promise | null> { + if (!this._footer) { await this.open(); } + if (this._footer && this._recordBatchIndex < this.numRecordBatches) { + const block = this._footer.getRecordBatch(this._recordBatchIndex); + if (block && await this._handle.seek(block.offset)) { + return await this._reader.readMessage(type); + } + } + return null; + } +} + +/** @ignore */ +class RecordBatchJSONReaderImpl extends RecordBatchStreamReaderImpl { + constructor(source: ArrowJSONLike, dictionaries?: Map) { + super(source, dictionaries); + } + protected 
_loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) { + return new JSONVectorLoader(body, header.nodes, header.buffers).visitMany(types); + } +} + +// +// Define some helper functions and static implementations down here. There's +// a bit of branching in the static methods that can lead to the same routines +// being executed, so we've broken those out here for readability. +// + +/** @ignore */ +function shouldAutoDestroy(self: { autoDestroy: boolean }, options?: OpenOptions) { + return options && (typeof options['autoDestroy'] === 'boolean') ? options['autoDestroy'] : self['autoDestroy']; +} + +/** @ignore */ +function* readAllSync(source: RecordBatchReaders | FromArg0 | FromArg2) { + const reader = RecordBatchReader.from( source) as RecordBatchReaders; + try { + if (!reader.open({ autoDestroy: false }).closed) { + do { yield reader; } while (!(reader.reset().open()).closed); + } + } finally { reader.cancel(); } +} + +/** @ignore */ +async function* readAllAsync(source: AsyncRecordBatchReaders | FromArg1 | FromArg3 | FromArg4 | FromArg5) { + const reader = await RecordBatchReader.from( source) as RecordBatchReader; + try { + if (!(await reader.open({ autoDestroy: false })).closed) { + do { yield reader; } while (!(await reader.reset().open()).closed); + } + } finally { await reader.cancel(); } +} + +/** @ignore */ +function fromArrowJSON(source: ArrowJSONLike) { + return new RecordBatchStreamReader(new RecordBatchJSONReaderImpl(source)); +} + +/** @ignore */ +function fromByteStream(source: ByteStream) { + const bytes = source.peek((magicLength + 7) & ~7); + return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) + ? new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(source)) + : new RecordBatchFileReader(new RecordBatchFileReaderImpl(source.read())) + : new RecordBatchStreamReader(new RecordBatchStreamReaderImpl(function*(): any {}())); +} + +/** @ignore */ +async function fromAsyncByteStream(source: AsyncByteStream) { + const bytes = await source.peek((magicLength + 7) & ~7); + return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes) + ? new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(source)) + : new RecordBatchFileReader(new RecordBatchFileReaderImpl(await source.read())) + : new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(async function*(): any {}())); +} + +/** @ignore */ +async function fromFileHandle(source: FileHandle) { + const { size } = await source.stat(); + const file = new AsyncRandomAccessFile(source, size); + if (size >= magicX2AndPadding) { + if (checkForMagicArrowString(await file.readAt(0, (magicLength + 7) & ~7))) { + return new AsyncRecordBatchFileReader(new AsyncRecordBatchFileReaderImpl(file)); + } + } + return new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl(file)); +} diff --git a/js/src/ipc/reader/arrow.ts b/js/src/ipc/reader/arrow.ts deleted file mode 100644 index 1847c9c2eb628..0000000000000 --- a/js/src/ipc/reader/arrow.ts +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { readJSON } from './json'; -import { fromReadableStream } from './node'; -import { RecordBatch } from '../../recordbatch'; -import { readBuffers, readBuffersAsync } from './binary'; -import { readRecordBatches, readRecordBatchesAsync, TypeDataLoader } from './vector'; -import { Schema } from '../../type'; -import { Message } from '../metadata'; - -export { readJSON, RecordBatch }; -export { readBuffers, readBuffersAsync }; -export { readRecordBatches, readRecordBatchesAsync }; - -export function* read(sources: Iterable | object | string) { - let input: any = sources; - let messages: Iterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>; - if (typeof input === 'string') { - try { input = JSON.parse(input); } - catch (e) { input = sources; } - } - if (!input || typeof input !== 'object') { - messages = (typeof input === 'string') ? readBuffers([input]) : []; - } else { - messages = (typeof input[Symbol.iterator] === 'function') ? readBuffers(input) : readJSON(input); - } - yield* readRecordBatches(messages); -} - -export async function* readAsync(sources: AsyncIterable) { - for await (let recordBatch of readRecordBatchesAsync(readBuffersAsync(sources))) { - yield recordBatch; - } -} - -export async function* readStream(stream: NodeJS.ReadableStream) { - for await (const recordBatch of readAsync(fromReadableStream(stream))) { - yield recordBatch as RecordBatch; - } -} diff --git a/js/src/ipc/reader/binary.ts b/js/src/ipc/reader/binary.ts deleted file mode 100644 index 988ce606b2614..0000000000000 --- a/js/src/ipc/reader/binary.ts +++ /dev/null @@ -1,432 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Vector } from '../../vector'; -import { flatbuffers } from 'flatbuffers'; -import { TypeDataLoader } from './vector'; -import { checkForMagicArrowString, PADDING, magicAndPadding, isValidArrowFile } from '../magic'; -import { Message, Footer, FileBlock, RecordBatchMetadata, DictionaryBatch, BufferMetadata, FieldMetadata, } from '../metadata'; -import { - Schema, Field, - DataType, Dictionary, - Null, TimeBitWidth, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from '../../type'; - -import { - Int8, Uint8, - Int16, Uint16, - Int32, Uint32, - Int64, Uint64, - Float16, Float64, Float32, -} from '../../type'; - -import ByteBuffer = flatbuffers.ByteBuffer; - -type MessageReader = (bb: ByteBuffer) => IterableIterator; - -export function* readBuffers(sources: Iterable | Uint8Array | Buffer | string) { - let schema: Schema | null = null; - let dictionaries = new Map(); - let readMessages: MessageReader | null = null; - if (ArrayBuffer.isView(sources) || typeof sources === 'string') { - sources = [sources as T]; - } - for (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb)) || true) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, - loader: new BinaryDataLoader( - bb, - arrayIterator((message as any).nodes || []), - arrayIterator((message as any).buffers || []), - dictionaries - ) - }; - } - } - } -} - -export async function* readBuffersAsync(sources: AsyncIterable) { - let schema: Schema | null = null; - let dictionaries = new Map(); - let readMessages: MessageReader | null = null; - for await (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb)) || true) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, - loader: new BinaryDataLoader( - bb, - arrayIterator((message as any).nodes || []), - arrayIterator((message as any).buffers || []), - dictionaries - ) - }; - } - } - } -} - -export class BinaryDataLoader extends TypeDataLoader { - private bytes: Uint8Array; - private messageOffset: number; - constructor(bb: ByteBuffer, nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(nodes, buffers, dictionaries); - this.bytes = bb.bytes(); - this.messageOffset = bb.position(); - } - protected readOffsets(type: T, buffer?: BufferMetadata) { return this.readData(type, buffer); } - protected readTypeIds(type: T, buffer?: BufferMetadata) { return this.readData(type, buffer); } - protected readData(_type: T, { length, offset }: BufferMetadata = this.getBufferMetadata()) { - return new Uint8Array(this.bytes.buffer, this.bytes.byteOffset + this.messageOffset + offset, length); - } -} - -function* arrayIterator(arr: Array) { yield* arr; } - -function toByteBuffer(bytes?: Uint8Array | Buffer | string) { - let arr: Uint8Array = bytes as any || new Uint8Array(0); - if (typeof bytes === 'string') { - arr = new Uint8Array(bytes.length); - for (let i = -1, n = bytes.length; ++i < n;) { - arr[i] = bytes.charCodeAt(i); - } - return new ByteBuffer(arr); - } - return new ByteBuffer(arr); -} - -function readSchema(bb: ByteBuffer) { - let schema: Schema, readMessages, footer: Footer | null; - if (footer = readFileSchema(bb)) { - schema = footer.schema; - readMessages = readFileMessages(footer); - } else if (schema = readStreamSchema(bb)!) 
{ - readMessages = readStreamMessages; - } else { - throw new Error('Invalid Arrow buffer'); - } - return { schema, readMessages }; -} - -function readStreamSchema(bb: ByteBuffer) { - if (!checkForMagicArrowString(bb.bytes(), 0)) { - for (const message of readMessages(bb)) { - if (Message.isSchema(message)) { - return message as Schema; - } - } - } - return null; -} - -function* readStreamMessages(bb: ByteBuffer) { - for (const message of readMessages(bb)) { - if (Message.isRecordBatch(message)) { - yield message; - } else if (Message.isDictionaryBatch(message)) { - yield message; - } else { - yield message; - } - // position the buffer after the body to read the next message - bb.setPosition(bb.position() + message.bodyLength); - } -} - -function readFileSchema(bb: ByteBuffer) { - if (!isValidArrowFile(bb)) { - return null; - } - let fileLength = bb.capacity(); - let lengthOffset = fileLength - magicAndPadding; - let footerLength = bb.readInt32(lengthOffset); - bb.setPosition(lengthOffset - footerLength); - return footerFromByteBuffer(bb); -} - -function readFileMessages(footer: Footer) { - return function* (bb: ByteBuffer) { - let message: RecordBatchMetadata | DictionaryBatch; - for (let i = -1, batches = footer.dictionaryBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset); - if (message = readMessage(bb, bb.readInt32(bb.position())) as DictionaryBatch) { - yield message; - } - } - for (let i = -1, batches = footer.recordBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset); - if (message = readMessage(bb, bb.readInt32(bb.position())) as RecordBatchMetadata) { - yield message; - } - } - }; -} - -function* readMessages(bb: ByteBuffer) { - let length: number, message: Schema | RecordBatchMetadata | DictionaryBatch; - while (bb.position() < bb.capacity() && - (length = bb.readInt32(bb.position())) > 0) { - if (message = readMessage(bb, length)!) 
{ - yield message; - } - } -} - -function readMessage(bb: ByteBuffer, length: number) { - bb.setPosition(bb.position() + PADDING); - const message = messageFromByteBuffer(bb); - bb.setPosition(bb.position() + length); - return message; -} - -import * as File_ from '../../fb/File'; -import * as Schema_ from '../../fb/Schema'; -import * as Message_ from '../../fb/Message'; - -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import _Footer = File_.org.apache.arrow.flatbuf.Footer; -import _Block = File_.org.apache.arrow.flatbuf.Block; -import _Message = Message_.org.apache.arrow.flatbuf.Message; -import _Schema = Schema_.org.apache.arrow.flatbuf.Schema; -import _Field = Schema_.org.apache.arrow.flatbuf.Field; -import _RecordBatch = Message_.org.apache.arrow.flatbuf.RecordBatch; -import _DictionaryBatch = Message_.org.apache.arrow.flatbuf.DictionaryBatch; -import _FieldNode = Message_.org.apache.arrow.flatbuf.FieldNode; -import _Buffer = Schema_.org.apache.arrow.flatbuf.Buffer; -import _DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; -import _Null = Schema_.org.apache.arrow.flatbuf.Null; -import _Int = Schema_.org.apache.arrow.flatbuf.Int; -import _FloatingPoint = Schema_.org.apache.arrow.flatbuf.FloatingPoint; -import _Binary = Schema_.org.apache.arrow.flatbuf.Binary; -import _Bool = Schema_.org.apache.arrow.flatbuf.Bool; -import _Utf8 = Schema_.org.apache.arrow.flatbuf.Utf8; -import _Decimal = Schema_.org.apache.arrow.flatbuf.Decimal; -import _Date = Schema_.org.apache.arrow.flatbuf.Date; -import _Time = Schema_.org.apache.arrow.flatbuf.Time; -import _Timestamp = Schema_.org.apache.arrow.flatbuf.Timestamp; -import _Interval = Schema_.org.apache.arrow.flatbuf.Interval; -import _List = Schema_.org.apache.arrow.flatbuf.List; -import _Struct = Schema_.org.apache.arrow.flatbuf.Struct_; -import _Union = Schema_.org.apache.arrow.flatbuf.Union; -import _FixedSizeBinary = Schema_.org.apache.arrow.flatbuf.FixedSizeBinary; -import _FixedSizeList = Schema_.org.apache.arrow.flatbuf.FixedSizeList; -import _Map = Schema_.org.apache.arrow.flatbuf.Map; - -function footerFromByteBuffer(bb: ByteBuffer) { - const dictionaryFields = new Map>(); - const f = _Footer.getRootAsFooter(bb), s = f.schema()!; - return new Footer( - dictionaryBatchesFromFooter(f), recordBatchesFromFooter(f), - new Schema(fieldsFromSchema(s, dictionaryFields), customMetadata(s), f.version(), dictionaryFields) - ); -} - -function messageFromByteBuffer(bb: ByteBuffer) { - const m = _Message.getRootAsMessage(bb)!, type = m.headerType(), version = m.version(); - switch (type) { - case MessageHeader.Schema: return schemaFromMessage(version, m.header(new _Schema())!, new Map()); - case MessageHeader.RecordBatch: return recordBatchFromMessage(version, m, m.header(new _RecordBatch())!); - case MessageHeader.DictionaryBatch: return dictionaryBatchFromMessage(version, m, m.header(new _DictionaryBatch())!); - } - return null; - // throw new Error(`Unrecognized Message type '${type}'`); -} - -function schemaFromMessage(version: MetadataVersion, s: _Schema, dictionaryFields: Map>) { - return new Schema(fieldsFromSchema(s, dictionaryFields), customMetadata(s), version, dictionaryFields); -} - -function recordBatchFromMessage(version: MetadataVersion, m: _Message, b: _RecordBatch) { - return new 
RecordBatchMetadata(version, b.length(), fieldNodesFromRecordBatch(b), buffersFromRecordBatch(b, version), m.bodyLength()); -} - -function dictionaryBatchFromMessage(version: MetadataVersion, m: _Message, d: _DictionaryBatch) { - return new DictionaryBatch(version, recordBatchFromMessage(version, m, d.data()!), d.id(), d.isDelta()); -} - -function dictionaryBatchesFromFooter(f: _Footer) { - const blocks = [] as FileBlock[]; - for (let b: _Block, i = -1, n = f && f.dictionariesLength(); ++i < n;) { - if (b = f.dictionaries(i)!) { - blocks.push(new FileBlock(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function recordBatchesFromFooter(f: _Footer) { - const blocks = [] as FileBlock[]; - for (let b: _Block, i = -1, n = f && f.recordBatchesLength(); ++i < n;) { - if (b = f.recordBatches(i)!) { - blocks.push(new FileBlock(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function fieldsFromSchema(s: _Schema, dictionaryFields: Map> | null) { - const fields = [] as Field[]; - for (let i = -1, c: Field | null, n = s && s.fieldsLength(); ++i < n;) { - if (c = field(s.fields(i)!, dictionaryFields)) { - fields.push(c); - } - } - return fields; -} - -function fieldsFromField(f: _Field, dictionaryFields: Map> | null) { - const fields = [] as Field[]; - for (let i = -1, c: Field | null, n = f && f.childrenLength(); ++i < n;) { - if (c = field(f.children(i)!, dictionaryFields)) { - fields.push(c); - } - } - return fields; -} - -function fieldNodesFromRecordBatch(b: _RecordBatch) { - const fieldNodes = [] as FieldMetadata[]; - for (let i = -1, n = b.nodesLength(); ++i < n;) { - fieldNodes.push(fieldNodeFromRecordBatch(b.nodes(i)!)); - } - return fieldNodes; -} - -function buffersFromRecordBatch(b: _RecordBatch, version: MetadataVersion) { - const buffers = [] as BufferMetadata[]; - for (let i = -1, n = b.buffersLength(); ++i < n;) { - let buffer = b.buffers(i)!; - // If this Arrow buffer was written before version 4, - // advance the buffer's bb_pos 8 bytes to skip past - // the now-removed page id field. - if (version < MetadataVersion.V4) { - buffer.bb_pos += (8 * (i + 1)); - } - buffers.push(bufferFromRecordBatch(buffer)); - } - return buffers; -} - -function field(f: _Field, dictionaryFields: Map> | null) { - let name = f.name()!; - let field: Field | void; - let nullable = f.nullable(); - let metadata = customMetadata(f); - let dataType: DataType | null; - let keysMeta: _Int | null, id: number; - let dictMeta: _DictionaryEncoding | null; - if (!dictionaryFields || !(dictMeta = f.dictionary())) { - if (dataType = typeFromField(f, fieldsFromField(f, dictionaryFields))) { - field = new Field(name, dataType, nullable, metadata); - } - } else if (dataType = dictionaryFields.has(id = dictMeta.id().low) - ? dictionaryFields.get(id)!.type.dictionary - : typeFromField(f, fieldsFromField(f, null))) { - dataType = new Dictionary(dataType, - // a dictionary index defaults to signed 32 bit int if unspecified - (keysMeta = dictMeta.indexType()) ? intFromField(keysMeta)! 
: new Int32(), - id, dictMeta.isOrdered() - ); - field = new Field(name, dataType, nullable, metadata); - dictionaryFields.has(id) || dictionaryFields.set(id, field as Field); - } - return field || null; -} - -function customMetadata(parent?: _Schema | _Field | null) { - const data = new Map(); - if (parent) { - for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { - if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { - data.set(key, entry.value()!); - } - } - } - return data; -} - -function fieldNodeFromRecordBatch(f: _FieldNode) { - return new FieldMetadata(f.length(), f.nullCount()); -} - -function bufferFromRecordBatch(b: _Buffer) { - return new BufferMetadata(b.offset(), b.length()); -} - -function typeFromField(f: _Field, children?: Field[]): DataType | null { - switch (f.typeType()) { - case Type.NONE: return null; - case Type.Null: return nullFromField(f.type(new _Null())!); - case Type.Int: return intFromField(f.type(new _Int())!); - case Type.FloatingPoint: return floatFromField(f.type(new _FloatingPoint())!); - case Type.Binary: return binaryFromField(f.type(new _Binary())!); - case Type.Utf8: return utf8FromField(f.type(new _Utf8())!); - case Type.Bool: return boolFromField(f.type(new _Bool())!); - case Type.Decimal: return decimalFromField(f.type(new _Decimal())!); - case Type.Date: return dateFromField(f.type(new _Date())!); - case Type.Time: return timeFromField(f.type(new _Time())!); - case Type.Timestamp: return timestampFromField(f.type(new _Timestamp())!); - case Type.Interval: return intervalFromField(f.type(new _Interval())!); - case Type.List: return listFromField(f.type(new _List())!, children || []); - case Type.Struct_: return structFromField(f.type(new _Struct())!, children || []); - case Type.Union: return unionFromField(f.type(new _Union())!, children || []); - case Type.FixedSizeBinary: return fixedSizeBinaryFromField(f.type(new _FixedSizeBinary())!); - case Type.FixedSizeList: return fixedSizeListFromField(f.type(new _FixedSizeList())!, children || []); - case Type.Map: return mapFromField(f.type(new _Map())!, children || []); - } - throw new Error(`Unrecognized type ${f.typeType()}`); -} - -function nullFromField (_type: _Null) { return new Null(); } -function intFromField (_type: _Int) { switch (_type.bitWidth()) { - case 8: return _type.isSigned() ? new Int8() : new Uint8(); - case 16: return _type.isSigned() ? new Int16() : new Uint16(); - case 32: return _type.isSigned() ? new Int32() : new Uint32(); - case 64: return _type.isSigned() ? 
new Int64() : new Uint64();
-    }
-    return null; }
-function floatFromField           (_type: _FloatingPoint)           { switch (_type.precision()) {
-    case Precision.HALF: return new Float16();
-    case Precision.SINGLE: return new Float32();
-    case Precision.DOUBLE: return new Float64();
-    }
-    return null; }
-function binaryFromField          (_type: _Binary)                  { return new Binary(); }
-function utf8FromField            (_type: _Utf8)                    { return new Utf8(); }
-function boolFromField            (_type: _Bool)                    { return new Bool(); }
-function decimalFromField         (_type: _Decimal)                 { return new Decimal(_type.scale(), _type.precision()); }
-function dateFromField            (_type: _Date)                    { return new Date_(_type.unit()); }
-function timeFromField            (_type: _Time)                    { return new Time(_type.unit(), _type.bitWidth() as TimeBitWidth); }
-function timestampFromField       (_type: _Timestamp)               { return new Timestamp(_type.unit(), _type.timezone()); }
-function intervalFromField        (_type: _Interval)                { return new Interval(_type.unit()); }
-function listFromField            (_type: _List, children: Field[])   { return new List(children); }
-function structFromField          (_type: _Struct, children: Field[]) { return new Struct(children); }
-function unionFromField           (_type: _Union, children: Field[])  { return new Union(_type.mode(), (_type.typeIdsArray() || []) as Type[], children); }
-function fixedSizeBinaryFromField (_type: _FixedSizeBinary)          { return new FixedSizeBinary(_type.byteWidth()); }
-function fixedSizeListFromField   (_type: _FixedSizeList, children: Field[]) { return new FixedSizeList(_type.listSize(), children); }
-function mapFromField             (_type: _Map, children: Field[])   { return new Map_(_type.keysSorted(), children); }
diff --git a/js/src/ipc/reader/json.ts b/js/src/ipc/reader/json.ts
deleted file mode 100644
index 0f0c018d66bb9..0000000000000
--- a/js/src/ipc/reader/json.ts
+++ /dev/null
@@ -1,304 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
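For reference, the readMessages()/readMessage() pair in the deleted flatbuf reader above assumes the pre-0.15 Arrow stream framing: a 4-byte little-endian metadata length, then the flatbuffer metadata, then bodyLength bytes of message body, with a length of 0 terminating the stream. A minimal sketch of that framing follows; frames() is a hypothetical name, not part of this patch, and it skips the flatbuffer decoding:

    function* frames(bytes: Uint8Array): IterableIterator<Uint8Array> {
        const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        let pos = 0;
        while (pos + 4 <= bytes.byteLength) {
            // 4-byte little-endian metadata length, as read by bb.readInt32() above
            const metadataLength = view.getInt32(pos, true);
            if (metadataLength <= 0) { break; } // zero length terminates the stream
            yield bytes.subarray(pos + 4, pos + 4 + metadataLength);
            // a real reader also advances past the message body here, the way
            // readStreamMessages() repositions the buffer by message.bodyLength
            pos += 4 + metadataLength;
        }
    }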
- -import { Vector } from '../../vector'; -import { flatbuffers } from 'flatbuffers'; -import { TypeDataLoader } from './vector'; -import { packBools } from '../../util/bit'; -import * as IntUtil from '../../util/int'; -import { TextEncoder } from 'text-encoding-utf-8'; -import { RecordBatchMetadata, DictionaryBatch, BufferMetadata, FieldMetadata } from '../metadata'; -import { - Schema, Field, - DataType, Dictionary, - Null, TimeBitWidth, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from '../../type'; - -import { - Int8, Uint8, - Int16, Uint16, - Int32, Uint32, - Int64, Uint64, - Float16, Float64, Float32, -} from '../../type'; - -import Long = flatbuffers.Long; - -export function* readJSON(json: any) { - const schema = schemaFromJSON(json['schema']); - const dictionaries = new Map(); - for (const batch of (json['dictionaries'] || [])) { - const message = dictionaryBatchFromJSON(batch); - yield { - schema, message, - loader: new JSONDataLoader( - flattenDataSources(batch['data']['columns']), - arrayIterator(message.nodes), - arrayIterator(message.buffers), - dictionaries - ) - }; - } - for (const batch of (json['batches'] || [])) { - const message = recordBatchFromJSON(batch); - yield { - schema, message, - loader: new JSONDataLoader( - flattenDataSources(batch['columns']), - arrayIterator(message.nodes), - arrayIterator(message.buffers), - dictionaries - ) - }; - } -} - -function* arrayIterator(arr: Array) { yield* arr; } -function flattenDataSources(xs: any[]): any[][] { - return (xs || []).reduce((buffers, column: any) => [ - ...buffers, - ...(column['VALIDITY'] && [column['VALIDITY']] || []), - ...(column['OFFSET'] && [column['OFFSET']] || []), - ...(column['TYPE'] && [column['TYPE']] || []), - ...(column['DATA'] && [column['DATA']] || []), - ...flattenDataSources(column['children']) - ], [] as any[][]); -} - -const utf8Encoder = new TextEncoder('utf-8'); - -export class JSONDataLoader extends TypeDataLoader { - constructor(private sources: any[][], nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(nodes, buffers, dictionaries); - } - protected readNullBitmap(_type: T, nullCount: number, { offset } = this.getBufferMetadata()) { - return nullCount <= 0 ? 
new Uint8Array(0) : packBools(this.sources[offset]); - } - protected readOffsets(_type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - return new Int32Array(this.sources[offset]); - } - protected readTypeIds(_type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - return new Int8Array(this.sources[offset]); - } - protected readData(type: T, { offset }: BufferMetadata = this.getBufferMetadata()) { - const { sources } = this; - if (DataType.isTimestamp(type) === true) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if ((DataType.isInt(type) || DataType.isTime(type)) && type.bitWidth === 64) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isDate(type) && type.unit === DateUnit.MILLISECOND) { - return new Uint8Array(IntUtil.Int64.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isDecimal(type) === true) { - return new Uint8Array(IntUtil.Int128.convertArray(sources[offset] as string[]).buffer); - } else if (DataType.isBinary(type) === true || DataType.isFixedSizeBinary(type) === true) { - return new Uint8Array(binaryDataFromJSON(sources[offset] as string[])); - } else if (DataType.isBool(type) === true) { - return new Uint8Array(packBools(sources[offset] as number[]).buffer); - } else if (DataType.isUtf8(type) === true) { - return utf8Encoder.encode((sources[offset] as string[]).join('')); - } else { - return toTypedArray(type.ArrayType, sources[offset].map((x) => +x)) as any; - } - } -} - -function binaryDataFromJSON(values: string[]) { - // "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"] - // There are definitely more efficient ways to do this... but it gets the - // job done. - const joined = values.join(''); - const data = new Uint8Array(joined.length / 2); - for (let i = 0; i < joined.length; i += 2) { - data[i >> 1] = parseInt(joined.substr(i, 2), 16); - } - return data.buffer; -} - -import * as Schema_ from '../../fb/Schema'; -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; -import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import { toTypedArray } from '../../data'; - -function schemaFromJSON(s: any): Schema { - const dictionaryFields = new Map>(); - return new Schema( - fieldsFromJSON(s['fields'], dictionaryFields), - customMetadata(s['customMetadata']), - MetadataVersion.V4, dictionaryFields - ); -} - -function recordBatchFromJSON(b: any): RecordBatchMetadata { - return new RecordBatchMetadata( - MetadataVersion.V4, - b['count'], - fieldNodesFromJSON(b['columns']), - buffersFromJSON(b['columns']) - ); -} - -function dictionaryBatchFromJSON(b: any): DictionaryBatch { - return new DictionaryBatch( - MetadataVersion.V4, - recordBatchFromJSON(b['data']), - b['id'], b['isDelta'] - ); -} - -function fieldsFromJSON(fs: any[], dictionaryFields: Map> | null): Field[] { - return (fs || []) - .map((f) => fieldFromJSON(f, dictionaryFields)) - .filter((f) => f != null) as Field[]; -} - -function fieldNodesFromJSON(xs: any[]): FieldMetadata[] { - return (xs || []).reduce((fieldNodes, column: any) => [ - ...fieldNodes, - new FieldMetadata( - new Long(column['count'], 0), - new 
Long(nullCountFromJSON(column['VALIDITY']), 0) - ), - ...fieldNodesFromJSON(column['children']) - ], [] as FieldMetadata[]); -} - -function buffersFromJSON(xs: any[], buffers: BufferMetadata[] = []): BufferMetadata[] { - for (let i = -1, n = (xs || []).length; ++i < n;) { - const column = xs[i]; - column['VALIDITY'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['VALIDITY'].length, 0))); - column['OFFSET'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['OFFSET'].length, 0))); - column['TYPE'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['TYPE'].length, 0))); - column['DATA'] && buffers.push(new BufferMetadata(new Long(buffers.length, 0), new Long(column['DATA'].length, 0))); - buffers = buffersFromJSON(column['children'], buffers); - } - return buffers; -} - -function nullCountFromJSON(validity: number[]) { - return (validity || []).reduce((sum, val) => sum + +(val === 0), 0); -} - -function fieldFromJSON(f: any, dictionaryFields: Map> | null) { - let name = f['name']; - let field: Field | void; - let nullable = f['nullable']; - let dataType: DataType | null; - let id: number, keysMeta: any, dictMeta: any; - let metadata = customMetadata(f['customMetadata']); - if (!dictionaryFields || !(dictMeta = f['dictionary'])) { - if (dataType = typeFromJSON(f['type'], fieldsFromJSON(f['children'], dictionaryFields))) { - field = new Field(name, dataType, nullable, metadata); - } - } else if (dataType = dictionaryFields.has(id = dictMeta['id']) - ? dictionaryFields.get(id)!.type.dictionary - : typeFromJSON(f['type'], fieldsFromJSON(f['children'], null))) { - dataType = new Dictionary(dataType, - // a dictionary index defaults to signed 32 bit int if unspecified - (keysMeta = dictMeta['indexType']) ? intFromJSON(keysMeta)! 
: new Int32(), - id, dictMeta['isOrdered'] - ); - field = new Field(name, dataType, nullable, metadata); - dictionaryFields.has(id) || dictionaryFields.set(id, field as Field); - } - return field || null; -} - -function customMetadata(metadata?: any) { - return new Map(Object.entries(metadata || {})); -} - -const namesToTypeMap: { [n: string]: Type } = { - 'NONE': Type.NONE, - 'null': Type.Null, - 'int': Type.Int, - 'floatingpoint': Type.FloatingPoint, - 'binary': Type.Binary, - 'bool': Type.Bool, - 'utf8': Type.Utf8, - 'decimal': Type.Decimal, - 'date': Type.Date, - 'time': Type.Time, - 'timestamp': Type.Timestamp, - 'interval': Type.Interval, - 'list': Type.List, - 'struct': Type.Struct_, - 'union': Type.Union, - 'fixedsizebinary': Type.FixedSizeBinary, - 'fixedsizelist': Type.FixedSizeList, - 'map': Type.Map, -}; - -function typeFromJSON(t: any, children?: Field[]) { - switch (namesToTypeMap[t['name']]) { - case Type.NONE: return null; - case Type.Null: return nullFromJSON(t); - case Type.Int: return intFromJSON(t); - case Type.FloatingPoint: return floatingPointFromJSON(t); - case Type.Binary: return binaryFromJSON(t); - case Type.Utf8: return utf8FromJSON(t); - case Type.Bool: return boolFromJSON(t); - case Type.Decimal: return decimalFromJSON(t); - case Type.Date: return dateFromJSON(t); - case Type.Time: return timeFromJSON(t); - case Type.Timestamp: return timestampFromJSON(t); - case Type.Interval: return intervalFromJSON(t); - case Type.List: return listFromJSON(t, children || []); - case Type.Struct_: return structFromJSON(t, children || []); - case Type.Union: return unionFromJSON(t, children || []); - case Type.FixedSizeBinary: return fixedSizeBinaryFromJSON(t); - case Type.FixedSizeList: return fixedSizeListFromJSON(t, children || []); - case Type.Map: return mapFromJSON(t, children || []); - } - throw new Error(`Unrecognized type ${t['name']}`); -} - -function nullFromJSON (_type: any) { return new Null(); } -function intFromJSON (_type: any) { switch (_type['bitWidth']) { - case 8: return _type['isSigned'] ? new Int8() : new Uint8(); - case 16: return _type['isSigned'] ? new Int16() : new Uint16(); - case 32: return _type['isSigned'] ? new Int32() : new Uint32(); - case 64: return _type['isSigned'] ? 
new Int64() : new Uint64();
-    }
-    return null; }
-function floatingPointFromJSON    (_type: any)                      { switch (Precision[_type['precision']] as any) {
-    case Precision.HALF: return new Float16();
-    case Precision.SINGLE: return new Float32();
-    case Precision.DOUBLE: return new Float64();
-    }
-    return null; }
-function binaryFromJSON           (_type: any)                      { return new Binary(); }
-function utf8FromJSON             (_type: any)                      { return new Utf8(); }
-function boolFromJSON             (_type: any)                      { return new Bool(); }
-function decimalFromJSON          (_type: any)                      { return new Decimal(_type['scale'], _type['precision']); }
-function dateFromJSON             (_type: any)                      { return new Date_(DateUnit[_type['unit']] as any); }
-function timeFromJSON             (_type: any)                      { return new Time(TimeUnit[_type['unit']] as any, _type['bitWidth'] as TimeBitWidth); }
-function timestampFromJSON        (_type: any)                      { return new Timestamp(TimeUnit[_type['unit']] as any, _type['timezone']); }
-function intervalFromJSON         (_type: any)                      { return new Interval(IntervalUnit[_type['unit']] as any); }
-function listFromJSON             (_type: any, children: Field[])   { return new List(children); }
-function structFromJSON           (_type: any, children: Field[])   { return new Struct(children); }
-function unionFromJSON            (_type: any, children: Field[])   { return new Union(UnionMode[_type['mode']] as any, (_type['typeIds'] || []) as Type[], children); }
-function fixedSizeBinaryFromJSON  (_type: any)                      { return new FixedSizeBinary(_type['byteWidth']); }
-function fixedSizeListFromJSON    (_type: any, children: Field[])   { return new FixedSizeList(_type['listSize'], children); }
-function mapFromJSON              (_type: any, children: Field[])   { return new Map_(_type['keysSorted'], children); }
diff --git a/js/src/ipc/reader/node.ts b/js/src/ipc/reader/node.ts
deleted file mode 100644
index 24295c81cbd52..0000000000000
--- a/js/src/ipc/reader/node.ts
+++ /dev/null
@@ -1,78 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
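The deleted JSON reader above leans on packBools() and nullCountFromJSON() to turn a column's VALIDITY array (0 for null, nonzero for present) into an Arrow validity bitmap. A minimal sketch of that step, under the assumption of LSB-first bit packing as in util/bit; packValidity() is a hypothetical name combining the two helpers:

    function packValidity(validity: number[]): { bitmap: Uint8Array, nullCount: number } {
        const bitmap = new Uint8Array((validity.length + 7) >> 3);
        let nullCount = 0;
        for (let i = 0; i < validity.length; ++i) {
            if (validity[i] === 0) {
                ++nullCount;                    // JSON marks nulls with 0 ...
            } else {
                bitmap[i >> 3] |= 1 << (i % 8); // ... and bits are packed LSB-first
            }
        }
        return { bitmap, nullCount };
    }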
- -import { flatbuffers } from 'flatbuffers'; -import * as Message_ from '../../fb/Message'; -import ByteBuffer = flatbuffers.ByteBuffer; -import _Message = Message_.org.apache.arrow.flatbuf.Message; -import { PADDING, isValidArrowFile, checkForMagicArrowString } from '../magic'; - -export async function* fromReadableStream(stream: NodeJS.ReadableStream) { - - let bb: ByteBuffer; - let bytesRead = 0, bytes = new Uint8Array(0); - let messageLength = 0, message: _Message | null = null; - - for await (let chunk of (stream as any as AsyncIterable)) { - - if (chunk == null) { - continue; - } - - const grown = new Uint8Array(bytes.byteLength + chunk.length); - - if (typeof chunk !== 'string') { - grown.set(bytes, 0) || grown.set(chunk, bytes.byteLength); - } else { - for (let i = -1, j = bytes.byteLength, n = chunk.length; ++i < n;) { - grown[i + j] = chunk.charCodeAt(i); - } - } - - bytes = grown; - - // If we're reading in an Arrow File, just concatenate the bytes until - // the file is fully read in - if (checkForMagicArrowString(bytes)) { - if (!isValidArrowFile(new ByteBuffer(bytes))) { - continue; - } - return yield bytes; - } - - if (bytes.byteLength > 0 && messageLength <= 0) { - messageLength = new DataView(bytes.buffer).getInt32(0, true); - } - - while (messageLength > 0 && messageLength <= bytes.byteLength) { - if (!message) { - (bb = new ByteBuffer(bytes)).setPosition(4); - if (message = _Message.getRootAsMessage(bb)) { - messageLength += message.bodyLength().low; - continue; - } - throw new Error(`Invalid message at position ${bytesRead}`); - } - bytesRead += messageLength + PADDING; - yield bytes.subarray(0, messageLength + PADDING); - bytes = bytes.subarray(messageLength + PADDING); - messageLength = bytes.byteLength < 4 ? 0 : - new DataView(bytes.buffer).getInt32(bytes.byteOffset, true); - message = null; - } - } -} diff --git a/js/src/ipc/reader/vector.ts b/js/src/ipc/reader/vector.ts deleted file mode 100644 index c4688f5e2b851..0000000000000 --- a/js/src/ipc/reader/vector.ts +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
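The fromReadableStream() generator in the deleted node.ts above grows a single Uint8Array as chunks arrive, treating string chunks as one byte per char code. That accumulation step, pulled out as a standalone sketch (append() is a hypothetical name, not an API in this patch):

    function append(bytes: Uint8Array, chunk: Uint8Array | string): Uint8Array {
        const grown = new Uint8Array(bytes.byteLength + chunk.length);
        grown.set(bytes, 0);
        if (typeof chunk === 'string') {
            // one byte per char code, matching the charCodeAt() loop above;
            // assignment into a Uint8Array truncates to the low 8 bits
            for (let i = 0; i < chunk.length; ++i) {
                grown[bytes.byteLength + i] = chunk.charCodeAt(i);
            }
        } else {
            grown.set(chunk, bytes.byteLength);
        }
        return grown;
    }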
- -import { Vector } from '../../vector'; -import { RecordBatch } from '../../recordbatch'; -import { TypeVisitor } from '../../visitor'; -import { FlatType, NestedType, ListType } from '../../type'; -import { Message, FieldMetadata, BufferMetadata } from '../metadata'; -import { FlatData, ListData, NestedData, SingleNestedData, DenseUnionData, SparseUnionData, BoolData, FlatListData, DictionaryData } from '../../data'; -import { - Schema, Field, - Dictionary, - Null, Int, Float, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, - UnionMode, SparseUnion, DenseUnion, FlatListType, DataType, -} from '../../type'; - -export function* readRecordBatches(messages: Iterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>) { - for (const { schema, message, loader } of messages) { - yield* readRecordBatch(schema, message, loader); - } -} - -export async function* readRecordBatchesAsync(messages: AsyncIterable<{ schema: Schema, message: Message, loader: TypeDataLoader }>) { - for await (const { schema, message, loader } of messages) { - yield* readRecordBatch(schema, message, loader); - } -} - -export function* readRecordBatch(schema: Schema, message: Message, loader: TypeDataLoader) { - if (Message.isRecordBatch(message)) { - yield new RecordBatch(schema, message.length, loader.visitFields(schema.fields)); - } else if (Message.isDictionaryBatch(message)) { - const dictionaryId = message.id; - const dictionaries = loader.dictionaries; - const dictionaryField = schema.dictionaries.get(dictionaryId)!; - const dictionaryDataType = (dictionaryField.type as Dictionary).dictionary; - let dictionaryVector = Vector.create(loader.visit(dictionaryDataType)); - if (message.isDelta && dictionaries.has(dictionaryId)) { - dictionaryVector = dictionaries.get(dictionaryId)!.concat(dictionaryVector); - } - dictionaries.set(dictionaryId, dictionaryVector); - } -} - -export abstract class TypeDataLoader extends TypeVisitor { - - public dictionaries: Map; - protected nodes: Iterator; - protected buffers: Iterator; - - constructor(nodes: Iterator, buffers: Iterator, dictionaries: Map) { - super(); - this.nodes = nodes; - this.buffers = buffers; - this.dictionaries = dictionaries; - } - - public visitFields(fields: Field[]) { return fields.map((field) => this.visit(field.type)); } - - public visitNull (type: Null) { return this.visitNullType(type); } - public visitInt (type: Int) { return this.visitFlatType(type); } - public visitFloat (type: Float) { return this.visitFlatType(type); } - public visitBinary (type: Binary) { return this.visitFlatList(type); } - public visitUtf8 (type: Utf8) { return this.visitFlatList(type); } - public visitBool (type: Bool) { return this.visitBoolType(type); } - public visitDecimal (type: Decimal) { return this.visitFlatType(type); } - public visitDate (type: Date_) { return this.visitFlatType(type); } - public visitTime (type: Time) { return this.visitFlatType(type); } - public visitTimestamp (type: Timestamp) { return this.visitFlatType(type); } - public visitInterval (type: Interval) { return this.visitFlatType(type); } - public visitList (type: List) { return this.visitListType(type); } - public visitStruct (type: Struct) { return this.visitNestedType(type); } - public visitUnion (type: Union) { return this.visitUnionType(type); } - public visitFixedSizeBinary(type: FixedSizeBinary) { return this.visitFlatType(type); } - public visitFixedSizeList (type: FixedSizeList) { return 
this.visitFixedSizeListType(type); } - public visitMap (type: Map_) { return this.visitNestedType(type); } - public visitDictionary (type: Dictionary) { - return new DictionaryData(type, this.dictionaries.get(type.id)!, this.visit(type.indices)); - } - protected getFieldMetadata() { return this.nodes.next().value; } - protected getBufferMetadata() { return this.buffers.next().value; } - protected readNullBitmap(type: T, nullCount: number, buffer = this.getBufferMetadata()) { - return nullCount > 0 && this.readData(type, buffer) || new Uint8Array(0); - } - protected abstract readData(type: T, buffer?: BufferMetadata): any; - protected abstract readOffsets(type: T, buffer?: BufferMetadata): any; - protected abstract readTypeIds(type: T, buffer?: BufferMetadata): any; - protected visitNullType(type: Null, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatData(type, length, this.readNullBitmap(type, nullCount), new Uint8Array(0), 0, nullCount); - } - protected visitFlatType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatData(type, length, this.readNullBitmap(type, nullCount), this.readData(type), 0, nullCount); - } - protected visitBoolType(type: Bool, { length, nullCount }: FieldMetadata = this.getFieldMetadata(), data?: Uint8Array) { - return new BoolData(type, length, this.readNullBitmap(type, nullCount), data || this.readData(type), 0, nullCount); - } - protected visitFlatList(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new FlatListData(type, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readData(type), 0, nullCount); - } - protected visitListType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new ListData(type, length, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.visit(type.children![0].type), 0, nullCount); - } - protected visitFixedSizeListType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new SingleNestedData(type, length, this.readNullBitmap(type, nullCount), this.visit(type.children![0].type), 0, nullCount); - } - protected visitNestedType(type: T, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return new NestedData(type, length, this.readNullBitmap(type, nullCount), this.visitFields(type.children), 0, nullCount); - } - protected visitUnionType(type: DenseUnion | SparseUnion, { length, nullCount }: FieldMetadata = this.getFieldMetadata()) { - return type.mode === UnionMode.Sparse ? - new SparseUnionData(type as SparseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.visitFields(type.children), 0, nullCount) : - new DenseUnionData(type as DenseUnion, length, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.readOffsets(type), this.visitFields(type.children), 0, nullCount); - } -} diff --git a/js/src/ipc/whatwg/iterable.ts b/js/src/ipc/whatwg/iterable.ts new file mode 100644 index 0000000000000..31916f2a3bdac --- /dev/null +++ b/js/src/ipc/whatwg/iterable.ts @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { toUint8Array } from '../../util/buffer'; +import { ReadableDOMStreamOptions } from '../../io/interfaces'; +import { isIterable, isAsyncIterable } from '../../util/compat'; + +/** @ignore */ +export function toDOMStream(source: Iterable | AsyncIterable, options?: ReadableDOMStreamOptions): ReadableStream { + if (isAsyncIterable(source)) { return asyncIterableAsReadableDOMStream(source, options); } + if (isIterable(source)) { return iterableAsReadableDOMStream(source, options); } + /* istanbul ignore next */ + throw new Error(`toDOMStream() must be called with an Iterable or AsyncIterable`); +} + +/** @ignore */ +function iterableAsReadableDOMStream(source: Iterable, options?: ReadableDOMStreamOptions) { + + let it: Iterator | null = null; + const bm = (options && options.type === 'bytes') || false; + const hwm = options && options.highWaterMark || (2 ** 24); + + return new ReadableStream({ + ...options as any, + start(controller) { next(controller, it || (it = source[Symbol.iterator]())); }, + pull(controller) { it ? (next(controller, it)) : controller.close(); }, + cancel() { (it && (it.return && it.return()) || true) && (it = null); } + }, { highWaterMark: bm ? hwm : undefined, ...options }); + + function next(controller: ReadableStreamDefaultController, it: Iterator) { + let buf: Uint8Array; + let r: IteratorResult | null = null; + let size = controller.desiredSize || null; + while (!(r = it.next(bm ? size : null)).done) { + if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) { + size != null && bm && (size = size - buf.byteLength + 1); + r.value = buf; + } + controller.enqueue(r.value); + if (size != null && --size <= 0) { return; } + } + controller.close(); + } +} + +/** @ignore */ +function asyncIterableAsReadableDOMStream(source: AsyncIterable, options?: ReadableDOMStreamOptions) { + + let it: AsyncIterator | null = null; + const bm = (options && options.type === 'bytes') || false; + const hwm = options && options.highWaterMark || (2 ** 24); + + return new ReadableStream({ + ...options as any, + async start(controller) { await next(controller, it || (it = source[Symbol.asyncIterator]())); }, + async pull(controller) { it ? (await next(controller, it)) : controller.close(); }, + async cancel() { (it && (it.return && await it.return()) || true) && (it = null); }, + }, { highWaterMark: bm ? hwm : undefined, ...options }); + + async function next(controller: ReadableStreamDefaultController, it: AsyncIterator) { + let buf: Uint8Array; + let r: IteratorResult | null = null; + let size = controller.desiredSize || null; + while (!(r = await it.next(bm ? 
size : null)).done) { + if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) { + size != null && bm && (size = size - buf.byteLength + 1); + r.value = buf; + } + controller.enqueue(r.value); + if (size != null && --size <= 0) { return; } + } + controller.close(); + } +} diff --git a/js/src/ipc/whatwg/reader.ts b/js/src/ipc/whatwg/reader.ts new file mode 100644 index 0000000000000..3e39900fe27e5 --- /dev/null +++ b/js/src/ipc/whatwg/reader.ts @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { DataType } from '../../type'; +import { RecordBatch } from '../../recordbatch'; +import { AsyncByteQueue } from '../../io/stream'; +import { RecordBatchReader } from '../../ipc/reader'; + +/** @ignore */ +export function recordBatchReaderThroughDOMStream(writableStrategy?: ByteLengthQueuingStrategy, readableStrategy?: { autoDestroy: boolean }) { + + const queue = new AsyncByteQueue(); + let reader: RecordBatchReader | null = null; + + const readable = new ReadableStream>({ + async cancel() { await queue.close(); }, + async start(controller) { await next(controller, reader || (reader = await open())); }, + async pull(controller) { reader ? await next(controller, reader) : controller.close(); } + }); + + return { writable: new WritableStream(queue, { 'highWaterMark': 2 ** 14, ...writableStrategy }), readable }; + + async function open() { + return await (await RecordBatchReader.from(queue)).open(readableStrategy); + } + + async function next(controller: ReadableStreamDefaultController>, reader: RecordBatchReader) { + let size = controller.desiredSize; + let r: IteratorResult> | null = null; + while (!(r = await reader.next()).done) { + controller.enqueue(r.value); + if (size != null && --size <= 0) { + return; + } + } + controller.close(); + } +} diff --git a/js/src/ipc/whatwg/writer.ts b/js/src/ipc/whatwg/writer.ts new file mode 100644 index 0000000000000..de3b3f1d2474a --- /dev/null +++ b/js/src/ipc/whatwg/writer.ts @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +import { DataType } from '../../type'; +import { RecordBatch } from '../../recordbatch'; +import { AsyncByteStream } from '../../io/stream'; +import { RecordBatchWriter } from '../../ipc/writer'; + +/** @ignore */ +export function recordBatchWriterThroughDOMStream( + this: typeof RecordBatchWriter, + writableStrategy?: QueuingStrategy> & { autoDestroy: boolean }, + readableStrategy?: { highWaterMark?: number, size?: any } +) { + + const writer = new this(writableStrategy); + const reader = new AsyncByteStream(writer); + const readable = new ReadableStream({ + type: 'bytes', + async cancel() { await reader.cancel(); }, + async pull(controller) { await next(controller); }, + async start(controller) { await next(controller); }, + }, { 'highWaterMark': 2 ** 14, ...readableStrategy }); + + return { writable: new WritableStream(writer, writableStrategy), readable }; + + async function next(controller: ReadableStreamDefaultController) { + let buf: Uint8Array | null = null; + let size = controller.desiredSize; + while (buf = await reader.read(size || null)) { + controller.enqueue(buf); + if (size != null && (size -= buf.byteLength) <= 0) { return; } + } + controller.close(); + } +} diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts new file mode 100644 index 0000000000000..746e5ef58e369 --- /dev/null +++ b/js/src/ipc/writer.ts @@ -0,0 +1,417 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
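Both DOM adapters in whatwg/iterable.ts above share the same backpressure pattern: enqueue values until the controller's desiredSize budget is exhausted, then return and let the stream's pull() callback re-enter. A reduced sketch of that loop, assuming a generic element type and no byte-mode special-casing (pump() is a hypothetical name):

    async function pump<T>(controller: ReadableStreamDefaultController<T>, it: AsyncIterator<T>) {
        // desiredSize is how many more chunks the stream wants before its
        // internal queue is full; null means the size isn't known yet
        let size = controller.desiredSize;
        let r: IteratorResult<T>;
        while (!(r = await it.next()).done) {
            controller.enqueue(r.value);
            if (size != null && --size <= 0) { return; } // yield to backpressure
        }
        controller.close();
    }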
+ +import { Table } from '../table'; +import { MAGIC } from './message'; +import { Vector } from '../vector'; +import { Column } from '../column'; +import { Schema, Field } from '../schema'; +import { Chunked } from '../vector/chunked'; +import { Message } from './metadata/message'; +import { RecordBatch } from '../recordbatch'; +import * as metadata from './metadata/message'; +import { DataType, Dictionary } from '../type'; +import { FileBlock, Footer } from './metadata/file'; +import { MessageHeader, MetadataVersion } from '../enum'; +import { WritableSink, AsyncByteQueue } from '../io/stream'; +import { VectorAssembler } from '../visitor/vectorassembler'; +import { JSONTypeAssembler } from '../visitor/jsontypeassembler'; +import { JSONVectorAssembler } from '../visitor/jsonvectorassembler'; +import { ArrayBufferViewInput, toUint8Array } from '../util/buffer'; +import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces'; +import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream } from '../util/compat'; + +export class RecordBatchWriter extends ReadableInterop implements Writable> { + + /** @nocollapse */ + // @ts-ignore + public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex { + throw new Error(`"throughNode" not available in this environment`); + } + /** @nocollapse */ + public static throughDOM( + // @ts-ignore + writableStrategy?: QueuingStrategy> & { autoDestroy: boolean }, + // @ts-ignore + readableStrategy?: { highWaterMark?: number, size?: any } + ): { writable: WritableStream | RecordBatch>, readable: ReadableStream } { + throw new Error(`"throughDOM" not available in this environment`); + } + + constructor(options?: { autoDestroy: boolean }) { + super(); + this._autoDestroy = options && (typeof options.autoDestroy === 'boolean') ? 
options.autoDestroy : true; + } + + protected _position = 0; + protected _started = false; + protected _autoDestroy: boolean; + // @ts-ignore + protected _sink = new AsyncByteQueue(); + protected _schema: Schema | null = null; + protected _dictionaryBlocks: FileBlock[] = []; + protected _recordBatchBlocks: FileBlock[] = []; + + public toString(sync: true): string; + public toString(sync?: false): Promise; + public toString(sync: any = false) { + return this._sink.toString(sync) as Promise | string; + } + public toUint8Array(sync: true): Uint8Array; + public toUint8Array(sync?: false): Promise; + public toUint8Array(sync: any = false) { + return this._sink.toUint8Array(sync) as Promise | Uint8Array; + } + + public writeAll(input: Table | Iterable>): this; + public writeAll(input: AsyncIterable>): Promise; + public writeAll(input: PromiseLike>>): Promise; + public writeAll(input: PromiseLike | Iterable>>): Promise; + public writeAll(input: PromiseLike | Table | Iterable> | AsyncIterable>) { + if (isPromise(input)) { + return input.then((x) => this.writeAll(x)); + } else if (isAsyncIterable>(input)) { + return writeAllAsync(this, input); + } + return writeAll(this, input); + } + + public get closed() { return this._sink.closed; } + public [Symbol.asyncIterator]() { return this._sink[Symbol.asyncIterator](); } + public toDOMStream(options?: ReadableDOMStreamOptions) { return this._sink.toDOMStream(options); } + public toNodeStream(options?: import('stream').ReadableOptions) { return this._sink.toNodeStream(options); } + + public close() { + return this.reset()._sink.close(); + } + public abort(reason?: any) { + return this.reset()._sink.abort(reason); + } + public finish() { + this._autoDestroy ? this.close() : this.reset(this._sink, this._schema); + return this; + } + public reset(sink: WritableSink = this._sink, schema: Schema | null = null) { + + if ((sink === this._sink) || (sink instanceof AsyncByteQueue)) { + this._sink = sink as AsyncByteQueue; + } else { + this._sink = new AsyncByteQueue(); + if (sink && isWritableDOMStream(sink)) { + this.toDOMStream({ type: 'bytes' }).pipeTo(sink); + } else if (sink && isWritableNodeStream(sink)) { + this.toNodeStream({ objectMode: false }).pipe(sink); + } + } + + if (this._started && this._schema) { + this._writeFooter(); + } + + this._started = false; + this._dictionaryBlocks = []; + this._recordBatchBlocks = []; + + if (!schema || (schema !== this._schema)) { + if (schema === null) { + this._position = 0; + this._schema = null; + } else { + this._started = true; + this._schema = schema; + this._writeSchema(schema); + } + } + + return this; + } + + public write(chunk?: Table | RecordBatch | null) { + let schema: Schema | null; + if (!this._sink) { + throw new Error(`RecordBatchWriter is closed`); + } else if (!chunk || !(schema = chunk.schema)) { + return this.finish() && undefined; + } else if (schema !== this._schema) { + if (this._started && this._autoDestroy) { + return this.close(); + } + this.reset(this._sink, schema); + } + (chunk instanceof Table) + ? 
this.writeAll(chunk.chunks) + : this._writeRecordBatch(chunk); + } + + protected _writeMessage(message: Message, alignment = 8) { + + const a = alignment - 1; + const buffer = Message.encode(message); + const flatbufferSize = buffer.byteLength; + const alignedSize = (flatbufferSize + 4 + a) & ~a; + const nPaddingBytes = alignedSize - flatbufferSize - 4; + + if (message.headerType === MessageHeader.RecordBatch) { + this._recordBatchBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position)); + } else if (message.headerType === MessageHeader.DictionaryBatch) { + this._dictionaryBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position)); + } + + // Write the flatbuffer size prefix including padding + this._write(Int32Array.of(alignedSize - 4)); + // Write the flatbuffer + if (flatbufferSize > 0) { this._write(buffer); } + // Write any padding + return this._writePadding(nPaddingBytes); + } + + protected _write(chunk: ArrayBufferViewInput) { + if (this._started) { + const buffer = toUint8Array(chunk); + if (buffer && buffer.byteLength > 0) { + this._sink.write(buffer); + this._position += buffer.byteLength; + } + } + return this; + } + + protected _writeSchema(schema: Schema) { + return this + ._writeMessage(Message.from(schema)) + ._writeDictionaries(schema.dictionaryFields); + } + + protected _writeFooter() { + return this._writePadding(4); // eos bytes + } + + protected _writeMagic() { + return this._write(MAGIC); + } + + protected _writePadding(nBytes: number) { + return nBytes > 0 ? this._write(new Uint8Array(nBytes)) : this; + } + + protected _writeRecordBatch(records: RecordBatch) { + const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(records); + const recordBatch = new metadata.RecordBatch(records.length, nodes, bufferRegions); + const message = Message.from(recordBatch, byteLength); + return this + ._writeMessage(message) + ._writeBodyBuffers(buffers); + } + + protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(dictionary); + const recordBatch = new metadata.RecordBatch(dictionary.length, nodes, bufferRegions); + const dictionaryBatch = new metadata.DictionaryBatch(recordBatch, id, isDelta); + const message = Message.from(dictionaryBatch, byteLength); + return this + ._writeMessage(message) + ._writeBodyBuffers(buffers); + } + + protected _writeBodyBuffers(buffers: ArrayBufferView[]) { + let buffer: ArrayBufferView; + let size: number, padding: number; + for (let i = -1, n = buffers.length; ++i < n;) { + if ((buffer = buffers[i]) && (size = buffer.byteLength) > 0) { + this._write(buffer); + if ((padding = ((size + 7) & ~7) - size) > 0) { + this._writePadding(padding); + } + } + } + return this; + } + + protected _writeDictionaries(dictionaryFields: Map>[]>) { + for (const [id, fields] of dictionaryFields) { + const vector = fields[0].type.dictionaryVector; + if (!(vector instanceof Chunked)) { + this._writeDictionaryBatch(vector, id, false); + } else { + const chunks = vector.chunks; + for (let i = -1, n = chunks.length; ++i < n;) { + this._writeDictionaryBatch(chunks[i], id, i > 0); + } + } + } + return this; + } +} + +/** @ignore */ +export class RecordBatchStreamWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>, options?: { autoDestroy: true }): RecordBatchStreamWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, 
input: AsyncIterable>, options?: { autoDestroy: true }): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>, options?: { autoDestroy: true }): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>, options?: { autoDestroy: true }): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any, options?: { autoDestroy: true }) { + return new RecordBatchStreamWriter(options).writeAll(input); + } +} + +/** @ignore */ +export class RecordBatchFileWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchFileWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any) { + return new RecordBatchFileWriter().writeAll(input); + } + + constructor() { + super(); + this._autoDestroy = true; + } + + protected _writeSchema(schema: Schema) { + return this + ._writeMagic()._writePadding(2) + ._writeDictionaries(schema.dictionaryFields); + } + + protected _writeFooter() { + const buffer = Footer.encode(new Footer( + this._schema!, MetadataVersion.V4, + this._recordBatchBlocks, this._dictionaryBlocks + )); + return this + ._write(buffer) // Write the flatbuffer + ._write(Int32Array.of(buffer.byteLength)) // then the footer size suffix + ._writeMagic(); // then the magic suffix + } +} + +/** @ignore */ +export class RecordBatchJSONWriter extends RecordBatchWriter { + + public static writeAll(this: typeof RecordBatchWriter, input: Table | Iterable>): RecordBatchJSONWriter; + // @ts-ignore + public static writeAll(this: typeof RecordBatchWriter, input: AsyncIterable>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike>>): Promise>; + public static writeAll(this: typeof RecordBatchWriter, input: PromiseLike | Iterable>>): Promise>; + /** @nocollapse */ + public static writeAll(this: typeof RecordBatchWriter, input: any) { + return new RecordBatchJSONWriter().writeAll(input as any); + } + + constructor() { + super(); + this._autoDestroy = true; + } + + protected _writeMessage() { return this; } + protected _writeSchema(schema: Schema) { + return this._write(`{\n "schema": ${ + JSON.stringify({ fields: schema.fields.map(fieldToJSON) }, null, 2) + }`)._writeDictionaries(schema.dictionaryFields); + } + protected _writeDictionaries(dictionaryFields: Map>[]>) { + this._write(`,\n "dictionaries": [\n`); + super._writeDictionaries(dictionaryFields); + return this._write(`\n ]`); + } + protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) { + this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); + this._write(`${dictionaryBatchToJSON(this._schema!, dictionary, id, isDelta)}`); + this._dictionaryBlocks.push(new FileBlock(0, 0, 0)); + return this; + } + protected _writeRecordBatch(records: RecordBatch) { + this._write(this._recordBatchBlocks.length === 0 + ? 
`,\n "batches": [\n ` + : `,\n `); + this._write(`${recordBatchToJSON(records)}`); + this._recordBatchBlocks.push(new FileBlock(0, 0, 0)); + return this; + } + public close() { + if (this._recordBatchBlocks.length > 0) { + this._write(`\n ]`); + } + if (this._schema) { + this._write(`\n}`); + } + return super.close(); + } +} + +/** @ignore */ +function writeAll(writer: RecordBatchWriter, input: Table | Iterable>) { + const chunks = (input instanceof Table) ? input.chunks : input; + for (const batch of chunks) { + writer.write(batch); + } + return writer.finish(); +} + +/** @ignore */ +async function writeAllAsync(writer: RecordBatchWriter, batches: AsyncIterable>) { + for await (const batch of batches) { + writer.write(batch); + } + return writer.finish(); +} + +/** @ignore */ +function fieldToJSON({ name, type, nullable }: Field): object { + const assembler = new JSONTypeAssembler(); + return { + 'name': name, 'nullable': nullable, + 'type': assembler.visit(type), + 'children': (type.children || []).map(fieldToJSON), + 'dictionary': !DataType.isDictionary(type) ? undefined : { + 'id': type.id, + 'isOrdered': type.isOrdered, + 'indexType': assembler.visit(type.indices) + } + }; +} + +/** @ignore */ +function dictionaryBatchToJSON(schema: Schema, dictionary: Vector, id: number, isDelta = false) { + const f = schema.dictionaryFields.get(id)![0]; + const field = new Field(f.name, f.type.dictionary, f.nullable, f.metadata); + const columns = JSONVectorAssembler.assemble(new Column(field, [dictionary])); + return JSON.stringify({ + 'id': id, + 'isDelta': isDelta, + 'data': { + 'count': dictionary.length, + 'columns': columns + } + }, null, 2); +} + +/** @ignore */ +function recordBatchToJSON(records: RecordBatch) { + return JSON.stringify({ + 'count': records.length, + 'columns': JSONVectorAssembler.assemble(records) + }, null, 2); +} diff --git a/js/src/ipc/writer/binary.ts b/js/src/ipc/writer/binary.ts deleted file mode 100644 index df7c586d94ab5..0000000000000 --- a/js/src/ipc/writer/binary.ts +++ /dev/null @@ -1,725 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { Table } from '../../table'; -import { DenseUnionData } from '../../data'; -import { RecordBatch } from '../../recordbatch'; -import { VectorVisitor, TypeVisitor } from '../../visitor'; -import { MAGIC, magicLength, magicAndPadding, PADDING } from '../magic'; -import { align, getBool, packBools, iterateBits } from '../../util/bit'; -import { Vector, UnionVector, DictionaryVector, NestedVector, ListVector } from '../../vector'; -import { BufferMetadata, FieldMetadata, Footer, FileBlock, Message, RecordBatchMetadata, DictionaryBatch } from '../metadata'; -import { - Schema, Field, TypedArray, MetadataVersion, - DataType, - Dictionary, - Null, Int, Float, - Binary, Bool, Utf8, Decimal, - Date_, Time, Timestamp, Interval, - List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, - FlatType, FlatListType, NestedType, UnionMode, SparseUnion, DenseUnion, SingleNestedType, -} from '../../type'; - -export function* serializeStream(table: Table) { - yield serializeMessage(table.schema).buffer; - for (const [id, field] of table.schema.dictionaries) { - const vec = table.getColumn(field.name) as any as DictionaryVector; - if (vec && vec.dictionary) { - yield serializeDictionaryBatch(vec.dictionary, id).buffer; - } - } - for (const recordBatch of table.batches) { - yield serializeRecordBatch(recordBatch).buffer; - } -} - -export function* serializeFile(table: Table) { - - const recordBatches = []; - const dictionaryBatches = []; - - // First yield the magic string (aligned) - let buffer = new Uint8Array(align(magicLength, 8)); - let metadataLength, bodyLength, byteLength = buffer.byteLength; - buffer.set(MAGIC, 0); - yield buffer; - - // Then yield the schema - ({ metadataLength, buffer } = serializeMessage(table.schema)); - byteLength += buffer.byteLength; - yield buffer; - - for (const [id, field] of table.schema.dictionaries) { - const vec = table.getColumn(field.name) as any as DictionaryVector; - if (vec && vec.dictionary) { - ({ metadataLength, bodyLength, buffer } = serializeDictionaryBatch(vec.dictionary, id)); - dictionaryBatches.push(new FileBlock(metadataLength, bodyLength, byteLength)); - byteLength += buffer.byteLength; - yield buffer; - } - } - for (const recordBatch of table.batches) { - ({ metadataLength, bodyLength, buffer } = serializeRecordBatch(recordBatch)); - recordBatches.push(new FileBlock(metadataLength, bodyLength, byteLength)); - byteLength += buffer.byteLength; - yield buffer; - } - - // Then yield the footer metadata (not aligned) - ({ metadataLength, buffer } = serializeFooter(new Footer(dictionaryBatches, recordBatches, table.schema))); - yield buffer; - - // Last, yield the footer length + terminating magic arrow string (aligned) - buffer = new Uint8Array(magicAndPadding); - new DataView(buffer.buffer).setInt32(0, metadataLength, platformIsLittleEndian); - buffer.set(MAGIC, buffer.byteLength - magicLength); - yield buffer; -} - -export function serializeRecordBatch(recordBatch: RecordBatch) { - const { byteLength, fieldNodes, buffers, buffersMeta } = new RecordBatchSerializer().visitRecordBatch(recordBatch); - const rbMeta = new RecordBatchMetadata(MetadataVersion.V4, recordBatch.length, fieldNodes, buffersMeta); - const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); - return serializeMessage(rbMeta, rbData); -} - -export function serializeDictionaryBatch(dictionary: Vector, id: Long | number, isDelta: boolean = false) { - const { byteLength, fieldNodes, buffers, buffersMeta } = new 
RecordBatchSerializer().visitRecordBatch(RecordBatch.from([dictionary])); - const rbMeta = new RecordBatchMetadata(MetadataVersion.V4, dictionary.length, fieldNodes, buffersMeta); - const dbMeta = new DictionaryBatch(MetadataVersion.V4, rbMeta, id, isDelta); - const rbData = concatBuffersWithMetadata(byteLength, buffers, buffersMeta); - return serializeMessage(dbMeta, rbData); -} - -export function serializeMessage(message: Message, data?: Uint8Array) { - const b = new Builder(); - _Message.finishMessageBuffer(b, writeMessage(b, message)); - // Slice out the buffer that contains the message metadata - const metadataBytes = b.asUint8Array(); - // Reserve 4 bytes for writing the message size at the front. - // Metadata length includes the metadata byteLength + the 4 - // bytes for the length, and rounded up to the nearest 8 bytes. - const metadataLength = align(PADDING + metadataBytes.byteLength, 8); - // + the length of the optional data buffer at the end, padded - const dataByteLength = data ? data.byteLength : 0; - // ensure the entire message is aligned to an 8-byte boundary - const messageBytes = new Uint8Array(align(metadataLength + dataByteLength, 8)); - // Write the metadata length into the first 4 bytes, but subtract the - // bytes we use to hold the length itself. - new DataView(messageBytes.buffer).setInt32(0, metadataLength - PADDING, platformIsLittleEndian); - // Copy the metadata bytes into the message buffer - messageBytes.set(metadataBytes, PADDING); - // Copy the optional data buffer after the metadata bytes - (data && dataByteLength > 0) && messageBytes.set(data, metadataLength); - // if (messageBytes.byteLength % 8 !== 0) { debugger; } - // Return the metadata length because we need to write it into each FileBlock also - return { metadataLength, bodyLength: message.bodyLength, buffer: messageBytes }; -} - -export function serializeFooter(footer: Footer) { - const b = new Builder(); - _Footer.finishFooterBuffer(b, writeFooter(b, footer)); - // Slice out the buffer that contains the footer metadata - const footerBytes = b.asUint8Array(); - const metadataLength = footerBytes.byteLength; - return { metadataLength, buffer: footerBytes }; -} - -export class RecordBatchSerializer extends VectorVisitor { - public byteLength = 0; - public buffers: TypedArray[] = []; - public fieldNodes: FieldMetadata[] = []; - public buffersMeta: BufferMetadata[] = []; - public visitRecordBatch(recordBatch: RecordBatch) { - this.buffers = []; - this.byteLength = 0; - this.fieldNodes = []; - this.buffersMeta = []; - for (let vector: Vector, index = -1, numCols = recordBatch.numCols; ++index < numCols;) { - if (vector = recordBatch.getChildAt(index)!) { - this.visit(vector); - } - } - return this; - } - public visit(vector: Vector) { - if (!DataType.isDictionary(vector.type)) { - const { data, length, nullCount } = vector; - if (length > 2147483647) { - throw new RangeError('Cannot write arrays larger than 2^31 - 1 in length'); - } - this.fieldNodes.push(new FieldMetadata(length, nullCount)); - this.addBuffer(nullCount <= 0 - ? new Uint8Array(0) // placeholder validity buffer - : this.getTruncatedBitmap(data.offset, length, data.nullBitmap!) 
- ); - } - return super.visit(vector); - } - public visitNull (_nullz: Vector) { return this; } - public visitBool (vector: Vector) { return this.visitBoolVector(vector); } - public visitInt (vector: Vector) { return this.visitFlatVector(vector); } - public visitFloat (vector: Vector) { return this.visitFlatVector(vector); } - public visitUtf8 (vector: Vector) { return this.visitFlatListVector(vector); } - public visitBinary (vector: Vector) { return this.visitFlatListVector(vector); } - public visitDate (vector: Vector) { return this.visitFlatVector(vector); } - public visitTimestamp (vector: Vector) { return this.visitFlatVector(vector); } - public visitTime (vector: Vector