Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.08' into feature/fin…
Browse files Browse the repository at this point in the history
…ite-state-transducer-trimmed
  • Loading branch information
elstehle committed Jul 16, 2022
2 parents 8a184e9 + 9627091 commit 78dd893
Show file tree
Hide file tree
Showing 98 changed files with 3,649 additions and 2,138 deletions.
63 changes: 19 additions & 44 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,71 +2,40 @@

repos:
- repo: https://github.com/PyCQA/isort
rev: 5.6.4
rev: 5.10.1
hooks:
- id: isort
alias: isort-cudf
name: isort-cudf
args: ["--settings-path=python/cudf/setup.cfg"]
files: python/cudf/.*
exclude: __init__.py$
types: [text]
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: (__init__.py|setup.py)$
types_or: [python, cython, pyi]
- id: isort
alias: isort-cudf-kafka
name: isort-cudf-kafka
args: ["--settings-path=python/cudf_kafka/setup.cfg"]
files: python/cudf_kafka/.*
types: [text]
types_or: [python, cython]
- id: isort
alias: isort-custreamz
name: isort-custreamz
args: ["--settings-path=python/custreamz/setup.cfg"]
files: python/custreamz/.*
- id: isort
alias: isort-dask-cudf
name: isort-dask-cudf
args: ["--settings-path=python/dask_cudf/setup.cfg"]
files: python/dask_cudf/.*
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
files: python/.*
# Explicitly specify the pyproject.toml at the repo root, not per-project.
args: ["--config", "pyproject.toml"]
- repo: https://github.com/PyCQA/flake8
rev: 3.8.3
hooks:
- id: flake8
alias: flake8
name: flake8
args: ["--config=python/.flake8"]
files: python/.*\.py$
- id: flake8
alias: flake8-cython
name: flake8-cython
args: ["--config=python/.flake8.cython"]
types: [cython]
args: ["--config=setup.cfg"]
files: python/.*\.(py|pyx|pxd)$
types: [file]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.782'
hooks:
- id: mypy
args: ["--config-file=python/cudf/setup.cfg", "python/cudf/cudf"]
args: ["--config-file=setup.cfg", "python/cudf/cudf", "python/dask_cudf/dask_cudf", "python/custreamz/custreamz", "python/cudf_kafka/cudf_kafka"]
pass_filenames: false
- repo: https://github.com/PyCQA/pydocstyle
rev: 6.1.1
hooks:
- id: pydocstyle
args: ["--config=python/.flake8"]
exclude: |
(?x)^(
ci|
cpp|
conda|
docs|
java|
notebooks
)
args: ["--config=setup.cfg"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v11.1.0
hooks:
Expand All @@ -76,6 +45,12 @@ repos:
args: ['-fallback-style=none', '-style=file', '-i']
- repo: local
hooks:
- id: no-deprecationwarning
name: no-deprecationwarning
description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)'
entry: '(category=|\s)DeprecationWarning[,)]'
language: pygrep
types_or: [python, cython]
- id: cmake-format
name: cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- pip
- flake8=3.8.3
- black=22.3.0
- isort=5.6.4
- isort=5.10.1
- mypy=0.782
- doxygen=1.8.20
- pydocstyle=6.1.1
Expand Down
10 changes: 5 additions & 5 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version=environ.get('CONDA_PY', 36) %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}
Expand Down Expand Up @@ -41,10 +41,10 @@ requirements:
- setuptools
- numba >=0.54
- dlpack>=0.5,<0.6.0a0
- pyarrow 8.0.0 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
- pyarrow =8.0.0 *cuda
- libcudf ={{ version }}
- rmm ={{ minor_version }}
- cudatoolkit ={{ cuda_version }}
run:
- protobuf>=3.20.1,<3.21.0a0
- python
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ requirements:
host:
- python
- cython >=0.29,<0.30
- cudf {{ version }}
- libcudf_kafka {{ version }}
- cudf ={{ version }}
- libcudf_kafka ={{ version }}
- setuptools
run:
- python
- libcudf_kafka {{ version }}
- libcudf_kafka ={{ version }}
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf {{ version }}
- cudf ={{ version }}

test: # [linux64]
requires: # [linux64]
Expand Down
6 changes: 3 additions & 3 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ requirements:
host:
- python
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka {{ version }}
- cudf_kafka ={{ version }}
run:
- python
- streamz
- cudf {{ version }}
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka {{ version }}
- cudf_kafka ={{ version }}

test: # [linux64]
requires: # [linux64]
Expand Down
6 changes: 3 additions & 3 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ build:
requirements:
host:
- python
- cudf {{ version }}
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- cudatoolkit {{ cuda_version }}
- cudatoolkit ={{ cuda_version }}
run:
- python
- cudf {{ version }}
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
Expand Down
9 changes: 5 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -429,16 +429,17 @@ add_library(
src/reshape/byte_cast.cu
src/reshape/interleave_columns.cu
src/reshape/tile.cu
src/rolling/detail/rolling_collect_list.cu
src/rolling/detail/rolling_fixed_window.cu
src/rolling/detail/rolling_variable_window.cu
src/rolling/grouped_rolling.cu
src/rolling/range_window_bounds.cpp
src/rolling/rolling.cu
src/rolling/rolling_collect_list.cu
src/rolling/rolling_detail_fixed_window.cu
src/rolling/rolling_detail_variable_window.cu
src/round/round.cu
src/scalar/scalar.cpp
src/scalar/scalar_factories.cpp
src/search/contains.cu
src/search/contains_column.cu
src/search/contains_table.cu
src/search/contains_nested.cu
src/search/search_ordered.cu
src/sort/is_sorted.cu
Expand Down
27 changes: 21 additions & 6 deletions cpp/benchmarks/join/join_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ static void BM_join(state_type& state, Join JoinFunc)
return cudf::detail::valid_if(validity, validity + size, thrust::identity<bool>{}).first;
};

std::unique_ptr<cudf::column> build_key_column = [&]() {
std::unique_ptr<cudf::column> build_key_column0 = [&]() {
return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
build_table_size,
build_random_null_mask(build_table_size))
: cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
build_table_size);
}();
std::unique_ptr<cudf::column> probe_key_column = [&]() {
std::unique_ptr<cudf::column> probe_key_column0 = [&]() {
return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id<key_type>()),
probe_table_size,
build_random_null_mask(probe_table_size))
Expand All @@ -104,21 +104,36 @@ static void BM_join(state_type& state, Join JoinFunc)
}();

generate_input_tables<key_type, cudf::size_type>(
build_key_column->mutable_view().data<key_type>(),
build_key_column0->mutable_view().data<key_type>(),
build_table_size,
probe_key_column->mutable_view().data<key_type>(),
probe_key_column0->mutable_view().data<key_type>(),
probe_table_size,
selectivity,
multiplicity);

// Copy build_key_column0 and probe_key_column0 into new columns.
// If Nullable, the new columns will be assigned new nullmasks.
auto const build_key_column1 = [&]() {
auto col = std::make_unique<cudf::column>(build_key_column0->view());
if (Nullable) { col->set_null_mask(build_random_null_mask(build_table_size)); }
return col;
}();
auto const probe_key_column1 = [&]() {
auto col = std::make_unique<cudf::column>(probe_key_column0->view());
if (Nullable) { col->set_null_mask(build_random_null_mask(probe_table_size)); }
return col;
}();

auto init = cudf::make_fixed_width_scalar<payload_type>(static_cast<payload_type>(0));
auto build_payload_column = cudf::sequence(build_table_size, *init);
auto probe_payload_column = cudf::sequence(probe_table_size, *init);

CUDF_CHECK_CUDA(0);

cudf::table_view build_table({build_key_column->view(), *build_payload_column});
cudf::table_view probe_table({probe_key_column->view(), *probe_payload_column});
cudf::table_view build_table(
{build_key_column0->view(), build_key_column1->view(), *build_payload_column});
cudf::table_view probe_table(
{probe_key_column0->view(), probe_key_column1->view(), *probe_payload_column});

// Setup join parameters and result table
[[maybe_unused]] std::vector<cudf::size_type> columns_to_join = {0};
Expand Down
38 changes: 23 additions & 15 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

#include <algorithm>
#include <iterator>
#include <optional>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -508,17 +509,20 @@ std::vector<size_type> segmented_null_count(bitmask_type const* bitmask,
* validity of any/all elements of segments of an input null mask.
*
* @tparam OffsetIterator Random-access input iterator type.
* @param bitmask Null mask residing in device memory whose segments will be
* reduced into a new mask.
* @param first_bit_indices_begin Random-access input iterator to the beginning
* of a sequence of indices of the first bit in each segment (inclusive).
* @param first_bit_indices_end Random-access input iterator to the end of a
* sequence of indices of the first bit in each segment (inclusive).
* @param last_bit_indices_begin Random-access input iterator to the beginning
* of a sequence of indices of the last bit in each segment (exclusive).
* @param null_handling If `null_policy::INCLUDE`, all elements in a segment
* must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`,
* the reduction is valid if any element in the segment is valid.
* @param bitmask Null mask residing in device memory whose segments will be reduced into a new
* mask.
* @param first_bit_indices_begin Random-access input iterator to the beginning of a sequence of
* indices of the first bit in each segment (inclusive).
* @param first_bit_indices_end Random-access input iterator to the end of a sequence of indices of
* the first bit in each segment (inclusive).
* @param last_bit_indices_begin Random-access input iterator to the beginning of a sequence of
* indices of the last bit in each segment (exclusive).
* @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the
* reduced value to be valid. If `null_policy::EXCLUDE`, the reduction is valid if any element in
* the segment is valid.
* @param valid_initial_value Indicates whether a valid initial value was provided to the reduction.
* `true` indicates a valid initial value, `false` indicates a null initial value, and
* `std::nullopt` indicates that no initial value was provided.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned buffer's device memory.
* @return A pair containing the reduced null mask and number of nulls.
Expand All @@ -530,6 +534,7 @@ std::pair<rmm::device_buffer, size_type> segmented_null_mask_reduction(
OffsetIterator first_bit_indices_end,
OffsetIterator last_bit_indices_begin,
null_policy null_handling,
std::optional<bool> valid_initial_value,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand All @@ -549,7 +554,9 @@ std::pair<rmm::device_buffer, size_type> segmented_null_mask_reduction(
return cudf::detail::valid_if(
segment_length_iterator,
segment_length_iterator + num_segments,
[] __device__(auto const& length) { return length > 0; },
[valid_initial_value] __device__(auto const& length) {
return valid_initial_value.value_or(length > 0);
},
stream,
mr);
}
Expand All @@ -567,11 +574,12 @@ std::pair<rmm::device_buffer, size_type> segmented_null_mask_reduction(
return cudf::detail::valid_if(
length_and_valid_count,
length_and_valid_count + num_segments,
[null_handling] __device__(auto const& length_and_valid_count) {
[null_handling, valid_initial_value] __device__(auto const& length_and_valid_count) {
auto const length = thrust::get<0>(length_and_valid_count);
auto const valid_count = thrust::get<1>(length_and_valid_count);
return (length > 0) and
((null_handling == null_policy::EXCLUDE) ? valid_count > 0 : valid_count == length);
return (null_handling == null_policy::EXCLUDE)
? (valid_initial_value.value_or(false) || valid_count > 0)
: (valid_initial_value.value_or(length > 0) && valid_count == length);
},
stream,
mr);
Expand Down
Loading

0 comments on commit 78dd893

Please sign in to comment.