diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index 67705863d41..4bfef9767ca 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -85,8 +85,7 @@ static cudf::string_scalar create_random_input(int32_t num_chars, // extract the chars from the returned strings column. auto input_column_contents = input_column->release(); - auto chars_column_contents = input_column_contents.children[1]->release(); - auto chars_buffer = chars_column_contents.data.release(); + auto chars_buffer = input_column_contents.data.release(); // turn the chars in to a string scalar. return cudf::string_scalar(std::move(*chars_buffer)); @@ -218,7 +217,7 @@ NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list)) .set_name("multibyte_split_source") .set_min_samples(4) - .add_int64_axis("strip_delimiters", {1}) + .add_int64_axis("strip_delimiters", {0, 1}) .add_int64_axis("delim_size", {1}) .add_int64_axis("delim_percent", {1}) .add_int64_power_of_two_axis("size_approx", {15, 30}) diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp index 596ca3458c8..58faa0ebfe4 100644 --- a/cpp/src/io/text/data_chunk_source_factories.cpp +++ b/cpp/src/io/text/data_chunk_source_factories.cpp @@ -120,7 +120,11 @@ class istream_data_chunk_reader : public data_chunk_reader { { } - void skip_bytes(std::size_t size) override { _datastream->ignore(size); }; + void skip_bytes(std::size_t size) override + { + // 20% faster than _datastream->ignore(size) for large files + _datastream->seekg(_datastream->tellg() + static_cast(size)); + }; std::unique_ptr get_next_chunk(std::size_t read_size, rmm::cuda_stream_view stream) override @@ -265,7 +269,7 @@ class file_data_chunk_source : public data_chunk_source { [[nodiscard]] std::unique_ptr create_reader() const override { return std::make_unique( - 
std::make_unique(_filename, std::ifstream::in)); + std::make_unique(_filename, std::ifstream::in | std::ifstream::binary)); } private: diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index ef1d09e5652..6b350c137d0 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -65,6 +65,22 @@ TEST_F(TransformTest, ColumnReference) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); } +TEST_F(TransformTest, BasicAdditionDoubleCast) +{ + auto c_0 = column_wrapper{3, 20, 1, 50}; + std::vector<__int128_t> data1{10, 7, 20, 0}; + auto c_1 = cudf::test::fixed_point_column_wrapper<__int128_t>( + data1.begin(), data1.end(), numeric::scale_type{0}); + auto table = cudf::table_view{{c_0, c_1}}; + auto col_ref_0 = cudf::ast::column_reference(0); + auto col_ref_1 = cudf::ast::column_reference(1); + auto cast = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_FLOAT64, col_ref_1); + auto expression = cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0, cast); + auto expected = column_wrapper{13, 27, 21, 50}; + auto result = cudf::compute_column(table, expression); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity); +} + TEST_F(TransformTest, Literal) { auto c_0 = column_wrapper{3, 20, 1, 50}; diff --git a/docs/cudf/source/user_guide/api_docs/general_utilities.rst b/docs/cudf/source/user_guide/api_docs/general_utilities.rst index d9c53c3fbbd..8d0edc0b100 100644 --- a/docs/cudf/source/user_guide/api_docs/general_utilities.rst +++ b/docs/cudf/source/user_guide/api_docs/general_utilities.rst @@ -8,6 +8,8 @@ Testing functions :toctree: api/ cudf.testing.testing.assert_column_equal + cudf.testing.testing.assert_eq cudf.testing.testing.assert_frame_equal cudf.testing.testing.assert_index_equal + cudf.testing.testing.assert_neq cudf.testing.testing.assert_series_equal diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py 
b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py index f8f674fecec..d90f3ea1aca 100644 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py +++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import sys from io import StringIO @@ -13,7 +13,7 @@ compare_content, run_test, ) -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pythonfuzz(data_handle=CSVReader) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py index 2f5e6204f7c..69e9437be93 100644 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py +++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import io import sys @@ -9,7 +9,7 @@ from cudf._fuzz_testing.json import JSONReader, JSONWriter from cudf._fuzz_testing.main import pythonfuzz from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES, run_test -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pythonfuzz(data_handle=JSONReader) diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py index d685174f3c2..e6dfe2eae62 100644 --- a/python/cudf/cudf/_fuzz_testing/utils.py +++ b/python/cudf/cudf/_fuzz_testing/utils.py @@ -8,7 +8,7 @@ import pyarrow as pa import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq from cudf.utils.dtypes import ( pandas_dtypes_to_np_dtypes, pyarrow_dtypes_to_pandas_dtypes, diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 0b0bbdb2589..c706351a683 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -8,7 +8,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector cimport cudf._lib.pylibcudf.libcudf.types as 
libcudf_types -from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource +from cudf._lib.pylibcudf.io.datasource cimport Datasource, NativeFileDatasource from cudf._lib.pylibcudf.libcudf.types cimport data_type from cudf._lib.types cimport dtype_to_data_type diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt index 2408fa1c12f..620229a1275 100644 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/io/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources datasource.pyx utils.pyx) +set(cython_sources utils.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( CXX diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 3c14ec46122..1d7c56888d9 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -8,7 +8,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from cudf._lib.column cimport Column -from cudf._lib.io.datasource cimport Datasource +from cudf._lib.pylibcudf.io.datasource cimport Datasource from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource from cudf._lib.pylibcudf.libcudf.io.types cimport ( diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index d3e6053ef4b..9609e3131b4 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -23,12 +23,12 @@ except ImportError: cimport 
cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types from cudf._lib.column cimport Column -from cudf._lib.io.datasource cimport NativeFileDatasource from cudf._lib.io.utils cimport ( make_sink_info, make_source_info, update_column_struct_field_names, ) +from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink from cudf._lib.pylibcudf.libcudf.io.orc cimport ( chunked_orc_writer_options, diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index f6f9cfa9a7c..7914ed7e9d9 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -37,12 +37,12 @@ cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types cimport cudf._lib.pylibcudf.libcudf.types as cudf_types from cudf._lib.column cimport Column from cudf._lib.expressions cimport Expression -from cudf._lib.io.datasource cimport NativeFileDatasource from cudf._lib.io.utils cimport ( make_sinks_info, make_source_info, update_struct_field_names, ) +from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource from cudf._lib.pylibcudf.libcudf.expressions cimport expression from cudf._lib.pylibcudf.libcudf.io.parquet cimport ( chunked_parquet_reader as cpp_chunked_parquet_reader, diff --git a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt index 2cfec101bab..32f0f5543e4 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. 
# ============================================================================= -set(cython_sources avro.pyx types.pyx) +set(cython_sources avro.pyx datasource.pyx types.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( @@ -21,5 +21,5 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_io_ ASSOCIATED_TARGETS cudf ) -set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_types) +set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_datasource pylibcudf_io_types) link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd index 250292746c1..cfd6d2cd281 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . cimport avro, types +from . cimport avro, datasource, types from .types cimport SourceInfo, TableWithMetadata diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py index 5242c741911..a54ba1834dc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import avro, types +from . 
import avro, datasource, types from .types import SourceInfo, TableWithMetadata diff --git a/python/cudf/cudf/_lib/io/datasource.pxd b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd similarity index 100% rename from python/cudf/cudf/_lib/io/datasource.pxd rename to python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd diff --git a/python/cudf/cudf/_lib/io/datasource.pyx b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx similarity index 100% rename from python/cudf/cudf/_lib/io/datasource.pyx rename to python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx index cd777232b33..ab3375da662 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx @@ -4,6 +4,8 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector +from cudf._lib.pylibcudf.io.datasource cimport Datasource +from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource from cudf._lib.pylibcudf.libcudf.io.types cimport ( host_buffer, source_info, @@ -56,9 +58,8 @@ cdef class SourceInfo: Parameters ---------- - sources : List[Union[str, os.PathLike, bytes, io.BytesIO]] - A homogeneous list of sources (this can be a string filename, - an os.PathLike, bytes, or an io.BytesIO) to read from. + sources : List[Union[str, os.PathLike, bytes, io.BytesIO, DataSource]] + A homogeneous list of sources to read from. Mixing different types of sources will raise a `ValueError`. 
""" @@ -68,6 +69,7 @@ cdef class SourceInfo: raise ValueError("Need to pass at least one source") cdef vector[string] c_files + cdef vector[datasource*] c_datasources if isinstance(sources[0], (os.PathLike, str)): c_files.reserve(len(sources)) @@ -84,6 +86,13 @@ cdef class SourceInfo: self.c_obj = move(source_info(c_files)) return + elif isinstance(sources[0], Datasource): + for csrc in sources: + if not isinstance(csrc, Datasource): + raise ValueError("All sources must be of the same type!") + c_datasources.push_back((csrc).get_datasource()) + self.c_obj = move(source_info(c_datasources)) + return # TODO: host_buffer is deprecated API, use host_span instead cdef vector[host_buffer] c_host_buffers @@ -106,5 +115,11 @@ cdef class SourceInfo: c_buffer = bio.getbuffer() # check if empty? c_host_buffers.push_back(host_buffer(&c_buffer[0], c_buffer.shape[0])) + else: + raise ValueError("Sources must be a list of str/paths, " + "bytes, io.BytesIO, or a Datasource") + + if empty_buffer is True: + c_host_buffers.push_back(host_buffer(NULL, 0)) - self.c_obj = source_info(c_host_buffers) + self.c_obj = move(source_info(c_host_buffers)) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index f538180805b..231af30c06d 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1068,51 +1068,34 @@ def notnull(self) -> ColumnBase: return result - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - """ - Fill null values with *fill_value* - """ - if fill_value is not None: - fill_is_scalar = np.isscalar(fill_value) - - if fill_is_scalar: - if fill_value == _DEFAULT_CATEGORICAL_VALUE: - fill_value = self.codes.dtype.type(fill_value) - else: - try: - fill_value = self._encode(fill_value) - fill_value = self.codes.dtype.type(fill_value) - except ValueError as err: - err_msg = "fill value must be in categories" - raise ValueError(err_msg) from 
err + def _validate_fillna_value( + self, fill_value: ScalarLike | ColumnLike + ) -> cudf.Scalar | ColumnBase: + """Align fill_value for .fillna based on column type.""" + if cudf.api.types.is_scalar(fill_value): + if fill_value != _DEFAULT_CATEGORICAL_VALUE: + try: + fill_value = self._encode(fill_value) + except ValueError as err: + raise ValueError( + f"{fill_value=} must be in categories" + ) from err + return cudf.Scalar(fill_value, dtype=self.codes.dtype) + else: + fill_value = column.as_column(fill_value, nan_as_null=False) + if isinstance(fill_value.dtype, CategoricalDtype): + if self.dtype != fill_value.dtype: + raise TypeError( + "Cannot set a categorical with another without identical categories" + ) else: - fill_value = column.as_column(fill_value, nan_as_null=False) - if isinstance(fill_value, CategoricalColumn): - if self.dtype != fill_value.dtype: - raise TypeError( - "Cannot set a Categorical with another, " - "without identical categories" - ) - # TODO: only required if fill_value has a subset of the - # categories: - fill_value = fill_value._set_categories( - self.categories, - is_unique=True, - ) - fill_value = column.as_column(fill_value.codes).astype( - self.codes.dtype + raise TypeError( + "Cannot set a categorical with non-categorical data" ) - - # Validation of `fill_value` will have to be performed - # before returning self. 
- if not self.nullable: - return self - - return super().fillna(fill_value, method=method) + fill_value = fill_value._set_categories( + self.categories, + ) + return fill_value.codes.astype(self.codes.dtype) def indices_of( self, value: ScalarLike @@ -1372,11 +1355,13 @@ def _set_categories( if not (is_unique or new_cats.is_unique): new_cats = cudf.Series(new_cats)._column.unique() + if cur_cats.equals(new_cats, check_dtypes=True): + # TODO: Internal usages don't always need a copy; add a copy keyword + # as_ordered shallow copies + return self.copy().as_ordered(ordered=ordered) + cur_codes = self.codes - max_cat_size = ( - len(cur_cats) if len(cur_cats) > len(new_cats) else len(new_cats) - ) - out_code_dtype = min_unsigned_type(max_cat_size) + out_code_dtype = min_unsigned_type(max(len(cur_cats), len(new_cats))) cur_order = column.as_column(range(len(cur_codes))) old_codes = column.as_column( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 586689e2ee3..dfcdfbb9d91 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -666,15 +666,32 @@ def _check_scatter_key_length( f"{num_keys}" ) + def _validate_fillna_value( + self, fill_value: ScalarLike | ColumnLike + ) -> cudf.Scalar | ColumnBase: + """Align fill_value for .fillna based on column type.""" + if is_scalar(fill_value): + return cudf.Scalar(fill_value, dtype=self.dtype) + return as_column(fill_value) + def fillna( self, - fill_value: Any = None, - method: str | None = None, + fill_value: ScalarLike | ColumnLike, + method: Literal["ffill", "bfill", None] = None, ) -> Self: """Fill null values with ``value``. Returns a copy with null filled. 
""" + if not self.has_nulls(include_nan=True): + return self.copy() + elif method is None: + if is_scalar(fill_value) and libcudf.scalar._is_null_host_scalar( + fill_value + ): + return self.copy() + else: + fill_value = self._validate_fillna_value(fill_value) return libcudf.replace.replace_nulls( input_col=self.nans_to_nulls(), replacement=fill_value, diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index d88553361dd..121076b69ce 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -8,18 +8,17 @@ import locale import re from locale import nl_langinfo -from typing import TYPE_CHECKING, Any, Literal, Sequence, cast +from typing import TYPE_CHECKING, Literal, Sequence, cast import numpy as np import pandas as pd import pyarrow as pa -from typing_extensions import Self import cudf from cudf import _lib as libcudf from cudf._lib.labeling import label_bins from cudf._lib.search import search_sorted -from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype +from cudf.api.types import is_datetime64_dtype, is_timedelta64_dtype from cudf.core._compat import PANDAS_GE_220 from cudf.core._internals.timezones import ( check_ambiguous_and_nonexistent, @@ -641,22 +640,6 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: else: return result_col - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - if fill_value is not None: - if cudf.utils.utils._isnat(fill_value): - return self.copy(deep=True) - if is_scalar(fill_value): - if not isinstance(fill_value, cudf.Scalar): - fill_value = cudf.Scalar(fill_value, dtype=self.dtype) - else: - fill_value = column.as_column(fill_value, nan_as_null=False) - - return super().fillna(fill_value, method) - def indices_of( self, value: ScalarLike ) -> cudf.core.column.NumericalColumn: diff --git a/python/cudf/cudf/core/column/decimal.py 
b/python/cudf/cudf/core/column/decimal.py index e9d9b4933e5..d66908b5f94 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -4,12 +4,11 @@ import warnings from decimal import Decimal -from typing import TYPE_CHECKING, Any, Sequence, cast +from typing import TYPE_CHECKING, Sequence, cast import cupy as cp import numpy as np import pyarrow as pa -from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -31,7 +30,7 @@ from .numerical_base import NumericalBaseColumn if TYPE_CHECKING: - from cudf._typing import ColumnBinaryOperand, Dtype + from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike class DecimalBaseColumn(NumericalBaseColumn): @@ -135,30 +134,20 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): return result - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - """Fill null values with ``value``. - - Returns a copy with null filled. - """ + def _validate_fillna_value( + self, fill_value: ScalarLike | ColumnLike + ) -> cudf.Scalar | ColumnBase: + """Align fill_value for .fillna based on column type.""" if isinstance(fill_value, (int, Decimal)): - fill_value = cudf.Scalar(fill_value, dtype=self.dtype) - elif ( - isinstance(fill_value, DecimalBaseColumn) - or isinstance(fill_value, cudf.core.column.NumericalColumn) - and is_integer_dtype(fill_value.dtype) + return cudf.Scalar(fill_value, dtype=self.dtype) + elif isinstance(fill_value, ColumnBase) and ( + isinstance(self.dtype, DecimalDtype) or self.dtype.kind in "iu" ): - fill_value = fill_value.astype(self.dtype) - else: - raise TypeError( - "Decimal columns only support using fillna with decimal and " - "integer values" - ) - - return super().fillna(fill_value, method=method) + return fill_value.astype(self.dtype) + raise TypeError( + "Decimal columns only support using fillna with decimal and " + "integer values" + ) def normalize_binop_value(self, other): if 
isinstance(other, ColumnBase): diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 098cf43421b..76c64e1aea0 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -532,57 +532,26 @@ def find_and_replace( replaced, df._data["old"], df._data["new"] ) - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - """ - Fill null values with *fill_value* - """ - col = self.nans_to_nulls() - - if col.null_count == 0: - return col - - if method is not None: - return super().fillna(fill_value, method) - - if fill_value is None: - raise ValueError("Must specify either 'fill_value' or 'method'") - - if ( - isinstance(fill_value, cudf.Scalar) - and fill_value.dtype == col.dtype - ): - return super().fillna(fill_value, method) - - if np.isscalar(fill_value): - # cast safely to the same dtype as self - fill_value_casted = col.dtype.type(fill_value) - if not np.isnan(fill_value) and (fill_value_casted != fill_value): + def _validate_fillna_value( + self, fill_value: ScalarLike | ColumnLike + ) -> cudf.Scalar | ColumnBase: + """Align fill_value for .fillna based on column type.""" + if is_scalar(fill_value): + cudf_obj = cudf.Scalar(fill_value) + if not as_column(cudf_obj).can_cast_safely(self.dtype): raise TypeError( f"Cannot safely cast non-equivalent " - f"{type(fill_value).__name__} to {col.dtype.name}" + f"{type(fill_value).__name__} to {self.dtype.name}" ) - fill_value = cudf.Scalar(fill_value_casted) else: - fill_value = column.as_column(fill_value, nan_as_null=False) - if is_integer_dtype(col.dtype): - # cast safely to the same dtype as self - if fill_value.dtype != col.dtype: - new_fill_value = fill_value.astype(col.dtype) - if not (new_fill_value == fill_value).all(): - raise TypeError( - f"Cannot safely cast non-equivalent " - f"{fill_value.dtype.type.__name__} to " - f"{col.dtype.type.__name__}" - ) - fill_value = new_fill_value - else: 
- fill_value = fill_value.astype(col.dtype) - - return super().fillna(fill_value, method) + cudf_obj = as_column(fill_value, nan_as_null=False) + if not cudf_obj.can_cast_safely(self.dtype): # type: ignore[attr-defined] + raise TypeError( + f"Cannot safely cast non-equivalent " + f"{cudf_obj.dtype.type.__name__} to " + f"{self.dtype.type.__name__}" + ) + return cudf_obj.astype(self.dtype) def can_cast_safely(self, to_dtype: DtypeObj) -> bool: """ diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 2451a9cc0af..936cd1eccb0 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5,12 +5,11 @@ import re import warnings from functools import cached_property -from typing import TYPE_CHECKING, Any, Sequence, cast, overload +from typing import TYPE_CHECKING, Sequence, cast, overload import numpy as np import pandas as pd import pyarrow as pa -from typing_extensions import Self import cudf import cudf.api.types @@ -5838,21 +5837,6 @@ def find_and_replace( res = self return libcudf.replace.replace(res, df._data["old"], df._data["new"]) - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - if fill_value is not None: - if not is_scalar(fill_value): - fill_value = column.as_column(fill_value, dtype=self.dtype) - elif cudf._lib.scalar._is_null_host_scalar(fill_value): - # Trying to fill with value? Return copy. 
- return self.copy(deep=True) - else: - fill_value = cudf.Scalar(fill_value, dtype=self.dtype) - return super().fillna(fill_value, method=method) - def normalize_binop_value(self, other) -> column.ColumnBase | cudf.Scalar: if ( isinstance(other, (column.ColumnBase, cudf.Scalar)) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 26b449f1863..8f41bcb6422 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -4,12 +4,11 @@ import datetime import functools -from typing import TYPE_CHECKING, Any, Sequence, cast +from typing import TYPE_CHECKING, Sequence, cast import numpy as np import pandas as pd import pyarrow as pa -from typing_extensions import Self import cudf from cudf import _lib as libcudf @@ -252,22 +251,6 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand: def time_unit(self) -> str: return np.datetime_data(self.dtype)[0] - def fillna( - self, - fill_value: Any = None, - method: str | None = None, - ) -> Self: - if fill_value is not None: - if cudf.utils.utils._isnat(fill_value): - return self.copy(deep=True) - if is_scalar(fill_value): - fill_value = cudf.Scalar(fill_value) - dtype = self.dtype - fill_value = fill_value.astype(dtype) - else: - fill_value = column.as_column(fill_value, nan_as_null=False) - return super().fillna(fill_value, method) - def as_numerical_column( self, dtype: Dtype ) -> "cudf.core.column.NumericalColumn": diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 76bb9d2a8ed..f0d8157011d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2980,6 +2980,32 @@ def set_index( df.index = idx return df if not inplace else None + @_cudf_nvtx_annotate + def fillna( + self, value=None, method=None, axis=None, inplace=False, limit=None + ): # noqa: D102 + if isinstance(value, (pd.Series, pd.DataFrame)): + value = cudf.from_pandas(value) + if 
isinstance(value, cudf.Series): + # Align value.index to self.columns + value = value.reindex(self._column_names) + elif isinstance(value, cudf.DataFrame): + if not self.index.equals(value.index): + # Align value.index to self.index + value = value.reindex(self.index) + value = dict(value.items()) + elif isinstance(value, abc.Mapping): + # Align value.indexes to self.index + value = { + key: value.reindex(self.index) + if isinstance(value, cudf.Series) + else value + for key, value in value.items() + } + return super().fillna( + value=value, method=method, axis=axis, inplace=inplace, limit=limit + ) + @_cudf_nvtx_annotate def where(self, cond, other=None, inplace=False): from cudf.core._internals.where import ( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 38bff3946d6..8ca71180c00 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2,7 +2,6 @@ from __future__ import annotations -import copy import operator import pickle import warnings @@ -20,6 +19,7 @@ import cudf from cudf import _lib as libcudf from cudf.api.types import is_dtype_equal, is_scalar +from cudf.core._compat import PANDAS_LT_300 from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ( ColumnBase, @@ -38,7 +38,7 @@ if TYPE_CHECKING: from types import ModuleType - from cudf._typing import Dtype + from cudf._typing import Dtype, ScalarLike # TODO: It looks like Frame is missing a declaration of `copy`, need to add @@ -613,8 +613,8 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None: @_cudf_nvtx_annotate def fillna( self, - value=None, - method: Literal["ffill", "bfill", "pad", "backfill"] | None = None, + value: None | ScalarLike | cudf.Series = None, + method: Literal["ffill", "bfill", "pad", "backfill", None] = None, axis=None, inplace: bool = False, limit=None, @@ -725,6 +725,16 @@ def fillna( raise ValueError("Cannot specify both 'value' and 'method'.") if method: + # Do not remove 
until pandas 3.0 support is added. + assert ( + PANDAS_LT_300 + ), "Need to drop after pandas-3.0 support is added." + warnings.warn( + f"{type(self).__name__}.fillna with 'method' is " + "deprecated and will raise in a future version. " + "Use obj.ffill() or obj.bfill() instead.", + FutureWarning, + ) if method not in {"ffill", "bfill", "pad", "backfill"}: raise NotImplementedError( f"Fill method {method} is not supported" @@ -734,57 +744,24 @@ def fillna( elif method == "backfill": method = "bfill" - # TODO: This logic should be handled in different subclasses since - # different Frames support different types of values. - if isinstance(value, cudf.Series): - value = value.reindex(self._data.names) - elif isinstance(value, cudf.DataFrame): - if not self.index.equals(value.index): # type: ignore[attr-defined] - value = value.reindex(self.index) # type: ignore[attr-defined] - else: - value = value - elif not isinstance(value, abc.Mapping): - value = {name: copy.deepcopy(value) for name in self._data.names} - else: - value = { - key: value.reindex(self.index) # type: ignore[attr-defined] - if isinstance(value, cudf.Series) - else value - for key, value in value.items() - } - - filled_data = {} - for col_name, col in self._data.items(): - if col_name in value and method is None: - replace_val = value[col_name] - else: - replace_val = None - should_fill = ( - ( - col_name in value - and col.has_nulls(include_nan=True) - and not libcudf.scalar._is_null_host_scalar(replace_val) - ) - or method is not None - or ( - isinstance(col, cudf.core.column.CategoricalColumn) - and not libcudf.scalar._is_null_host_scalar(replace_val) - ) + if is_scalar(value): + value = {name: value for name in self._column_names} + elif not isinstance(value, (abc.Mapping, cudf.Series)): + raise TypeError( + f'"value" parameter must be a scalar, dict ' + f"or Series, but you passed a " + f'"{type(value).__name__}"' ) - if should_fill: - filled_data[col_name] = col.fillna(replace_val, method) - 
else: - filled_data[col_name] = col.copy(deep=True) + + filled_columns = [ + col.fillna(value[name], method) if name in value else col.copy() + for name, col in self._data.items() + ] return self._mimic_inplace( - self._from_data( - data=ColumnAccessor( - data=filled_data, - multiindex=self._data.multiindex, - level_names=self._data.level_names, - rangeindex=self._data.rangeindex, - label_dtype=self._data.label_dtype, - verify=False, + self._from_data_like_self( + self._data._from_columns_like_self( + filled_columns, verify=False ) ), inplace=inplace, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 5cae4a857ee..280a6e92eab 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3217,29 +3217,6 @@ def _split(self, splits, keep_index=True): for i in range(len(splits) + 1) ] - @_cudf_nvtx_annotate - def fillna( - self, value=None, method=None, axis=None, inplace=False, limit=None - ): # noqa: D102 - if method is not None: - # Do not remove until pandas 3.0 support is added. - assert ( - PANDAS_LT_300 - ), "Need to drop after pandas-3.0 support is added." - warnings.warn( - f"{type(self).__name__}.fillna with 'method' is " - "deprecated and will raise in a future version. 
" - "Use obj.ffill() or obj.bfill() instead.", - FutureWarning, - ) - old_index = self.index - ret = super().fillna(value, method, axis, inplace, limit) - if inplace: - self.index = old_index - else: - ret.index = old_index - return ret - @_cudf_nvtx_annotate def bfill(self, value=None, axis=None, inplace=None, limit=None): """ diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index c0716d7709a..15ad0813601 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1797,20 +1797,12 @@ def fillna( ): if isinstance(value, pd.Series): value = Series.from_pandas(value) - - if not (is_scalar(value) or isinstance(value, (abc.Mapping, Series))): - raise TypeError( - f'"value" parameter must be a scalar, dict ' - f"or Series, but you passed a " - f'"{type(value).__name__}"' - ) - - if isinstance(value, (abc.Mapping, Series)): + elif isinstance(value, abc.Mapping): value = Series(value) + if isinstance(value, cudf.Series): if not self.index.equals(value.index): value = value.reindex(self.index) - value = value._column - + value = {self.name: value._column} return super().fillna( value=value, method=method, axis=axis, inplace=inplace, limit=limit ) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 1540c6850e7..dfb729cae6b 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -17,7 +17,7 @@ import numpy as np from ..options import _env_get_bool -from ..testing._utils import assert_eq +from ..testing import assert_eq from .annotation import nvtx diff --git a/python/cudf/cudf/pylibcudf_tests/test_source_info.py b/python/cudf/cudf/pylibcudf_tests/test_source_info.py index 71a3ecbcc30..019321b7259 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_source_info.py +++ b/python/cudf/cudf/pylibcudf_tests/test_source_info.py @@ -2,13 +2,21 @@ import io +import pyarrow as pa import pytest import cudf._lib.pylibcudf as 
plc +from cudf._lib.pylibcudf.io.datasource import NativeFileDatasource @pytest.mark.parametrize( - "source", ["a.txt", b"hello world", io.BytesIO(b"hello world")] + "source", + [ + "a.txt", + b"hello world", + io.BytesIO(b"hello world"), + NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), + ], ) def test_source_info_ctor(source, tmp_path): if isinstance(source, str): @@ -28,6 +36,10 @@ def test_source_info_ctor(source, tmp_path): ["a.txt", "a.txt"], [b"hello world", b"hello there"], [io.BytesIO(b"hello world"), io.BytesIO(b"hello there")], + [ + NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), + NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), + ], ], ) def test_source_info_ctor_multiple(sources, tmp_path): @@ -54,6 +66,11 @@ def test_source_info_ctor_multiple(sources, tmp_path): io.BytesIO(b"hello there"), b"hello world", ], + [ + NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")), + "awef.txt", + b"hello world", + ], ], ) def test_source_info_ctor_mixing_invalid(sources, tmp_path): @@ -67,3 +84,8 @@ def test_source_info_ctor_mixing_invalid(sources, tmp_path): sources[i] = str(file) with pytest.raises(ValueError): plc.io.SourceInfo(sources) + + +def test_source_info_invalid(): + with pytest.raises(ValueError): + plc.io.SourceInfo([123]) diff --git a/python/cudf/cudf/testing/__init__.py b/python/cudf/cudf/testing/__init__.py index 1843344bc81..4e92b43b9f9 100644 --- a/python/cudf/cudf/testing/__init__.py +++ b/python/cudf/cudf/testing/__init__.py @@ -1,7 +1,9 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from cudf.testing.testing import ( + assert_eq, assert_frame_equal, assert_index_equal, + assert_neq, assert_series_equal, ) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index e067d15af4c..a6a2d4eea00 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -2,12 +2,10 @@ import itertools import string -import warnings from collections import abc from contextlib import contextmanager from decimal import Decimal -import cupy import numpy as np import pandas as pd import pytest @@ -15,7 +13,6 @@ from numba.core.typing.templates import AbstractTemplate from numba.cuda.cudadecl import registry as cuda_decl_registry from numba.cuda.cudaimpl import lower as cuda_lower -from pandas import testing as tm import cudf from cudf._lib.null_mask import bitmask_allocation_size_bytes @@ -113,81 +110,6 @@ def count_zero(arr): return np.count_nonzero(arr == 0) -def assert_eq(left, right, **kwargs): - """Assert that two cudf-like things are equivalent - - This equality test works for pandas/cudf dataframes/series/indexes/scalars - in the same way, and so makes it easier to perform parametrized testing - without switching between assert_frame_equal/assert_series_equal/... - functions. - """ - # dtypes that we support but Pandas doesn't will convert to - # `object`. 
Check equality before that happens: - if kwargs.get("check_dtype", True): - if hasattr(left, "dtype") and hasattr(right, "dtype"): - if isinstance( - left.dtype, cudf.core.dtypes._BaseDtype - ) and not isinstance( - left.dtype, cudf.CategoricalDtype - ): # leave categorical comparison to Pandas - assert_eq(left.dtype, right.dtype) - - if hasattr(left, "to_pandas"): - left = left.to_pandas() - if hasattr(right, "to_pandas"): - right = right.to_pandas() - if isinstance(left, cupy.ndarray): - left = cupy.asnumpy(left) - if isinstance(right, cupy.ndarray): - right = cupy.asnumpy(right) - - if isinstance(left, (pd.DataFrame, pd.Series, pd.Index)): - # TODO: A warning is emitted from the function - # pandas.testing.assert_[series, frame, index]_equal for some inputs: - # "DeprecationWarning: elementwise comparison failed; this will raise - # an error in the future." - # or "FutureWarning: elementwise ..." - # This warning comes from a call from pandas to numpy. It is ignored - # here because it cannot be fixed within cudf. 
- with warnings.catch_warnings(): - warnings.simplefilter( - "ignore", (DeprecationWarning, FutureWarning) - ) - if isinstance(left, pd.DataFrame): - tm.assert_frame_equal(left, right, **kwargs) - elif isinstance(left, pd.Series): - tm.assert_series_equal(left, right, **kwargs) - else: - tm.assert_index_equal(left, right, **kwargs) - - elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray): - if np.issubdtype(left.dtype, np.floating) and np.issubdtype( - right.dtype, np.floating - ): - assert np.allclose(left, right, equal_nan=True) - else: - assert np.array_equal(left, right) - else: - # Use the overloaded __eq__ of the operands - if left == right: - return True - elif any(np.issubdtype(type(x), np.floating) for x in (left, right)): - np.testing.assert_almost_equal(left, right) - else: - np.testing.assert_equal(left, right) - return True - - -def assert_neq(left, right, **kwargs): - __tracebackhide__ = True - try: - assert_eq(left, right, **kwargs) - except AssertionError: - pass - else: - raise AssertionError - - def assert_exceptions_equal( lfunc, rfunc, diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index dffbbe92fc1..e56c8d867cb 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -2,9 +2,12 @@ from __future__ import annotations +import warnings + import cupy as cp import numpy as np import pandas as pd +from pandas import testing as tm import cudf from cudf._lib.unary import is_nan @@ -708,3 +711,100 @@ def assert_frame_equal( atol=atol, obj=f'Column name="{col}"', ) + + +def assert_eq(left, right, **kwargs): + """Assert that two cudf-like things are equivalent + + Parameters + ---------- + left + Object to compare + right + Object to compare + kwargs + Keyword arguments to control behaviour of comparisons. See + :func:`assert_frame_equal`, :func:`assert_series_equal`, and + :func:`assert_index_equal`. 
+ + Notes + ----- + This equality test works for pandas/cudf dataframes/series/indexes/scalars + in the same way, and so makes it easier to perform parametrized testing + without switching between assert_frame_equal/assert_series_equal/... + functions. + + Raises + ------ + AssertionError + If the two objects do not compare equal. + """ + # dtypes that we support but Pandas doesn't will convert to + # `object`. Check equality before that happens: + if kwargs.get("check_dtype", True): + if hasattr(left, "dtype") and hasattr(right, "dtype"): + if isinstance( + left.dtype, cudf.core.dtypes._BaseDtype + ) and not isinstance( + left.dtype, cudf.CategoricalDtype + ): # leave categorical comparison to Pandas + assert_eq(left.dtype, right.dtype) + + if hasattr(left, "to_pandas"): + left = left.to_pandas() + if hasattr(right, "to_pandas"): + right = right.to_pandas() + if isinstance(left, cp.ndarray): + left = cp.asnumpy(left) + if isinstance(right, cp.ndarray): + right = cp.asnumpy(right) + + if isinstance(left, (pd.DataFrame, pd.Series, pd.Index)): + # TODO: A warning is emitted from the function + # pandas.testing.assert_[series, frame, index]_equal for some inputs: + # "DeprecationWarning: elementwise comparison failed; this will raise + # an error in the future." + # or "FutureWarning: elementwise ..." + # This warning comes from a call from pandas to numpy. It is ignored + # here because it cannot be fixed within cudf. 
+ with warnings.catch_warnings(): + warnings.simplefilter( + "ignore", (DeprecationWarning, FutureWarning) + ) + if isinstance(left, pd.DataFrame): + tm.assert_frame_equal(left, right, **kwargs) + elif isinstance(left, pd.Series): + tm.assert_series_equal(left, right, **kwargs) + else: + tm.assert_index_equal(left, right, **kwargs) + + elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + if np.issubdtype(left.dtype, np.floating) and np.issubdtype( + right.dtype, np.floating + ): + assert np.allclose(left, right, equal_nan=True) + else: + assert np.array_equal(left, right) + else: + # Use the overloaded __eq__ of the operands + if left == right: + return True + elif any(np.issubdtype(type(x), np.floating) for x in (left, right)): + np.testing.assert_almost_equal(left, right) + else: + np.testing.assert_equal(left, right) + return True + + +def assert_neq(left, right, **kwargs): + """Assert that two cudf-like things are not equal. + + Provides the negation of the meaning of :func:`assert_eq`. + """ + __tracebackhide__ = True + try: + assert_eq(left, right, **kwargs) + except AssertionError: + pass + else: + raise AssertionError diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py index 30d8f1c8422..437bc4cba67 100644 --- a/python/cudf/cudf/tests/conftest.py +++ b/python/cudf/cudf/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
import itertools import os @@ -11,7 +11,7 @@ import rmm # noqa: F401 import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq _CURRENT_DIRECTORY = str(pathlib.Path(__file__).resolve().parent) diff --git a/python/cudf/cudf/tests/dataframe/test_conversion.py b/python/cudf/cudf/tests/dataframe/test_conversion.py index fa7e5ec1d4c..d1de7245634 100644 --- a/python/cudf/cudf/tests/dataframe/test_conversion.py +++ b/python/cudf/cudf/tests/dataframe/test_conversion.py @@ -1,9 +1,9 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import pandas as pd import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_convert_dtypes(): diff --git a/python/cudf/cudf/tests/dataframe/test_io_serialization.py b/python/cudf/cudf/tests/dataframe/test_io_serialization.py index ad81609470c..57948afe1d8 100644 --- a/python/cudf/cudf/tests/dataframe/test_io_serialization.py +++ b/python/cudf/cudf/tests/dataframe/test_io_serialization.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/groupby/test_computation.py b/python/cudf/cudf/tests/groupby/test_computation.py index 04c56ef7462..630fcdc4dce 100644 --- a/python/cudf/cudf/tests/groupby/test_computation.py +++ b/python/cudf/cudf/tests/groupby/test_computation.py @@ -1,9 +1,9 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
import pandas as pd import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) diff --git a/python/cudf/cudf/tests/groupby/test_groupby_obj.py b/python/cudf/cudf/tests/groupby/test_groupby_obj.py index 04b483e08dc..ab2b16d263c 100644 --- a/python/cudf/cudf/tests/groupby/test_groupby_obj.py +++ b/python/cudf/cudf/tests/groupby/test_groupby_obj.py @@ -2,7 +2,7 @@ from numpy.testing import assert_array_equal import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_groupby_14955(): diff --git a/python/cudf/cudf/tests/groupby/test_indexing.py b/python/cudf/cudf/tests/groupby/test_indexing.py index 57e8bc1c2d8..43b6183fca5 100644 --- a/python/cudf/cudf/tests/groupby/test_indexing.py +++ b/python/cudf/cudf/tests/groupby/test_indexing.py @@ -1,6 +1,6 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_rank_return_type_compatible_mode(): diff --git a/python/cudf/cudf/tests/groupby/test_transform.py b/python/cudf/cudf/tests/groupby/test_transform.py index 78d7fbfd879..f7138036ddf 100644 --- a/python/cudf/cudf/tests/groupby/test_transform.py +++ b/python/cudf/cudf/tests/groupby/test_transform.py @@ -4,7 +4,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.fixture(params=[False, True], ids=["no-null-keys", "null-keys"]) diff --git a/python/cudf/cudf/tests/indexes/datetime/test_indexing.py b/python/cudf/cudf/tests/indexes/datetime/test_indexing.py index ee4d0f7e816..4c0ce2ed191 100644 --- a/python/cudf/cudf/tests/indexes/datetime/test_indexing.py +++ b/python/cudf/cudf/tests/indexes/datetime/test_indexing.py @@ -4,7 +4,7 @@ import pandas as pd import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def 
test_slice_datetimetz_index(): diff --git a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py index 77b32b8ce89..7cc629270b1 100644 --- a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py +++ b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py @@ -4,7 +4,7 @@ import pandas as pd import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_tz_localize(): diff --git a/python/cudf/cudf/tests/indexes/test_interval.py b/python/cudf/cudf/tests/indexes/test_interval.py index d59041e32d5..87b76ab7609 100644 --- a/python/cudf/cudf/tests/indexes/test_interval.py +++ b/python/cudf/cudf/tests/indexes/test_interval.py @@ -7,7 +7,7 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.index import IntervalIndex, interval_range -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_interval_constructor_default_closed(): diff --git a/python/cudf/cudf/tests/input_output/test_text.py b/python/cudf/cudf/tests/input_output/test_text.py index acba13bb5b0..e9406d080d4 100644 --- a/python/cudf/cudf/tests/input_output/test_text.py +++ b/python/cudf/cudf/tests/input_output/test_text.py @@ -1,11 +1,11 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. from io import StringIO import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.fixture(scope="module") diff --git a/python/cudf/cudf/tests/series/test_conversion.py b/python/cudf/cudf/tests/series/test_conversion.py index 43ac35e41a6..e1dd359e1ba 100644 --- a/python/cudf/cudf/tests/series/test_conversion.py +++ b/python/cudf/cudf/tests/series/test_conversion.py @@ -1,9 +1,9 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
import pandas as pd import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py index 302ef19852d..cea86a5499e 100644 --- a/python/cudf/cudf/tests/series/test_datetimelike.py +++ b/python/cudf/cudf/tests/series/test_datetimelike.py @@ -9,7 +9,7 @@ import cudf from cudf import date_range -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def _get_all_zones(): diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py index 8870eb421c7..a11022c1a17 100644 --- a/python/cudf/cudf/tests/test_apply_rows.py +++ b/python/cudf/cudf/tests/test_apply_rows.py @@ -1,10 +1,11 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import pytest import cudf from cudf.core.column import column -from cudf.testing._utils import assert_eq, gen_rand_series +from cudf.testing import assert_eq +from cudf.testing._utils import gen_rand_series def _kernel_multiply(a, b, out): diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index d720e6ce2ce..ce1dcce5887 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -4,7 +4,7 @@ from cudf import NA, DataFrame from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing import _utils as utils +from cudf.testing import assert_eq @pytest.mark.skipif( @@ -46,7 +46,7 @@ def test_applymap_dataframe(data, func, na_action, request): with pytest.warns(FutureWarning): got = gdf.applymap(func, na_action=na_action) - utils.assert_eq(expect, got, check_dtype=False) + assert_eq(expect, got, check_dtype=False) def test_applymap_raise_cases(): diff --git a/python/cudf/cudf/tests/test_array_function.py 
b/python/cudf/cudf/tests/test_array_function.py index e6b89e2c5fa..773141ee71a 100644 --- a/python/cudf/cudf/tests/test_array_function.py +++ b/python/cudf/cudf/tests/test_array_function.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq # To determine if NEP18 is available in the current version of NumPy we simply diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index b036c1f13f3..41b9188f036 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -15,11 +15,8 @@ PANDAS_LT_300, PANDAS_VERSION, ) -from cudf.testing._utils import ( - assert_eq, - expect_warning_if, - set_random_null_mask_inplace, -) +from cudf.testing import assert_eq +from cudf.testing._utils import expect_warning_if, set_random_null_mask_inplace _UFUNCS = [ obj diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index 238e8d990cc..2ec1d1d2f28 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -23,7 +23,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq from cudf.testing.dataset_generator import rand_dataframe diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index fa371914c3e..7d8c3b53115 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -15,7 +15,7 @@ from cudf import Index, Series from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing import _utils as utils +from cudf.testing import _utils as utils, assert_eq from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, @@ -194,7 +194,7 @@ 
def test_series_binop(binop, obj_class): if obj_class == "Index": result = Series(result) - utils.assert_eq(result, expect) + assert_eq(result, expect) @pytest.mark.parametrize("binop", _binops) @@ -318,7 +318,7 @@ def test_series_compare_nulls(cmpop, dtypes): expect[expect_mask] = cmpop(lser[expect_mask], rser[expect_mask]) got = cmpop(lser, rser) - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.fixture @@ -349,7 +349,7 @@ def test_str_series_compare_str( Series.from_pandas(str_series_cmp_data), "a" ) - utils.assert_eq(expect, got.to_pandas(nullable=True)) + assert_eq(expect, got.to_pandas(nullable=True)) def test_str_series_compare_str_reflected( @@ -360,7 +360,7 @@ def test_str_series_compare_str_reflected( "a", Series.from_pandas(str_series_cmp_data) ) - utils.assert_eq(expect, got.to_pandas(nullable=True)) + assert_eq(expect, got.to_pandas(nullable=True)) def test_str_series_compare_num( @@ -371,7 +371,7 @@ def test_str_series_compare_num( Series.from_pandas(str_series_cmp_data), cmp_scalar ) - utils.assert_eq(expect, got.to_pandas(nullable=True)) + assert_eq(expect, got.to_pandas(nullable=True)) def test_str_series_compare_num_reflected( @@ -382,7 +382,7 @@ def test_str_series_compare_num_reflected( cmp_scalar, Series.from_pandas(str_series_cmp_data) ) - utils.assert_eq(expect, got.to_pandas(nullable=True)) + assert_eq(expect, got.to_pandas(nullable=True)) @pytest.mark.parametrize("obj_class", ["Series", "Index"]) @@ -612,12 +612,12 @@ def test_different_shapes_and_columns(binop): # Empty frame on the right side pd_frame = binop(pd.DataFrame({"x": [1, 2]}), pd.DataFrame({})) cd_frame = binop(cudf.DataFrame({"x": [1, 2]}), cudf.DataFrame({})) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) # Empty frame on the left side pd_frame = pd.DataFrame({}) + pd.DataFrame({"x": [1, 2]}) cd_frame = cudf.DataFrame({}) + cudf.DataFrame({"x": [1, 2]}) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) # Note: the below 
rely on a discrepancy between cudf and pandas # While pandas inserts columns in alphabetical order, cudf inserts in the @@ -627,12 +627,12 @@ def test_different_shapes_and_columns(binop): # More rows on the left side pd_frame = pd.DataFrame({"x": [1, 2, 3]}) + pd.DataFrame({"y": [1, 2]}) cd_frame = cudf.DataFrame({"x": [1, 2, 3]}) + cudf.DataFrame({"y": [1, 2]}) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) # More rows on the right side pd_frame = pd.DataFrame({"x": [1, 2]}) + pd.DataFrame({"y": [1, 2, 3]}) cd_frame = cudf.DataFrame({"x": [1, 2]}) + cudf.DataFrame({"y": [1, 2, 3]}) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) @pytest.mark.parametrize("binop", _binops) @@ -650,7 +650,7 @@ def test_different_shapes_and_same_columns(binop): ) # cast x as float64 so it matches pandas dtype cd_frame["x"] = cd_frame["x"].astype(np.float64) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) @pytest.mark.parametrize("binop", _binops) @@ -680,7 +680,7 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop): # cast x and y as float64 so it matches pandas dtype cd_frame["x"] = cd_frame["x"].astype(np.float64) cd_frame["y"] = cd_frame["y"].astype(np.float64) - utils.assert_eq(cd_frame, pd_frame) + assert_eq(cd_frame, pd_frame) pdf1 = pd.DataFrame({"x": [1, 1]}, index=["a", "a"]) pdf2 = pd.DataFrame({"x": [2]}, index=["a"]) @@ -688,7 +688,7 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop): gdf2 = cudf.DataFrame.from_pandas(pdf2) pd_frame = binop(pdf1, pdf2) cd_frame = binop(gdf1, gdf2) - utils.assert_eq(pd_frame, cd_frame) + assert_eq(pd_frame, cd_frame) @pytest.mark.parametrize( @@ -717,12 +717,12 @@ def test_df_different_index_shape(df2, binop): def test_boolean_scalar_binop(op): psr = pd.Series(np.random.choice([True, False], 10)) gsr = cudf.from_pandas(psr) - utils.assert_eq(op(psr, True), op(gsr, True)) - utils.assert_eq(op(psr, False), op(gsr, False)) + 
assert_eq(op(psr, True), op(gsr, True)) + assert_eq(op(psr, False), op(gsr, False)) # cuDF scalar - utils.assert_eq(op(psr, True), op(gsr, cudf.Scalar(True))) - utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False))) + assert_eq(op(psr, True), op(gsr, cudf.Scalar(True))) + assert_eq(op(psr, False), op(gsr, cudf.Scalar(False))) @pytest.mark.parametrize("func", _operators_arithmetic) @@ -747,7 +747,7 @@ def test_operator_func_between_series(dtype, func, has_nulls, fill_value): pdf_series_b, fill_value=fill_value ) - utils.assert_eq(pdf_result, gdf_result) + assert_eq(pdf_result, gdf_result) @pytest.mark.parametrize("func", _operators_arithmetic) @@ -773,7 +773,7 @@ def test_operator_func_series_and_scalar( scalar, fill_value=fill_value ) - utils.assert_eq(pdf_series_result, gdf_series_result) + assert_eq(pdf_series_result, gdf_series_result) _permu_values = [0, 1, None, np.nan] @@ -812,9 +812,9 @@ def test_operator_func_between_series_logical( and np.isnan(fill_value) ): with pytest.raises(AssertionError): - utils.assert_eq(expect, got) + assert_eq(expect, got) return - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize("dtype", ["float32", "float64"]) @@ -851,7 +851,7 @@ def test_operator_func_series_and_scalar_logical( expect = pdf_series_result got = gdf_series_result.to_pandas(nullable=True) - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize("func", _operators_arithmetic) @@ -887,7 +887,7 @@ def gen_df(): got = getattr(gdf1, func)(gdf2, fill_value=fill_value) expect = getattr(pdf1, func)(pdf2, fill_value=fill_value)[list(got._data)] - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize("func", _operators_comparison) @@ -923,7 +923,7 @@ def gen_df(): got = getattr(gdf1, func)(gdf2) expect = getattr(pdf1, func)(pdf2)[list(got._data)] - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize( @@ -949,7 +949,7 @@ def gen_df(): def 
test_binop_bool_uint(func, rhs): psr = pd.Series([True, False, False]) gsr = cudf.from_pandas(psr) - utils.assert_eq( + assert_eq( getattr(psr, func)(rhs), getattr(gsr, func)(rhs), check_dtype=False ) @@ -977,7 +977,7 @@ def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor): else: pd_div = pd.Series([0], dtype=divisor_dtype) cudf_div = cudf.from_pandas(pd_div) - utils.assert_eq(sr // pd_div, cr // cudf_div) + assert_eq(sr // pd_div, cr // cudf_div) @pytest.mark.parametrize("scalar_divisor", [False, True]) @@ -1023,27 +1023,27 @@ def test_floordiv_zero_bool(scalar_divisor): def test_rmod_zero_nan(dtype): sr = pd.Series([1, 1, 0], dtype=dtype) cr = cudf.from_pandas(sr) - utils.assert_eq(1 % sr, 1 % cr) + assert_eq(1 % sr, 1 % cr) expected_dtype = np.float64 if cr.dtype.kind != "f" else dtype - utils.assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype)) + assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype)) def test_series_misc_binop(): pds = pd.Series([1, 2, 4], name="abc xyz") gds = cudf.Series([1, 2, 4], name="abc xyz") - utils.assert_eq(pds + 1, gds + 1) - utils.assert_eq(1 + pds, 1 + gds) + assert_eq(pds + 1, gds + 1) + assert_eq(1 + pds, 1 + gds) - utils.assert_eq(pds + pds, gds + gds) + assert_eq(pds + pds, gds + gds) pds1 = pd.Series([1, 2, 4], name="hello world") gds1 = cudf.Series([1, 2, 4], name="hello world") - utils.assert_eq(pds + pds1, gds + gds1) - utils.assert_eq(pds1 + pds, gds1 + gds) + assert_eq(pds + pds1, gds + gds1) + assert_eq(pds1 + pds, gds1 + gds) - utils.assert_eq(pds1 + pds + 5, gds1 + gds + 5) + assert_eq(pds1 + pds + 5, gds1 + gds + 5) def test_int8_float16_binop(): @@ -1051,7 +1051,7 @@ def test_int8_float16_binop(): b = np.float16(2) expect = cudf.Series([0.5]) got = a / b - utils.assert_eq(expect, got, check_dtype=False) + assert_eq(expect, got, check_dtype=False) @pytest.mark.parametrize("dtype", ["int64", "float64", "str"]) @@ -1061,7 +1061,7 @@ def test_vector_to_none_binops(dtype): 
expect = Series([None] * 4).astype(dtype) got = data + None - utils.assert_eq(expect, got) + assert_eq(expect, got) def dtype_scalar(val, dtype): @@ -1747,12 +1747,12 @@ def test_datetime_dateoffset_binaryop( expect = op(psr, poffset) got = op(gsr, goffset) - utils.assert_eq(expect, got) + assert_eq(expect, got) expect = op(psr, -poffset) got = op(gsr, -goffset) - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize( @@ -1793,7 +1793,7 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op): expect = op(psr, poffset) got = op(gsr, goffset) - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12]) @@ -1840,7 +1840,7 @@ def test_datetime_dateoffset_binaryop_reflected( # TODO: Remove check_dtype once we get some clarity on: # https://github.com/pandas-dev/pandas/issues/57448 - utils.assert_eq(expect, got, check_dtype=False) + assert_eq(expect, got, check_dtype=False) with pytest.raises(TypeError): poffset - psr @@ -1878,7 +1878,7 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype): expected = data.to_pandas() == val got = data == val - utils.assert_eq(expected, got) + assert_eq(expected, got) @pytest.mark.parametrize( @@ -2302,7 +2302,7 @@ def test_binops_decimal(op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype): got = op(a, b) assert expect.dtype == got.dtype - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize( @@ -2355,7 +2355,7 @@ def test_binops_reflect_decimal( got = getattr(a, op)(b) assert expect.dtype == got.dtype - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize("powers", [0, 1, 2, 3]) @@ -2371,7 +2371,7 @@ def test_binops_decimal_pow(powers): ) ps = s.to_pandas() - utils.assert_eq(s**powers, ps**powers, check_dtype=False) + assert_eq(s**powers, ps**powers, check_dtype=False) def test_binops_raise_error(): @@ -2554,7 +2554,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, 
reflected): actual = op(lhs, rhs) - utils.assert_eq(expected, actual) + assert_eq(expected, actual) @pytest.mark.parametrize( @@ -2804,7 +2804,7 @@ def decimal_series(input, dtype): got = op(lhs, rhs) assert expect.dtype == got.dtype - utils.assert_eq(expect, got) + assert_eq(expect, got) @pytest.mark.parametrize( @@ -2979,7 +2979,7 @@ def test_binops_decimal_scalar_compare(args, reflected): actual = op(lhs, rhs) - utils.assert_eq(expected, actual) + assert_eq(expected, actual) @pytest.mark.parametrize( @@ -3042,7 +3042,7 @@ def test_equality_ops_index_mismatch(fn): expected = getattr(pa, fn)(pb) actual = getattr(a, fn)(b).to_pandas(nullable=True) - utils.assert_eq(expected, actual) + assert_eq(expected, actual) def generate_test_null_equals_columnops_data(): @@ -3132,7 +3132,7 @@ def test_empty_column(binop, data, scalar): got = binop(gdf, scalar) expected = binop(pdf, scalar) - utils.assert_eq(expected, got) + assert_eq(expected, got) @pytest.mark.parametrize( @@ -3179,7 +3179,7 @@ def test_binops_dot(df, other): expected = pdf @ host_other got = df @ other - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_dot_preserve_index(): @@ -3187,7 +3187,7 @@ def test_binop_dot_preserve_index(): df = cudf.DataFrame(np.eye(2), columns=["A", "B"], index=["A", "B"]) result = ser @ df expected = ser.to_pandas() @ df.to_pandas() - utils.assert_eq(result, expected) + assert_eq(result, expected) def test_binop_series_with_repeated_index(): @@ -3198,7 +3198,7 @@ def test_binop_series_with_repeated_index(): gsr2 = cudf.from_pandas(psr2) expected = psr1 - psr2 got = gsr1 - gsr2 - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_series_series(): @@ -3209,7 +3209,7 @@ def test_binop_integer_power_series_series(): ps_exponent = gs_exponent.to_pandas() expected = ps_base**ps_exponent got = gs_base**gs_exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_series_scalar(): @@ -3219,7 
+3219,7 @@ def test_binop_integer_power_series_scalar(): ps_base = gs_base.to_pandas() expected = ps_base**exponent.value got = gs_base**exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_series_int(): @@ -3229,7 +3229,7 @@ def test_binop_integer_power_series_int(): ps_base = gs_base.to_pandas() expected = ps_base**exponent got = gs_base**exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_scalar_series(): @@ -3239,7 +3239,7 @@ def test_binop_integer_power_scalar_series(): ps_exponent = gs_exponent.to_pandas() expected = base.value**ps_exponent got = base**gs_exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_scalar_scalar(): @@ -3248,7 +3248,7 @@ def test_binop_integer_power_scalar_scalar(): exponent = cudf.Scalar(1) expected = base.value**exponent.value got = base**exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_scalar_int(): @@ -3257,7 +3257,7 @@ def test_binop_integer_power_scalar_int(): exponent = 1 expected = base.value**exponent got = base**exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_int_series(): @@ -3267,7 +3267,7 @@ def test_binop_integer_power_int_series(): ps_exponent = gs_exponent.to_pandas() expected = base**ps_exponent got = base**gs_exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_binop_integer_power_int_scalar(): @@ -3276,7 +3276,7 @@ def test_binop_integer_power_int_scalar(): exponent = cudf.Scalar(1) expected = base**exponent.value got = base**exponent - utils.assert_eq(expected, got) + assert_eq(expected, got) def test_numpy_int_scalar_binop(): @@ -3291,7 +3291,7 @@ def test_binop_index_series(op): actual = op(gi, gs) expected = op(gi.to_pandas(), gs.to_pandas()) - utils.assert_eq(expected, actual) + assert_eq(expected, actual) @pytest.mark.parametrize("name1", 
utils.SERIES_OR_INDEX_NAMES) @@ -3307,7 +3307,7 @@ def test_binop_index_dt_td_series_with_names(name1, name2): expected = gi.to_pandas() + gs.to_pandas() actual = gi + gs - utils.assert_eq(expected, actual) + assert_eq(expected, actual) @pytest.mark.parametrize("data1", [[1, 2, 3], [10, 11, None]]) @@ -3319,9 +3319,9 @@ def test_binop_eq_ne_index_series(data1, data2): actual = gi == gs expected = gi.to_pandas() == gs.to_pandas() - utils.assert_eq(expected, actual) + assert_eq(expected, actual) actual = gi != gs expected = gi.to_pandas() != gs.to_pandas() - utils.assert_eq(expected, actual) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index c36595192e4..9b6029582ce 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -11,11 +11,8 @@ import pytest import cudf -from cudf.testing._utils import ( - NUMERIC_TYPES, - assert_eq, - assert_exceptions_equal, -) +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES, assert_exceptions_equal @contextmanager diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index a8a297c155f..ea919c786b9 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -9,7 +9,8 @@ import cudf from cudf._lib.transform import mask_to_bools from cudf.core.column.column import as_column -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal from cudf.utils import dtypes as dtypeutils dtypes = sorted( diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index f1f6097d6a9..f3343c37d1d 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -6,7 +6,7 @@ import cudf from cudf.core.column_accessor 
import ColumnAccessor -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq simple_test_data = [ {}, diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 4b43a33c8c8..c1c03de48d4 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -10,11 +10,8 @@ import cudf from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing._utils import ( - assert_eq, - assert_exceptions_equal, - expect_warning_if, -) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @contextmanager diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index a65ab1780b6..fe86df99d35 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -9,12 +9,8 @@ import cudf from cudf import Series from cudf.core.index import Index, RangeIndex -from cudf.testing._utils import ( - DATETIME_TYPES, - NUMERIC_TYPES, - TIMEDELTA_TYPES, - assert_eq, -) +from cudf.testing import assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES def cudf_date_series(start, stop, freq): diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index 0bc9ffa8004..9b6f82ec705 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -8,7 +8,8 @@ import cudf from cudf import Series from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES pytestmark = pytest.mark.spilling diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 5009a7f2628..09617306606 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ 
b/python/cudf/cudf/tests/test_csv.py @@ -18,7 +18,8 @@ import cudf from cudf import read_csv from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def make_numeric_dataframe(nrows, dtype): diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index 7fdf9754534..dc892caba3b 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. """ Test method that apply GPU kernel to a frame. @@ -9,7 +9,7 @@ from numba import cuda from cudf import DataFrame -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129]) diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index 06d63561fc1..29f2f46e3c7 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -11,12 +11,8 @@ import cudf from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing._utils import ( - DATETIME_TYPES, - NUMERIC_TYPES, - TIMEDELTA_TYPES, - assert_eq, -) +from cudf.testing import assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py index 5ffe255d0f8..278e63f3e8b 100644 --- a/python/cudf/cudf/tests/test_custom_accessor.py +++ b/python/cudf/cudf/tests/test_custom_accessor.py @@ -4,7 +4,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq 
@cudf.api.extensions.register_dataframe_accessor("point") diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/test_cut.py index 24c1eaa8f02..3f31da035aa 100644 --- a/python/cudf/cudf/tests/test_cut.py +++ b/python/cudf/cudf/tests/test_cut.py @@ -9,7 +9,7 @@ import pytest from cudf.core.cut import cut -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index cfa2a4aa8fd..05ee8346afa 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -30,14 +30,12 @@ from cudf.core.buffer.spill_manager import get_global_manager from cudf.core.column import column from cudf.errors import MixedTypeError -from cudf.testing import _utils as utils +from cudf.testing import _utils as utils, assert_eq, assert_neq from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, - assert_eq, assert_exceptions_equal, - assert_neq, does_not_raise, expect_warning_if, gen_rand, diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index fec52d82ab1..45bd31ef58e 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from copy import copy, deepcopy import cupy as cp @@ -7,7 +7,8 @@ import pytest from cudf.core.dataframe import DataFrame -from cudf.testing._utils import ALL_TYPES, assert_eq, assert_neq +from cudf.testing import assert_eq, assert_neq +from cudf.testing._utils import ALL_TYPES """ DataFrame copy expectations diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py index 8e5e5ab66c4..7f4e249a6d7 100644 --- a/python/cudf/cudf/tests/test_datasets.py +++ b/python/cudf/cudf/tests/test_datasets.py @@ -3,7 +3,7 @@ import numpy as np import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_dataset_timeseries(): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index e3ecaafae5b..092e9790c63 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -15,10 +15,10 @@ from cudf import DataFrame, Series from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.index import DatetimeIndex +from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index 0745e5aba48..c41a938f6ea 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
import decimal from decimal import Decimal @@ -11,12 +11,12 @@ import cudf from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype +from cudf.testing import assert_eq from cudf.testing._utils import ( FLOAT_TYPES, INTEGER_TYPES, SIGNED_TYPES, _decimal_series, - assert_eq, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py index 8ce4da792a4..7f48e414180 100644 --- a/python/cudf/cudf/tests/test_df_protocol.py +++ b/python/cudf/cudf/tests/test_df_protocol.py @@ -20,7 +20,7 @@ from_dataframe, protocol_dtype_to_cupy_dtype, ) -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.fixture( diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index 7ea3979b0f1..ebcc35784ee 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -9,7 +9,7 @@ from packaging import version import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq nelems = [0, 3, 10] dtype = [np.uint16, np.int32, np.float64] diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index c3c8ed922f0..ed0cf0053ea 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index 0efd8d9781c..edb534a3618 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -17,7 +17,7 @@ ListDtype, StructDtype, ) -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq from cudf.utils.dtypes import np_to_pa_dtype diff --git a/python/cudf/cudf/tests/test_duplicates.py 
b/python/cudf/cudf/tests/test_duplicates.py index 161b245953b..0b4ed52ba96 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -9,7 +9,8 @@ import cudf from cudf import concat -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal # most tests are similar to pandas drop_duplicates diff --git a/python/cudf/cudf/tests/test_ewm.py b/python/cudf/cudf/tests/test_ewm.py index 0861d2363ce..6cb3c19d5a8 100644 --- a/python/cudf/cudf/tests/test_ewm.py +++ b/python/cudf/cudf/tests/test_ewm.py @@ -2,7 +2,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index f8782681f62..47f9180dcb1 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -7,7 +7,7 @@ import cudf from cudf import DataFrame, Index -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)]) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index 12a325fa4e8..7e5523bb8c7 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
import os from string import ascii_letters @@ -9,7 +9,8 @@ import pytest import cudf -from cudf.testing._utils import NUMERIC_TYPES, assert_eq +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES @pytest.fixture(params=[0, 1, 10, 100]) diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index a677ace18ec..fc22d8bc0ea 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import io import os @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq gcsfs = pytest.importorskip("gcsfs") diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 674f694a224..826a0e52f57 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -28,11 +28,11 @@ from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES from cudf.core.udf.utils import UDFError, precompiled +from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, SIGNED_TYPES, TIMEDELTA_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py index d420c95cfb4..430ed973f19 100644 --- a/python/cudf/cudf/tests/test_hdf.py +++ b/python/cudf/cudf/tests/test_hdf.py @@ -8,7 +8,8 @@ import pytest import cudf -from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES, assert_eq +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES pytest.importorskip("tables") diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index f8de16f8609..098b5192d4a 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ 
b/python/cudf/cudf/tests/test_hdfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import os from io import BytesIO @@ -10,7 +10,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq if not os.environ.get("RUN_HDFS_TESTS"): pytestmark = pytest.mark.skip("Env not configured to run HDFS tests") diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index a59836df5ba..05dcd85df6a 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -18,6 +18,7 @@ from cudf.api.extensions import no_default from cudf.api.types import is_bool_dtype from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex +from cudf.testing import assert_eq from cudf.testing._utils import ( ALL_TYPES, FLOAT_TYPES, @@ -28,7 +29,6 @@ UNSIGNED_TYPES, assert_column_memory_eq, assert_column_memory_ne, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 009e48a8669..7005cbc6834 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -11,10 +11,9 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing import _utils as utils +from cudf.testing import _utils as utils, assert_eq from cudf.testing._utils import ( INTEGER_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py index a0e90cc89a2..4a0dc331e1a 100644 --- a/python/cudf/cudf/tests/test_interpolate.py +++ b/python/cudf/cudf/tests/test_interpolate.py @@ -4,11 +4,8 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing._utils import ( - assert_eq, - 
assert_exceptions_equal, - expect_warning_if, -) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py index 013f4439ad5..1b395c09ba8 100644 --- a/python/cudf/cudf/tests/test_interval.py +++ b/python/cudf/cudf/tests/test_interval.py @@ -6,7 +6,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py index 8d71a6c05b8..9ea4ba007d2 100644 --- a/python/cudf/cudf/tests/test_join_order.py +++ b/python/cudf/cudf/tests/test_join_order.py @@ -8,7 +8,7 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.fixture(params=[False, True], ids=["unsorted", "sorted"]) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index f36774daab2..b1ce69e58ef 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -9,11 +9,11 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype +from cudf.testing import assert_eq from cudf.testing._utils import ( INTEGER_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index ba6a8f94719..297040b6d95 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -14,11 +14,11 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION +from cudf.testing import assert_eq from 
cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES, - assert_eq, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index f04cb8a91a4..f76143cb381 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -12,12 +12,8 @@ from cudf import NA from cudf._lib.copying import get_element from cudf.api.types import is_scalar -from cudf.testing._utils import ( - DATETIME_TYPES, - NUMERIC_TYPES, - TIMEDELTA_TYPES, - assert_eq, -) +from cudf.testing import assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py index 3c627a5fe89..0896d91570e 100644 --- a/python/cudf/cudf/tests/test_monotonic.py +++ b/python/cudf/cudf/tests/test_monotonic.py @@ -12,7 +12,7 @@ import cudf from cudf import Index, MultiIndex, Series from cudf.core.index import CategoricalIndex, DatetimeIndex, RangeIndex -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)]) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 7b95e4f9a44..07c2e9c3fcf 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -21,12 +21,8 @@ import cudf from cudf.api.extensions import no_default from cudf.core.column import as_column -from cudf.testing._utils import ( - assert_eq, - assert_exceptions_equal, - assert_neq, - expect_warning_if, -) +from cudf.testing import assert_eq, assert_neq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @contextmanager diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index 03081208739..1b0589254f5 100644 --- a/python/cudf/cudf/tests/test_numerical.py 
+++ b/python/cudf/cudf/tests/test_numerical.py @@ -5,7 +5,8 @@ import pytest import cudf -from cudf.testing._utils import NUMERIC_TYPES, assert_eq, expect_warning_if +from cudf.testing import assert_eq +from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py index 46324a85bb4..fa664d52ecf 100644 --- a/python/cudf/cudf/tests/test_numpy_interop.py +++ b/python/cudf/cudf/tests/test_numpy_interop.py @@ -1,10 +1,10 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import numpy as np import pytest from cudf import DataFrame, Series -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_to_records_noindex(): diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py index cd0055ad78b..154e1e19072 100644 --- a/python/cudf/cudf/tests/test_onehot.py +++ b/python/cudf/cudf/tests/test_onehot.py @@ -7,7 +7,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq pytestmark = pytest.mark.spilling diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index b83b8f08a8b..e0884a5819a 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -15,9 +15,8 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.io.orc import ORCWriter -from cudf.testing import assert_frame_equal +from cudf.testing import assert_eq, assert_frame_equal from cudf.testing._utils import ( - assert_eq, expect_warning_if, gen_rand_series, supported_numpy_dtypes, diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py index da506a8d5b2..ad78621c5fa 100644 --- a/python/cudf/cudf/tests/test_pack.py +++ b/python/cudf/cudf/tests/test_pack.py 
@@ -20,7 +20,7 @@ from cudf import DataFrame, Index, Series from cudf._lib.copying import pack, unpack -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_sizeof_packed_dataframe(): diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py index 78cf5b998e8..5782437e394 100644 --- a/python/cudf/cudf/tests/test_pandas_interop.py +++ b/python/cudf/cudf/tests/test_pandas_interop.py @@ -1,11 +1,11 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. import numpy as np import pandas as pd import cudf from cudf import DataFrame -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_to_pandas(): diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index af79f361b43..e1e7952605b 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -28,12 +28,8 @@ ParquetWriter, merge_parquet_filemetadata, ) -from cudf.testing import dataset_generator as dg -from cudf.testing._utils import ( - TIMEDELTA_TYPES, - assert_eq, - set_random_null_mask_inplace, -) +from cudf.testing import assert_eq, dataset_generator as dg +from cudf.testing._utils import TIMEDELTA_TYPES, set_random_null_mask_inplace @contextmanager diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index 13a07ef8adc..719e8a33285 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -8,7 +8,7 @@ from cudf import DataFrame, Index, RangeIndex, Series from cudf.core.buffer import as_buffer -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq pytestmark = pytest.mark.spilling diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py index 8b126073a0f..7d8303df0c3 100644 --- a/python/cudf/cudf/tests/test_quantiles.py +++ 
b/python/cudf/cudf/tests/test_quantiles.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import re @@ -6,7 +6,8 @@ import pytest import cudf -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def test_single_q(): diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index cf9e70d85c7..b12209fd3b9 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. import datetime @@ -11,7 +11,7 @@ import cudf from cudf import DataFrame -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq from cudf.utils import queryutils _params_query_parser = [] diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py index ae5171f28d4..9372681187d 100644 --- a/python/cudf/cudf/tests/test_query_mask.py +++ b/python/cudf/cudf/tests/test_query_mask.py @@ -1,11 +1,11 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
import numpy as np import pandas as pd import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq _data = [ {"a": [0, 1.0, 2.0, None, np.nan, None, 3, 5]}, diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 1a5f25e320f..4c1d8ce92ae 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -7,7 +7,8 @@ import pytest from cudf import DataFrame -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal @pytest.fixture diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index c6ffa1d2bc7..1247fa362ce 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -11,13 +11,8 @@ import cudf from cudf import Series from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing import _utils as utils -from cudf.testing._utils import ( - NUMERIC_TYPES, - assert_eq, - expect_warning_if, - gen_rand, -) +from cudf.testing import _utils as utils, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if, gen_rand params_dtype = NUMERIC_TYPES diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 9466398964a..d4fe5ff3bb5 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -12,10 +12,10 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype +from cudf.testing import assert_eq from cudf.testing._utils import ( INTEGER_TYPES, NUMERIC_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/test_resampling.py index 
d7a3fea1273..95fa8e9a50a 100644 --- a/python/cudf/cudf/tests/test_resampling.py +++ b/python/cudf/cudf/tests/test_resampling.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def assert_resample_results_equal(lhs, rhs, **kwargs): diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index daa1e70808f..50db4302b75 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -10,12 +10,8 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.buffer.spill_manager import get_global_manager -from cudf.testing._utils import ( - ALL_TYPES, - DATETIME_TYPES, - NUMERIC_TYPES, - assert_eq, -) +from cudf.testing import assert_eq +from cudf.testing._utils import ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES pytest_xfail = pytest.mark.xfail pytestmark = pytest.mark.spilling diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index 1d1d7ae8d29..135870f7359 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -7,7 +7,7 @@ import pytest import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq from cudf.testing.dataset_generator import rand_dataframe diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index cdce17eeb76..a44bf791767 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -12,7 +12,7 @@ from fsspec.core import get_fs_token_paths import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq moto = pytest.importorskip("moto", minversion="3.1.6") boto3 = pytest.importorskip("boto3") diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py index 4cbc2197cfd..b76566b00e2 100644 --- a/python/cudf/cudf/tests/test_scan.py +++ 
b/python/cudf/cudf/tests/test_scan.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. from itertools import product @@ -8,12 +8,8 @@ import cudf from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype -from cudf.testing._utils import ( - INTEGER_TYPES, - NUMERIC_TYPES, - assert_eq, - gen_rand, -) +from cudf.testing import assert_eq +from cudf.testing._utils import INTEGER_TYPES, NUMERIC_TYPES, gen_rand params_sizes = [0, 1, 2, 5] diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py index 3ba652ff6c0..65943518113 100644 --- a/python/cudf/cudf/tests/test_search.py +++ b/python/cudf/cudf/tests/test_search.py @@ -5,7 +5,8 @@ import pytest import cudf -from cudf.testing._utils import assert_eq, gen_rand, random_bitmask +from cudf.testing import assert_eq +from cudf.testing._utils import gen_rand, random_bitmask @pytest.mark.parametrize("side", ["left", "right"]) diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index f26d78e7783..0b892a51895 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -9,8 +9,7 @@ import pytest import cudf -from cudf.testing import _utils as utils -from cudf.testing._utils import assert_eq +from cudf.testing import _utils as utils, assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 52956c230ba..467d0c46ae7 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -17,11 +17,11 @@ from cudf.api.extensions import no_default from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.errors import MixedTypeError +from cudf.testing import assert_eq from cudf.testing._utils import ( NUMERIC_TYPES, SERIES_OR_INDEX_NAMES, TIMEDELTA_TYPES, - assert_eq, assert_exceptions_equal, 
expect_warning_if, gen_rand, @@ -1054,6 +1054,18 @@ def test_fillna_with_nan(data, nan_as_null, fill_value): assert_eq(expected, actual) +def test_fillna_categorical_with_non_categorical_raises(): + ser = cudf.Series([1, None], dtype="category") + with pytest.raises(TypeError): + ser.fillna(cudf.Series([1, 2])) + + +def test_fillna_categorical_with_different_categories_raises(): + ser = cudf.Series([1, None], dtype="category") + with pytest.raises(TypeError): + ser.fillna(cudf.Series([1, 2], dtype="category")) + + def test_series_mask_mixed_dtypes_error(): s = cudf.Series(["a", "b", "c"]) with pytest.raises( diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py index 9da08e483c9..3d8b6a79d2a 100644 --- a/python/cudf/cudf/tests/test_seriesmap.py +++ b/python/cudf/cudf/tests/test_seriesmap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from itertools import product from math import floor @@ -9,7 +9,8 @@ import cudf from cudf import Series -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal def test_series_map_basic(): diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index ff2f7bd41f2..69122cdbafa 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -6,11 +6,8 @@ import cudf from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION -from cudf.testing._utils import ( - assert_eq, - assert_exceptions_equal, - expect_warning_if, -) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if @pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})]) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index 449f21721f4..a8ffce6e88b 100644 --- 
a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -10,10 +10,10 @@ from cudf import DataFrame, Series from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.core.column import NumericalColumn +from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, - assert_eq, assert_exceptions_equal, expect_warning_if, ) diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py index 59b8e6d2e70..7af83a99d60 100644 --- a/python/cudf/cudf/tests/test_spilling.py +++ b/python/cudf/cudf/tests/test_spilling.py @@ -39,7 +39,7 @@ SpillableBufferOwner, SpillLock, ) -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq if get_global_manager() is not None: pytest.skip( diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 27811d0fcde..d5f63fdab77 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -11,11 +11,8 @@ from cudf.api.extensions import no_default from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.datasets import randomdata -from cudf.testing._utils import ( - assert_eq, - assert_exceptions_equal, - expect_warning_if, -) +from cudf.testing import assert_eq +from cudf.testing._utils import assert_exceptions_equal, expect_warning_if params_dtypes = [np.int32, np.uint32, np.float32, np.float64] methods = ["min", "max", "sum", "mean", "var", "std"] diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 801c530da43..f447759d010 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -17,10 +17,10 @@ from cudf import concat from cudf.core.column.string import StringColumn from cudf.core.index import Index +from cudf.testing import assert_eq from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, - 
assert_eq, assert_exceptions_equal, ) from cudf.utils import dtypes as dtypeutils diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py index 5dbb86fe27d..4432d2afc8e 100644 --- a/python/cudf/cudf/tests/test_string_udfs.py +++ b/python/cudf/cudf/tests/test_string_udfs.py @@ -21,7 +21,8 @@ udf_string, ) from cudf.core.udf.utils import _get_extensionty_size, _ptx_file -from cudf.testing._utils import assert_eq, sv_to_udf_str +from cudf.testing import assert_eq +from cudf.testing._utils import sv_to_udf_str from cudf.utils._numba import _CUDFNumbaConfig _PTX_FILE = _ptx_file() diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index 60d9516f385..e91edc9eec6 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import numpy as np import pandas as pd @@ -7,7 +7,8 @@ import cudf from cudf.core.dtypes import StructDtype -from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES, assert_eq +from cudf.testing import assert_eq +from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py index 1994536f395..c3620db3880 100644 --- a/python/cudf/cudf/tests/test_testing.py +++ b/python/cudf/cudf/tests/test_testing.py @@ -17,9 +17,8 @@ OTHER_TYPES, assert_column_memory_eq, assert_column_memory_ne, - assert_eq, ) -from cudf.testing.testing import assert_column_equal +from cudf.testing.testing import assert_column_equal, assert_eq @pytest.fixture( diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 0c591965361..c4a2349f535 100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -9,8 +9,8 @@ import pytest import cudf -from 
cudf.testing import _utils as utils -from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.testing import _utils as utils, assert_eq +from cudf.testing._utils import assert_exceptions_equal _TIMEDELTA_DATA = [ [1000000, 200000, 3000000], diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 4843decedba..087d10b8295 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -17,9 +17,9 @@ ) from cudf.core.udf.api import Masked from cudf.core.udf.utils import precompiled +from cudf.testing import assert_eq from cudf.testing._utils import ( _decimal_series, - assert_eq, parametrize_numeric_dtypes_pairwise, sv_to_udf_str, ) diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index 15d9d03d4a7..dbbf4fba3a6 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
import itertools import operator @@ -10,7 +10,7 @@ import cudf from cudf import Series -from cudf.testing import _utils as utils +from cudf.testing import _utils as utils, assert_eq _unaops = [operator.abs, operator.invert, operator.neg, np.ceil, np.floor] @@ -128,4 +128,4 @@ def test_scalar_no_negative_bools(): def test_series_bool_neg(): sr = Series([True, False, True, None, False, None, True, True]) psr = sr.to_pandas(nullable=True) - utils.assert_eq((-sr).to_pandas(nullable=True), -psr, check_dtype=True) + assert_eq((-sr).to_pandas(nullable=True), -psr, check_dtype=True) diff --git a/python/cudf/cudf/tests/text/test_subword_tokenizer.py b/python/cudf/cudf/tests/text/test_subword_tokenizer.py index b21edc0477f..78b58344374 100644 --- a/python/cudf/cudf/tests/text/test_subword_tokenizer.py +++ b/python/cudf/cudf/tests/text/test_subword_tokenizer.py @@ -7,7 +7,7 @@ import cudf from cudf.core.subword_tokenizer import SubwordTokenizer -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.fixture(scope="module") diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py index 36f7f3de828..52179f55da3 100644 --- a/python/cudf/cudf/tests/text/test_text_methods.py +++ b/python/cudf/cudf/tests/text/test_text_methods.py @@ -9,7 +9,7 @@ import cudf from cudf.core.byte_pair_encoding import BytePairEncoder from cudf.core.tokenize_vocabulary import TokenizeVocabulary -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq def test_tokenize(): diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 5be4d350c0b..eed5037cbea 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1175,7 +1175,7 @@ def test_intermediates_are_proxied(): def test_from_dataframe(): cudf = pytest.importorskip("cudf") - from cudf.testing._utils import assert_eq + 
from cudf.testing import assert_eq data = {"foo": [1, 2, 3], "bar": [4, 5, 6]} diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 84a3a32646d..2de0bf39785 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -7,7 +7,7 @@ from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector -from cudf._lib.io.datasource cimport Datasource +from cudf._lib.pylibcudf.io.datasource cimport Datasource from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource diff --git a/python/custreamz/custreamz/tests/test_kafka.py b/python/custreamz/custreamz/tests/test_kafka.py index ad3b829544b..3a3c4e994d0 100644 --- a/python/custreamz/custreamz/tests/test_kafka.py +++ b/python/custreamz/custreamz/tests/test_kafka.py @@ -1,8 +1,8 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import confluent_kafka as ck import pytest -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq @pytest.mark.parametrize("commit_offset", [1, 45, 100, 22, 1000, 10]) diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 58d28f0597e..6f04b5737da 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -9,7 +9,8 @@ from dask import dataframe as dd from cudf import DataFrame, Series, date_range -from cudf.testing._utils import assert_eq, does_not_raise +from cudf.testing import assert_eq +from cudf.testing._utils import does_not_raise import dask_cudf from dask_cudf.tests.utils import xfail_dask_expr diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 7f8a619ae22..174923c2c7e 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -795,7 
+795,7 @@ def test_dataframe_set_index(): pddf = dd.from_pandas(pdf, npartitions=4) pddf = pddf.set_index("str") - from cudf.testing._utils import assert_eq + from cudf.testing import assert_eq assert_eq(ddf.compute(), pddf.compute()) diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index 07fdb25dff9..be10b0d4843 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -9,7 +9,7 @@ from distributed.utils_test import cleanup, loop, loop_in_thread # noqa: F401 import cudf -from cudf.testing._utils import assert_eq +from cudf.testing import assert_eq import dask_cudf