Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename logical_cast to bit_cast and allow additional conversions #7373

Merged
merged 10 commits into from
Mar 3, 2021
42 changes: 22 additions & 20 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ class column_view : public detail::column_view_base {
auto child_end() const noexcept { return _children.cend(); }

private:
friend column_view logical_cast(column_view const& input, data_type type);
friend column_view bit_cast(column_view const& input, data_type type);

std::vector<column_view> _children{}; ///< Based on element type, children
///< may contain additional data
Expand Down Expand Up @@ -550,7 +550,7 @@ class mutable_column_view : public detail::column_view_base {
operator column_view() const;

private:
friend mutable_column_view logical_cast(mutable_column_view const& input, data_type type);
friend mutable_column_view bit_cast(mutable_column_view const& input, data_type type);

std::vector<mutable_column_view> mutable_children;
};
Expand All @@ -564,47 +564,49 @@ class mutable_column_view : public detail::column_view_base {
size_type count_descendants(column_view parent);

/**
* @brief Zero-copy cast between types with the same underlying representation.
* @brief Zero-copy cast between types with the same size and compatible underlying representations.
*
* This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits
* as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that
* have the same width and underlying representation. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch; logically
* casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the
* duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ,
* nor can an INT32 columm be logically cast to a FLOAT32 since what the bits represent differs.
* have the same width and compatible representations. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch;
* bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of
* the duration count. A FLOAT32 can also be bit-cast into INT32 and treated as an integer value.
* However, an INT32 column cannot be bit-cast to INT64 as the sizes differ, nor can a string_view
* column be cast into a numeric type column as their data representations are not compatible.
*
* The validity of the conversion can be checked with `cudf::is_logically_castable()`.
* The validity of the conversion can be checked with `cudf::is_bit_castable()`.
*
* @throws cudf::logic_error if the specified cast is not possible, i.e.,
* `is_logically_castable(input.type(), type)` is false.
* `is_bit_castable(input.type(), type)` is false.
*
* @param input The `column_view` to cast from
* @param type The `data_type` to cast to
* @return New `column_view` wrapping the same data as `input` but cast to `type`
*/
column_view logical_cast(column_view const& input, data_type type);
column_view bit_cast(column_view const& input, data_type type);

/**
* @brief Zero-copy cast between types with the same underlying representation.
* @brief Zero-copy cast between types with the same size and compatible underlying representations.
*
* This is similar to `reinterpret_cast` or `bit_cast` in that it gives a view of the same raw bits
* as a different type. Unlike `reinterpret_cast` however, this cast is only allowed on types that
* have the same width and underlying representation. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch; logically
* casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of the
* duration count. However, an INT32 column cannot be logically cast to INT64 as the sizes differ,
* nor can an INT32 columm be logically cast to a FLOAT32 since what the bits represent differs.
* have the same width and compatible representations. For example, the way timestamp types are laid
* out in memory is equivalent to an integer representing a duration since a fixed epoch;
* bit-casting to the same integer type (INT32 for days, INT64 for others) results in a raw view of
* the duration count. A FLOAT32 can also be bit-cast into INT32 and treated as an integer value.
* However, an INT32 column cannot be bit-cast to INT64 as the sizes differ, nor can a string_view
* column be cast into a numeric type column as their data representations are not compatible.
*
* The validity of the conversion can be checked with `cudf::is_logically_castable()`.
* The validity of the conversion can be checked with `cudf::is_bit_castable()`.
*
* @throws cudf::logic_error if the specified cast is not possible, i.e.,
* `is_logically_castable(input.type(), type)` is false.
* `is_bit_castable(input.type(), type)` is false.
*
* @param input The `mutable_column_view` to cast from
* @param type The `data_type` to cast to
* @return New `mutable_column_view` wrapping the same data as `input` but cast to `type`
*/
mutable_column_view logical_cast(mutable_column_view const& input, data_type type);
mutable_column_view bit_cast(mutable_column_view const& input, data_type type);

} // namespace cudf
81 changes: 32 additions & 49 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,74 +591,57 @@ constexpr inline bool is_nested(data_type type)
return cudf::type_dispatcher(type, is_nested_impl{});
}

template <typename FromType, typename ToType>
struct is_logically_castable_impl : std::false_type {
};

// Allow cast to same type
template <typename Type>
struct is_logically_castable_impl<Type, Type> : std::true_type {
};

#ifndef MAP_CASTABLE_TYPES
#define MAP_CASTABLE_TYPES(Type1, Type2) \
template <> \
struct is_logically_castable_impl<Type1, Type2> : std::true_type { \
}; \
template <> \
struct is_logically_castable_impl<Type2, Type1> : std::true_type { \
};
#endif

// Allow cast between timestamp and integer representation
MAP_CASTABLE_TYPES(cudf::timestamp_D, cudf::timestamp_D::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_s, cudf::timestamp_s::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_ms, cudf::timestamp_ms::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_us, cudf::timestamp_us::duration::rep);
MAP_CASTABLE_TYPES(cudf::timestamp_ns, cudf::timestamp_ns::duration::rep);
// Allow cast between durations and integer representation
MAP_CASTABLE_TYPES(cudf::duration_D, cudf::duration_D::rep);
MAP_CASTABLE_TYPES(cudf::duration_s, cudf::duration_s::rep);
MAP_CASTABLE_TYPES(cudf::duration_ms, cudf::duration_ms::rep);
MAP_CASTABLE_TYPES(cudf::duration_us, cudf::duration_us::rep);
MAP_CASTABLE_TYPES(cudf::duration_ns, cudf::duration_ns::rep);
// Allow cast between decimals and integer representation
MAP_CASTABLE_TYPES(numeric::decimal32, numeric::decimal32::rep);
MAP_CASTABLE_TYPES(numeric::decimal64, numeric::decimal64::rep);

template <typename FromType>
struct is_logically_castable_to_impl {
template <typename ToType>
struct is_bit_castable_to_impl {
template <typename ToType, typename std::enable_if_t<is_compound<ToType>()>* = nullptr>
constexpr bool operator()()
{
return false;
}

template <typename ToType, typename std::enable_if_t<not is_compound<ToType>()>* = nullptr>
constexpr bool operator()()
{
return is_logically_castable_impl<FromType, ToType>::value;
if (not cuda::std::is_trivially_copyable_v<FromType> ||
not cuda::std::is_trivially_copyable_v<ToType>) {
return false;
}
constexpr auto from_size = sizeof(cudf::device_storage_type_t<FromType>);
constexpr auto to_size = sizeof(cudf::device_storage_type_t<ToType>);
return from_size == to_size;
}
};

struct is_logically_castable_from_impl {
template <typename FromType>
struct is_bit_castable_from_impl {
template <typename FromType, typename std::enable_if_t<is_compound<FromType>()>* = nullptr>
constexpr bool operator()(data_type)
{
return false;
}

template <typename FromType, typename std::enable_if_t<not is_compound<FromType>()>* = nullptr>
constexpr bool operator()(data_type to)
{
return type_dispatcher(to, is_logically_castable_to_impl<FromType>{});
return cudf::type_dispatcher(to, is_bit_castable_to_impl<FromType>{});
}
};

/**
* @brief Indicates whether `from` is logically castable to `to`.
* @brief Indicates whether `from` is bit-castable to `to`.
*
* Data types that have the same size and underlying representation, e.g. INT32 and TIMESTAMP_DAYS
* which are both represented as 32-bit integers in memory, are eligible for logical cast.
* This casting is based on std::bit_cast. Data types that have the same size and are trivially
* copyable are eligible for this casting.
*
* See `cudf::logical_cast()` which returns a zero-copy `column_view` when casting between
* logically castable types.
* See `cudf::bit_cast()` which returns a zero-copy `column_view` when casting between
* bit-castable types.
*
* @param from The `data_type` to convert from
* @param to The `data_type` to convert to
* @return `true` if the types are logically castable
* @return `true` if the types are castable
*/
constexpr bool is_logically_castable(data_type from, data_type to)
constexpr bool is_bit_castable(data_type from, data_type to)
{
return type_dispatcher(from, is_logically_castable_from_impl{}, to);
return type_dispatcher(from, is_bit_castable_from_impl{}, to);
}

template <typename From, typename To>
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ size_type count_descendants(column_view parent)
return std::accumulate(begin, begin + parent.num_children(), size_type{parent.num_children()});
}

column_view logical_cast(column_view const& input, data_type type)
column_view bit_cast(column_view const& input, data_type type)
{
CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable");
CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable");
return column_view{type,
input._size,
input._data,
Expand All @@ -144,9 +144,9 @@ column_view logical_cast(column_view const& input, data_type type)
input._children};
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
}

mutable_column_view logical_cast(mutable_column_view const& input, data_type type)
mutable_column_view bit_cast(mutable_column_view const& input, data_type type)
{
CUDF_EXPECTS(is_logically_castable(input._type, type), "types are not logically castable");
CUDF_EXPECTS(is_bit_castable(input._type, type), "types are not bit-castable");
return mutable_column_view{type,
input._size,
const_cast<void*>(input._data),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ struct in_place_fill_range_dispatch {
auto unscaled = static_cast<cudf::fixed_point_scalar<T> const&>(value).value();
using RepType = typename T::rep;
auto s = cudf::numeric_scalar<RepType>(unscaled, value.is_valid());
auto view = cudf::logical_cast(destination, s.type());
auto view = cudf::bit_cast(destination, s.type());
in_place_fill<RepType>(view, begin, end, s, stream);
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ endfunction()
###################################################################################################
# - column tests ----------------------------------------------------------------------------------
ConfigureTest(COLUMN_TEST
column/bit_cast_test.cpp
column/column_test.cu
column/column_view_test.cpp
column/column_device_view_test.cu
column/compound_test.cu)

Expand Down
134 changes: 134 additions & 0 deletions cpp/tests/column/bit_cast_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/type_lists.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <random>

/**
 * @brief Maps a type to the `rep` type it wraps, for use by the bit-cast tests.
 *
 * The primary template yields `void`. The partial specializations below supply
 * the wrapped `rep` type for timestamp, duration and fixed-point types.
 *
 * @tparam T      Type to inspect
 * @tparam Enable SFINAE hook used to select a specialization
 */
template <typename T, typename Enable = void>
struct rep_type_impl {
  using type = void;
};

// Timestamps expose their representation through the nested duration type.
template <typename T>
struct rep_type_impl<T, std::enable_if_t<cudf::is_timestamp<T>()>> {
  using type = typename T::duration::rep;
};

// Durations and fixed-point types both expose their representation directly as `T::rep`.
template <typename T>
struct rep_type_impl<T, std::enable_if_t<cudf::is_duration<T>() || cudf::is_fixed_point<T>()>> {
  using type = typename T::rep;
};

// Shorthand for `rep_type_impl<T>::type`.
template <typename T>
using rep_type_t = typename rep_type_impl<T>::type;

// Typed test fixture for the bit-cast tests; adds nothing on top of
// cudf::test::BaseFixture and exists only to carry the type parameter.
template <typename T>
struct ColumnViewAllTypesTests : public cudf::test::BaseFixture {
};

// Instantiate the fixture once per type in cudf::test::FixedWidthTypes.
TYPED_TEST_CASE(ColumnViewAllTypesTests, cudf::test::FixedWidthTypes);

/**
 * @brief Checks `cudf::bit_cast` from `FromType` to `ToType` on both a
 * `column_view` and a `mutable_column_view` of the same data.
 *
 * Which expectation applies is decided from the two types alone:
 *  - identical types: the cast result must equal the input;
 *  - one type is the `rep_type_t` of the other: the cast result must equal a
 *    `ToType` column built from `[begin, end)`;
 *  - both types trivially copyable with equally sized device storage types:
 *    casting to `ToType` and back to `FromType` must reproduce the input;
 *  - anything else: the cast must throw `cudf::logic_error`.
 *
 * @tparam FromType Element type of the input column
 * @tparam ToType   Element type to cast to
 * @tparam Iterator Iterator type of the expected-value range
 *
 * @param column_view Input column to cast
 * @param begin       Start of the values used to build the expected column
 * @param end         End of the values used to build the expected column
 */
template <typename FromType, typename ToType, typename Iterator>
void do_bit_cast(cudf::column_view const& column_view, Iterator begin, Iterator end)
{
  // NOTE(review): this reinterprets a column_view as a mutable_column_view and copies it;
  // presumably it relies on the two classes having the same layout -- TODO confirm.
  auto mutable_view = reinterpret_cast<cudf::mutable_column_view const&>(column_view);
  cudf::data_type to_type{cudf::type_to_id<ToType>()};

  // Compile-time facts about the type pair that select the expectation below.
  constexpr bool same_type   = std::is_same<FromType, ToType>::value;
  constexpr bool rep_related = std::is_same<rep_type_t<FromType>, ToType>::value ||
                               std::is_same<FromType, rep_type_t<ToType>>::value;
  constexpr bool bit_compatible =
    cuda::std::is_trivially_copyable_v<FromType> && cuda::std::is_trivially_copyable_v<ToType> &&
    sizeof(cudf::device_storage_type_t<FromType>) == sizeof(cudf::device_storage_type_t<ToType>);

  if (same_type) {
    // Casting a column to its own type must leave it unchanged.
    auto same_view         = cudf::bit_cast(column_view, column_view.type());
    auto same_mutable_view = cudf::bit_cast(mutable_view, mutable_view.type());
    cudf::test::expect_columns_equal(same_view, column_view);
    cudf::test::expect_columns_equal(same_mutable_view, mutable_view);
  } else if (rep_related) {
    // Casting between a type and its rep must match a ToType column of the same values.
    auto cast_view         = cudf::bit_cast(column_view, to_type);
    auto cast_mutable_view = cudf::bit_cast(mutable_view, to_type);
    cudf::test::fixed_width_column_wrapper<ToType, cudf::size_type> expected(begin, end);
    cudf::test::expect_columns_equal(cast_view, expected);
    cudf::test::expect_columns_equal(cast_mutable_view, expected);
  } else if (bit_compatible) {
    // Forward cast: FromType -> ToType
    auto forward         = cudf::bit_cast(column_view, to_type);
    auto forward_mutable = cudf::bit_cast(mutable_view, to_type);

    // Round trip: ToType -> FromType must give back the original column
    cudf::data_type from_type{cudf::type_to_id<FromType>()};
    auto round_trip         = cudf::bit_cast(forward, from_type);
    auto round_trip_mutable = cudf::bit_cast(forward_mutable, from_type);

    cudf::test::expect_columns_equal(round_trip, column_view);
    cudf::test::expect_columns_equal(round_trip_mutable, mutable_view);
  } else {
    // Mismatched sizes or a non-trivially-copyable type: the cast must be rejected.
    EXPECT_THROW(cudf::bit_cast(column_view, to_type), cudf::logic_error);
    EXPECT_THROW(cudf::bit_cast(mutable_view, to_type), cudf::logic_error);
  }
}

// Builds an input column of TypeParam from the counting sequence [1, 16) and runs the
// bit-cast check against every target type listed below.
TYPED_TEST(ColumnViewAllTypesTests, BitCast)
{
  auto first = thrust::make_counting_iterator(1);
  auto last  = thrust::make_counting_iterator(16);

  cudf::test::fixed_width_column_wrapper<TypeParam, cudf::size_type> input(first, last);

  // Integer targets
  do_bit_cast<TypeParam, int8_t>(input, first, last);
  do_bit_cast<TypeParam, int16_t>(input, first, last);
  do_bit_cast<TypeParam, int32_t>(input, first, last);
  do_bit_cast<TypeParam, int64_t>(input, first, last);

  // Floating-point and boolean targets
  do_bit_cast<TypeParam, float>(input, first, last);
  do_bit_cast<TypeParam, double>(input, first, last);
  do_bit_cast<TypeParam, bool>(input, first, last);

  // Duration targets
  do_bit_cast<TypeParam, cudf::duration_D>(input, first, last);
  do_bit_cast<TypeParam, cudf::duration_s>(input, first, last);
  do_bit_cast<TypeParam, cudf::duration_ms>(input, first, last);
  do_bit_cast<TypeParam, cudf::duration_us>(input, first, last);
  do_bit_cast<TypeParam, cudf::duration_ns>(input, first, last);

  // Timestamp targets
  do_bit_cast<TypeParam, cudf::timestamp_D>(input, first, last);
  do_bit_cast<TypeParam, cudf::timestamp_s>(input, first, last);
  do_bit_cast<TypeParam, cudf::timestamp_ms>(input, first, last);
  do_bit_cast<TypeParam, cudf::timestamp_us>(input, first, last);
  do_bit_cast<TypeParam, cudf::timestamp_ns>(input, first, last);
}
Loading