1
+ /*
2
+ * Copyright (c) 2020, NVIDIA CORPORATION.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ #include < cudf/column/column.hpp>
18
+ #include < cudf/column/column_factories.hpp>
19
+ #include < cudf/detail/nvtx/ranges.hpp>
20
+ #include < cudf/detail/valid_if.cuh>
21
+ #include < cudf/strings/detail/utilities.hpp>
22
+ #include < cudf/utilities/error.hpp>
23
+ #include < strings/utilities.cuh>
24
+
25
+ #include < rmm/thrust_rmm_allocator.h>
26
+ #include < thrust/for_each.h>
27
+ #include < thrust/transform_reduce.h>
28
+
29
+ // clang-format off
30
+ namespace cudf {
31
+ namespace strings {
32
+ namespace detail {
33
+
34
/**
 * @brief Create a strings-type column from a range of pointer/size pairs.
 *
 * Each element of `[begin, end)` is read as a
 * `thrust::pair<const char*, size_type>`. An element whose pointer is
 * `nullptr` contributes zero bytes to the chars child and is reported as
 * invalid to the null-mask builder; every other element has `second` bytes
 * copied from `first` into the chars child.
 *
 * An empty input range yields the result of `make_empty_strings_column`.
 *
 * The combined byte count of all non-null entries must be smaller than
 * `std::numeric_limits<size_type>::max()`; otherwise the `CUDF_EXPECTS`
 * check below fails.
 *
 * @tparam IndexPairIterator Iterator whose elements convert to
 *         `thrust::pair<const char*, size_type>`; it is indexed with
 *         `begin[idx]` inside device lambdas.
 * @param begin  First pointer/size pair.
 * @param end    One past the last pointer/size pair.
 * @param mr     Memory resource forwarded to every child-column/null-mask builder.
 * @param stream CUDA stream on which all device work in this function is issued.
 * @return The strings column assembled from the offsets child, chars child
 *         and (when at least one entry is null) the null mask.
 */
template <typename IndexPairIterator>
std::unique_ptr<column> make_strings_column(
    IndexPairIterator begin, IndexPairIterator end,
    rmm::mr::device_memory_resource* mr,
    cudaStream_t stream)
{
    CUDF_FUNC_RANGE();
    size_type strings_count = thrust::distance(begin, end);
    if (strings_count == 0) return strings::detail::make_empty_strings_column(mr, stream);

    using string_index_pair = thrust::pair<const char*, size_type>;

    auto execpol = rmm::exec_policy(stream);
    // check total size is not too large for cudf column
    // The reduction's result type is taken from the init argument, so the seed
    // must be a size_t: seeding with a plain `0` would hand the sum back as an
    // int and truncate exactly the oversized totals this check is meant to catch.
    size_t bytes = thrust::transform_reduce(
        execpol->on(stream), begin, end,
        [] __device__ (string_index_pair const& item) -> size_t {
            return (item.first != nullptr) ? static_cast<size_t>(item.second) : size_t{0};
        },
        size_t{0},
        thrust::plus<size_t>());
    CUDF_EXPECTS(bytes < static_cast<size_t>(std::numeric_limits<size_type>::max()),
                 " total size of strings is too large for cudf column");

    // build offsets column from the strings sizes
    // (null entries are given a size of 0)
    auto offsets_transformer = [begin] __device__ (size_type idx) {
        string_index_pair const item = begin[idx];
        return (item.first != nullptr ? static_cast<int32_t>(item.second) : 0);
    };
    auto offsets_transformer_itr = thrust::make_transform_iterator(
        thrust::make_counting_iterator<size_type>(0), offsets_transformer);
    auto offsets_column = strings::detail::make_offsets_child_column(
        offsets_transformer_itr, offsets_transformer_itr + strings_count, mr, stream);
    auto d_offsets = offsets_column->view().template data<int32_t>();

    // create null mask: an entry is valid when its pointer is non-null
    auto new_nulls = experimental::detail::valid_if(begin, end,
        [] __device__ (string_index_pair const item) { return item.first != nullptr; },
        stream,
        mr);
    auto null_count = new_nulls.second;
    // keep the mask only when there is at least one null; otherwise pass an
    // empty buffer to the column constructor
    rmm::device_buffer null_mask;
    if (null_count > 0) null_mask = std::move(new_nulls.first);

    // build chars column
    auto chars_column =
        strings::detail::create_chars_child_column(strings_count, null_count, bytes, mr, stream);
    auto d_chars = chars_column->mutable_view().template data<char>();
    // copy each non-null string to the position given by its offset
    thrust::for_each_n(execpol->on(stream),
                       thrust::make_counting_iterator<size_type>(0),
                       strings_count,
                       [begin, d_offsets, d_chars] __device__ (size_type idx) {
                           string_index_pair const item = begin[idx];
                           if (item.first != nullptr)
                               memcpy(d_chars + d_offsets[idx], item.first, item.second);
                       });

    return make_strings_column(strings_count,
                               std::move(offsets_column),
                               std::move(chars_column),
                               null_count,
                               std::move(null_mask),
                               stream,
                               mr);
}
100
+
101
+ } // namespace detail
102
+ } // namespace strings
103
+ } // namespace cudf
104
+ // clang-format on TODO fix
0 commit comments