diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 5e792403a88..37124d100dd 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -217,6 +217,14 @@ typedef enum cugraph_prior_sources_behavior_t { cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t** options, cugraph_error_t** error); +/** + * @brief Set flag to renumber results + * + * @param options - opaque pointer to the sampling options + * @param value - Boolean value to assign to the option + */ +void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, bool_t value); + /** * @brief Set flag to sample with_replacement * @@ -446,6 +454,24 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index( cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_offsets( const cugraph_sample_result_t* result); +/** + * @brief Get the renumber map + * + * @param [in] result The result from a sampling algorithm + * @return type erased array pointing to the renumber map + */ +cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map( + const cugraph_sample_result_t* result); + +/** + * @brief Get the renumber map offsets + * + * @param [in] result The result from a sampling algorithm + * @return type erased array pointing to the renumber map offsets + */ +cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_offsets( + const cugraph_sample_result_t* result); + /** * @brief Free a sampling result * diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index d9dc9ca4d50..ff6a6c49437 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -37,6 +37,7 @@ struct cugraph_sampling_options_t { bool_t return_hops_{FALSE}; prior_sources_behavior_t 
prior_sources_behavior_{prior_sources_behavior_t::DEFAULT}; bool_t dedupe_sources_{FALSE}; + bool_t renumber_results_{FALSE}; }; struct cugraph_sample_result_t { @@ -48,6 +49,8 @@ struct cugraph_sample_result_t { cugraph_type_erased_device_array_t* hop_{nullptr}; cugraph_type_erased_device_array_t* label_{nullptr}; cugraph_type_erased_device_array_t* offsets_{nullptr}; + cugraph_type_erased_device_array_t* renumber_map_{nullptr}; + cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr}; }; } // namespace c_api @@ -226,6 +229,22 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct vertex_partition_lasts, do_expensive_check_); + std::optional> renumber_map{std::nullopt}; + std::optional> renumber_map_offsets{std::nullopt}; + + if (options_.renumber_results_) { + std::tie(src, dst, renumber_map, renumber_map_offsets) = cugraph::renumber_sampled_edgelist( + handle_, + std::move(src), + hop ? std::make_optional(raft::device_span{hop->data(), hop->size()}) + : std::nullopt, + std::move(dst), + std::make_optional(std::make_tuple( + raft::device_span{edge_label->data(), edge_label->size()}, + raft::device_span{offsets->data(), offsets->size()})), + do_expensive_check_); + } + result_ = new cugraph::c_api::cugraph_sample_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_), @@ -242,7 +261,13 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, (offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(offsets.value(), SIZE_T) - : nullptr}; + : nullptr, + (renumber_map) ? new cugraph::c_api::cugraph_type_erased_device_array_t( + renumber_map.value(), graph_->vertex_type_) + : nullptr, + (renumber_map_offsets) ? 
new cugraph::c_api::cugraph_type_erased_device_array_t( + renumber_map_offsets.value(), SIZE_T) + : nullptr}; } } }; @@ -263,6 +288,13 @@ extern "C" cugraph_error_code_t cugraph_sampling_options_create( return CUGRAPH_SUCCESS; } +extern "C" void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, + bool_t value) +{ + auto internal_pointer = reinterpret_cast(options); + internal_pointer->renumber_results_ = value; +} + extern "C" void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options, bool_t value) { @@ -386,6 +418,26 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_of internal_pointer->offsets_->view()); } +extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map( + const cugraph_sample_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->renumber_map_ == nullptr + ? NULL + : reinterpret_cast( + internal_pointer->renumber_map_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_offsets( + const cugraph_sample_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->renumber_map_offsets_ == nullptr /* null-check the pointer that is dereferenced below, not renumber_map_ */ + ? 
NULL + : reinterpret_cast( + internal_pointer->renumber_map_offsets_->view()); +} + extern "C" cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_create( const cugraph_resource_handle_t* handle, const cugraph_type_erased_device_array_view_t* srcs, diff --git a/cpp/tests/c_api/uniform_neighbor_sample_test.c b/cpp/tests/c_api/uniform_neighbor_sample_test.c index c93c99ea7fc..a2c1e230485 100644 --- a/cpp/tests/c_api/uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/uniform_neighbor_sample_test.c @@ -21,6 +21,7 @@ #include #include +#include typedef int32_t vertex_t; typedef int32_t edge_t; @@ -32,6 +33,15 @@ data_type_id_t weight_tid = FLOAT32; data_type_id_t edge_id_tid = INT32; data_type_id_t edge_type_tid = INT32; +int vertex_id_compare_function(const void * a, const void * b) { + if (*((vertex_t *) a) < *((vertex_t *) b)) + return -1; + else if (*((vertex_t *) a) > *((vertex_t *) b)) + return 1; + else + return 0; +} + int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle, vertex_t *h_src, vertex_t *h_dst, @@ -48,7 +58,8 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle bool_t with_replacement, bool_t return_hops, cugraph_prior_sources_behavior_t prior_sources_behavior, - bool_t dedupe_sources) + bool_t dedupe_sources, + bool_t renumber_results) { // Create graph int test_ret_value = 0; @@ -119,6 +130,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle cugraph_sampling_set_return_hops(sampling_options, return_hops); cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + cugraph_sampling_set_renumber_results(sampling_options, renumber_results); ret_code = cugraph_uniform_neighbor_sample(handle, graph, @@ -150,19 +162,28 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle cugraph_type_erased_device_array_view_t* 
result_hops; cugraph_type_erased_device_array_view_t* result_offsets; cugraph_type_erased_device_array_view_t* result_labels; - - result_srcs = cugraph_sample_result_get_sources(result); - result_dsts = cugraph_sample_result_get_destinations(result); - result_edge_id = cugraph_sample_result_get_edge_id(result); - result_weights = cugraph_sample_result_get_edge_weight(result); - result_edge_types = cugraph_sample_result_get_edge_type(result); - result_hops = cugraph_sample_result_get_hop(result); - result_hops = cugraph_sample_result_get_hop(result); - result_offsets = cugraph_sample_result_get_offsets(result); - result_labels = cugraph_sample_result_get_start_labels(result); + cugraph_type_erased_device_array_view_t* result_renumber_map; + cugraph_type_erased_device_array_view_t* result_renumber_map_offsets; + + result_srcs = cugraph_sample_result_get_sources(result); + result_dsts = cugraph_sample_result_get_destinations(result); + result_edge_id = cugraph_sample_result_get_edge_id(result); + result_weights = cugraph_sample_result_get_edge_weight(result); + result_edge_types = cugraph_sample_result_get_edge_type(result); + result_hops = cugraph_sample_result_get_hop(result); + result_hops = cugraph_sample_result_get_hop(result); + result_offsets = cugraph_sample_result_get_offsets(result); + result_labels = cugraph_sample_result_get_start_labels(result); + result_renumber_map = cugraph_sample_result_get_renumber_map(result); + result_renumber_map_offsets = cugraph_sample_result_get_renumber_map_offsets(result); size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); size_t result_offsets_size = cugraph_type_erased_device_array_view_size(result_offsets); + size_t renumber_map_size = 0; + + if (renumber_results) { + renumber_map_size = cugraph_type_erased_device_array_view_size(result_renumber_map); + } vertex_t h_result_srcs[result_size]; vertex_t h_result_dsts[result_size]; @@ -172,6 +193,8 @@ int generic_uniform_neighbor_sample_test(const 
cugraph_resource_handle_t* handle int32_t h_result_hops[result_size]; size_t h_result_offsets[result_offsets_size]; int h_result_labels[result_offsets_size-1]; + vertex_t h_renumber_map[renumber_map_size]; + size_t h_renumber_map_offsets[result_offsets_size]; ret_code = cugraph_type_erased_device_array_view_copy_to_host( handle, (byte_t*)h_result_srcs, result_srcs, &ret_error); @@ -205,6 +228,16 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle handle, (byte_t*)h_result_labels, result_labels, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + if (renumber_results) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_renumber_map, result_renumber_map, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_renumber_map_offsets, result_renumber_map_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + // First, check that all edges are actually part of the graph weight_t M_w[num_vertices][num_vertices]; edge_t M_edge_id[num_vertices][num_vertices]; @@ -223,16 +256,35 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle M_edge_type[h_src[i]][h_dst[i]] = h_edge_types[i]; } - for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { - TEST_ASSERT(test_ret_value, - M_w[h_result_srcs[i]][h_result_dsts[i]] == h_result_weight[i], - "uniform_neighbor_sample got edge that doesn't exist"); - TEST_ASSERT(test_ret_value, - M_edge_id[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_id[i], - "uniform_neighbor_sample got edge that doesn't exist"); - TEST_ASSERT(test_ret_value, - M_edge_type[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_types[i], - "uniform_neighbor_sample got edge that doesn't exist"); + if (renumber_results) { + for 
(int label_id = 0 ; label_id < (result_offsets_size - 1) ; ++label_id) { + for (size_t i = h_result_offsets[label_id]; (i < h_result_offsets[label_id+1]) && (test_ret_value == 0) ; ++i) { + vertex_t src = h_renumber_map[h_renumber_map_offsets[label_id] + h_result_srcs[i]]; + vertex_t dst = h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]]; + + TEST_ASSERT(test_ret_value, + M_w[src][dst] == h_result_weight[i], + "uniform_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[src][dst] == h_result_edge_id[i], + "uniform_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_type[src][dst] == h_result_edge_types[i], + "uniform_neighbor_sample got edge that doesn't exist"); + } + } + } else { + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + M_w[h_result_srcs[i]][h_result_dsts[i]] == h_result_weight[i], + "uniform_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_id[i], + "uniform_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_type[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_types[i], + "uniform_neighbor_sample got edge that doesn't exist"); + } } // @@ -264,6 +316,28 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle } } + if (renumber_results) { + size_t num_vertex_ids = 2 * (h_result_offsets[label_id+1] - h_result_offsets[label_id]); + vertex_t vertex_ids[num_vertex_ids]; + + for (size_t i = 0 ; (i < (h_result_offsets[label_id+1] - h_result_offsets[label_id])) && (test_ret_value == 0) ; ++i) { + vertex_ids[2*i] = h_result_srcs[h_result_offsets[label_id] + i]; + vertex_ids[2*i+1] = h_result_dsts[h_result_offsets[label_id] + i]; + } + + qsort(vertex_ids, num_vertex_ids, sizeof(vertex_t), vertex_id_compare_function); + + vertex_t current_v = 0; + for (size_t i = 0 ; 
(i < num_vertex_ids) && (test_ret_value == 0) ; ++i) { + if (vertex_ids[i] == current_v) + ++current_v; + else + TEST_ASSERT(test_ret_value, + vertex_ids[i] == (current_v - 1), + "vertices are not properly renumbered"); + } + } + for (int hop = 0 ; hop < fan_out_size ; ++hop) { if (prior_sources_behavior == CARRY_OVER) { destinations_size = sources_size; @@ -276,7 +350,8 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle if (h_result_hops[i] == hop) { bool found = false; for (size_t j = 0 ; (!found) && (j < sources_size) ; ++j) { - found = (h_result_srcs[i] == check_sources[j]); + found = renumber_results ? (h_renumber_map[h_renumber_map_offsets[label_id] + h_result_srcs[i]] == check_sources[j]) + : (h_result_srcs[i] == check_sources[j]); } TEST_ASSERT(test_ret_value, found, "encountered source vertex that was not part of previous frontier"); @@ -286,15 +361,16 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle // Make sure destination isn't already in the source list bool found = false; for (size_t j = 0 ; (!found) && (j < destinations_size) ; ++j) { - found = (h_result_dsts[i] == check_destinations[j]); + found = renumber_results ? (h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] == check_destinations[j]) + : (h_result_dsts[i] == check_destinations[j]); } if (!found) { - check_destinations[destinations_size] = h_result_dsts[i]; + check_destinations[destinations_size] = renumber_results ? h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] : h_result_dsts[i]; ++destinations_size; } } else { - check_destinations[destinations_size] = h_result_dsts[i]; + check_destinations[destinations_size] = renumber_results ? 
h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] : h_result_dsts[i]; ++destinations_size; } } @@ -844,11 +920,12 @@ int test_uniform_neighbor_sample_clean(const cugraph_resource_handle_t* handle) bool_t return_hops = TRUE; cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; bool_t dedupe_sources = FALSE; + bool_t renumber_results = FALSE; return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges, start, start_labels, num_starts, fan_out, fan_out_size, with_replacement, - return_hops, prior_sources_behavior, dedupe_sources); + return_hops, prior_sources_behavior, dedupe_sources, renumber_results); } int test_uniform_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t* handle) @@ -881,11 +958,12 @@ int test_uniform_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t* bool_t return_hops = TRUE; cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; bool_t dedupe_sources = TRUE; + bool_t renumber_results = FALSE; return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges, start, start_labels, num_starts, fan_out, fan_out_size, with_replacement, - return_hops, prior_sources_behavior, dedupe_sources); + return_hops, prior_sources_behavior, dedupe_sources, renumber_results); } int test_uniform_neighbor_sample_unique_sources(const cugraph_resource_handle_t* handle) @@ -918,11 +996,12 @@ int test_uniform_neighbor_sample_unique_sources(const cugraph_resource_handle_t* bool_t return_hops = TRUE; cugraph_prior_sources_behavior_t prior_sources_behavior = EXCLUDE; bool_t dedupe_sources = FALSE; + bool_t renumber_results = FALSE; return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges, start, start_labels, num_starts, fan_out, fan_out_size, with_replacement, - return_hops, prior_sources_behavior, dedupe_sources); + return_hops, 
prior_sources_behavior, dedupe_sources, renumber_results); } int test_uniform_neighbor_sample_carry_over_sources(const cugraph_resource_handle_t* handle) @@ -955,11 +1034,50 @@ int test_uniform_neighbor_sample_carry_over_sources(const cugraph_resource_handl bool_t return_hops = TRUE; cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER; bool_t dedupe_sources = FALSE; + bool_t renumber_results = FALSE; + + return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges, + start, start_labels, num_starts, + fan_out, fan_out_size, with_replacement, + return_hops, prior_sources_behavior, dedupe_sources, renumber_results); +} + +int test_uniform_neighbor_sample_renumber_results(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + data_type_id_t edge_id_tid = INT32; + data_type_id_t edge_type_tid = INT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t fan_out_size = 3; + size_t num_starts = 2; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}; + int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0}; + vertex_t start[] = {2, 3}; + int start_labels[] = { 6, 12 }; + int fan_out[] = {-1, -1, -1}; + + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; + bool_t dedupe_sources = FALSE; + bool_t renumber_results = TRUE; return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges, start, start_labels, num_starts, fan_out, fan_out_size, with_replacement, - return_hops, 
prior_sources_behavior, dedupe_sources); + return_hops, prior_sources_behavior, dedupe_sources, renumber_results); } int main(int argc, char** argv) @@ -975,6 +1093,7 @@ int main(int argc, char** argv) result |= RUN_TEST_NEW(test_uniform_neighbor_sample_dedupe_sources, handle); result |= RUN_TEST_NEW(test_uniform_neighbor_sample_unique_sources, handle); result |= RUN_TEST_NEW(test_uniform_neighbor_sample_carry_over_sources, handle); + result |= RUN_TEST_NEW(test_uniform_neighbor_sample_renumber_results, handle); cugraph_free_resource_handle(handle);