diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index b89ada9bf50..6f6c7e5a26c 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -68,11 +68,14 @@ def __init__(self, m_graph=None, directed=False): if isinstance(m_graph, MultiGraph): elist = m_graph.view_edge_list() if m_graph.is_weighted(): - weights = "weights" + weights = m_graph.weight_column else: weights = None self.from_cudf_edgelist( - elist, source="src", destination="dst", edge_attr=weights + elist, + source=m_graph.source_columns, + destination=m_graph.destination_columns, + edge_attr=weights, ) else: raise TypeError( diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index ae2c57f5ef3..90db2c6b1f5 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -79,6 +79,8 @@ def __init__(self, properties): self.properties = simpleDistributedGraphImpl.Properties(properties) self.source_columns = None self.destination_columns = None + self.weight_column = None + self.vertex_columns = None def _make_plc_graph( sID, @@ -175,6 +177,7 @@ def __from_edgelist( "and destination parameters" ) ddf_columns = s_col + d_col + self.vertex_columns = ddf_columns.copy() _client = default_client() workers = _client.scheduler_info()["workers"] # Repartition to 2 partitions per GPU for memory efficient process @@ -214,10 +217,11 @@ def __from_edgelist( # The symmetrize step may add additional edges with unknown # ids and types for an undirected graph. Therefore, only # directed graphs may be used with ids and types. + # FIXME: Drop the check in symmetrize.py as it is redundant if len(edge_attr) == 3: if not self.properties.directed: raise ValueError( - "User-provided edge ids and edge " + "User-provided edge ids and/or edge " "types are not permitted for an " "undirected graph." ) @@ -285,6 +289,7 @@ def __from_edgelist( self.properties.renumber = renumber self.source_columns = source self.destination_columns = destination + self.weight_column = weight # If renumbering is not enabled, this function will only create # the edgelist_df and not do any renumbering. @@ -316,7 +321,6 @@ def __from_edgelist( ddf = ddf.map_partitions(lambda df: df.copy()) ddf = persist_dask_df_equal_parts_per_worker(ddf, _client) num_edges = len(ddf) - self._number_of_edges = num_edges ddf = get_persisted_df_worker_map(ddf, _client) delayed_tasks_d = { w: delayed(simpleDistributedGraphImpl._make_plc_graph)( @@ -356,6 +360,8 @@ def renumbered(self): def view_edge_list(self): """ + FIXME: Should this also return the edge ids and types? + Display the edge list. Compute it if needed. NOTE: If the graph is of type Graph() then the displayed undirected edges are the same as displayed by networkx Graph(), but the direction @@ -386,7 +392,59 @@ def view_edge_list(self): """ if self.edgelist is None: raise RuntimeError("Graph has no Edgelist.") - return self.edgelist.edgelist_df + + edgelist_df = self.input_df + is_string_dtype = False + is_multi_column = False + wgtCol = simpleDistributedGraphImpl.edgeWeightCol + if not self.properties.directed: + srcCol = self.source_columns + dstCol = self.destination_columns + if self.renumber_map.unrenumbered_id_type == "object": + # FIXME: Use the renumbered vertices instead and then un-renumber. + # This operation can be expensive. + is_string_dtype = True + edgelist_df = self.edgelist.edgelist_df + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + + if isinstance(srcCol, list): + srcCol = self.renumber_map.renumbered_src_col_name + dstCol = self.renumber_map.renumbered_dst_col_name + edgelist_df = self.edgelist.edgelist_df + # unrenumber before extracting the upper triangular part + if len(self.source_columns) == 1: + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + else: + is_multi_column = True + + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ + [srcCol, dstCol] + ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) + + edgelist_df = edgelist_df.groupby(by=[srcCol, dstCol]).sum().reset_index() + if wgtCol in edgelist_df.columns: + # FIXME: This breaks if there are are multi edges as those will + # be dropped during the symmetrization step and the original 'weight' + # will be halved. + edgelist_df[wgtCol] /= 2 + + if is_string_dtype or is_multi_column: + # unrenumber the vertices + edgelist_df = self.renumber_map.unrenumber(edgelist_df, srcCol) + edgelist_df = self.renumber_map.unrenumber(edgelist_df, dstCol) + + if self.properties.renumbered: + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + + # If there is no 'wgt' column, nothing will happen + edgelist_df = edgelist_df.rename(columns={wgtCol: self.weight_column}) + + self.properties.edge_count = len(edgelist_df) + return edgelist_df def delete_edge_list(self): """ @@ -405,23 +463,7 @@ def number_of_vertices(self): Get the number of nodes in the graph. """ if self.properties.node_count is None: - if self.edgelist is not None: - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? - else: - src_col_name = "src" - dst_col_name = "dst" - - ddf = self.edgelist.edgelist_df[[src_col_name, dst_col_name]] - # ddf = self.edgelist.edgelist_df[["src", "dst"]] - self.properties.node_count = ddf.max().max().compute() + 1 - else: - raise RuntimeError("Graph is Empty") + self.properties.node_count = len(self.nodes()) return self.properties.node_count def number_of_nodes(self): @@ -434,10 +476,16 @@ def number_of_edges(self, directed_edges=False): """ Get the number of edges in the graph. """ - if self.edgelist is not None: - return self._number_of_edges - else: - raise RuntimeError("Graph is Empty") + + if directed_edges and self.edgelist is not None: + return len(self.edgelist.edgelist_df) + + if self.properties.edge_count is None: + if self.edgelist is not None: + self.view_edge_list() + else: + raise RuntimeError("Graph is Empty") + return self.properties.edge_count def in_degree(self, vertex_subset=None): """ @@ -1021,19 +1069,8 @@ def edges(self): sources and destinations. It does not return the edge weights. For viewing edges with weights use view_edge_list() """ - if self.renumbered is True: - src_col_name = self.renumber_map.renumbered_src_col_name - dst_col_name = self.renumber_map.renumbered_dst_col_name - # FIXME: from_dask_cudf_edgelist() currently requires - # renumber=True for MG, so this else block will not be - # used. Should this else block be removed and added back when - # the restriction is removed? - else: - src_col_name = "src" - dst_col_name = "dst" - # return self.view_edge_list()[["src", "dst"]] - return self.view_edge_list()[[src_col_name, dst_col_name]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ @@ -1045,23 +1082,26 @@ def nodes(self): a dataframe and do 'renumber_map.unrenumber' or 'G.unrenumber' """ - if self.renumbered: - # FIXME: This relies on current implementation - # of NumberMap, should not really expose - # this, perhaps add a method to NumberMap + if self.edgelist is not None: + if self.renumbered: + # FIXME: This relies on current implementation + # of NumberMap, should not really expose + # this, perhaps add a method to NumberMap - df = self.renumber_map.implementation.ddf.drop(columns="global_id") + df = self.renumber_map.implementation.ddf.drop(columns="global_id") - if len(df.columns) > 1: - return df - else: - return df[df.columns[0]] + if len(df.columns) > 1: + return df + else: + return df[df.columns[0]] + else: + df = self.input_df + return dask_cudf.concat( + [df[self.source_columns], df[self.destination_columns]] + ).drop_duplicates() else: - df = self.input_df - return dask_cudf.concat( - [df[self.source_columns], df[self.destination_columns]] - ).drop_duplicates() + raise RuntimeError("Graph is Empty") def neighbors(self, n): if self.edgelist is None: diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index d0c0ded5eb4..2690ab88c13 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -47,8 +47,8 @@ class simpleGraphImpl: class EdgeList: def __init__( self, - source: str, - destination: str, + source: cudf.Series, + destination: cudf.Series, edge_attr: Union[cudf.DataFrame, Dict[str, cudf.DataFrame]] = None, ): self.edgelist_df = cudf.DataFrame() @@ -96,6 +96,7 @@ def __init__(self, properties): def __init__(self, properties): # Structure self.edgelist = None + self.input_df = None self.adjlist = None self.transposedadjlist = None self.renumber_map = None @@ -109,6 +110,11 @@ def __init__(self, properties): self.batch_adjlists = None self.batch_transposed_adjlists = None + self.source_columns = None + self.destination_columns = None + self.vertex_columns = None + self.weight_column = None + # Functions # FIXME: Change to public function # FIXME: Make function more modular @@ -149,6 +155,7 @@ def __from_edgelist( "destination parameters" ) df_columns = s_col + d_col + self.vertex_columns = df_columns.copy() if edge_attr is not None: if weight is not None or edge_id is not None or edge_type is not None: @@ -212,9 +219,11 @@ def __from_edgelist( elist = input_df.compute().reset_index(drop=True) else: raise TypeError("input should be a cudf.DataFrame or a dask_cudf dataFrame") - - # Original, unmodified input dataframe. + # initial, unmodified input dataframe. self.input_df = elist + self.weight_column = weight + self.source_columns = source + self.destination_columns = destination # Renumbering self.renumber_map = None @@ -233,6 +242,8 @@ def __from_edgelist( # Use renumber_map to figure out if the python renumbering occured self.properties.renumbered = renumber_map.is_renumbered self.renumber_map = renumber_map + self.renumber_map.implementation.src_col_names = simpleGraphImpl.srcCol + self.renumber_map.implementation.dst_col_names = simpleGraphImpl.dstCol else: if type(source) is list and type(destination) is list: raise ValueError("set renumber to True for multi column ids") @@ -405,24 +416,104 @@ def view_edge_list(self): src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) - edgelist_df = self.edgelist.edgelist_df + srcCol = self.source_columns + dstCol = self.destination_columns + """ + Only use the initial input dataframe if the graph is directed with: + 1) single vertex column names with integer vertex type + 2) list of vertex column names of size 1 with integer vertex type + """ + use_initial_input_df = True + + if self.input_df is not None: + if type(srcCol) is list and type(dstCol) is list: + if len(srcCol) == 1: + srcCol = srcCol[0] + dstCol = dstCol[0] + if self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + # hypergraph case + use_initial_input_df = False + else: + use_initial_input_df = False + + elif self.input_df[srcCol].dtype not in [ + np.int32, + np.int64, + ] or self.input_df[dstCol].dtype not in [np.int32, np.int64]: + use_initial_input_df = False + else: + use_initial_input_df = False - if self.properties.renumbered: + if use_initial_input_df and self.properties.directed: + edgelist_df = self.input_df + else: + edgelist_df = self.edgelist.edgelist_df + if srcCol is None and dstCol is None: + srcCol = simpleGraphImpl.srcCol + dstCol = simpleGraphImpl.dstCol + + if use_initial_input_df and not self.properties.directed: + # unrenumber before extracting the upper triangular part + # case when the vertex column name is of size 1 + if self.properties.renumbered: + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.srcCol + ) + edgelist_df = self.renumber_map.unrenumber( + edgelist_df, simpleGraphImpl.dstCol + ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) + # extract the upper triangular part + edgelist_df = edgelist_df[edgelist_df[srcCol] <= edgelist_df[dstCol]] + else: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] + elif not use_initial_input_df and self.properties.renumbered: + # Do not unrenumber the vertices if the initial input df was used + if not self.properties.directed: + edgelist_df = edgelist_df[ + edgelist_df[simpleGraphImpl.srcCol] + <= edgelist_df[simpleGraphImpl.dstCol] + ] edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.dstCol ) + edgelist_df = edgelist_df.rename( + columns=self.renumber_map.internal_to_external_col_names + ) - # FIXME: revisit this approach - if not self.properties.directed: - edgelist_df = edgelist_df[ - edgelist_df[simpleGraphImpl.srcCol] - <= edgelist_df[simpleGraphImpl.dstCol] - ] - edgelist_df = edgelist_df.reset_index(drop=True) - self.properties.edge_count = len(edgelist_df) + if self.vertex_columns is not None and len(self.vertex_columns) == 2: + # single column vertices internally renamed to 'simpleGraphImpl.srcCol' + # and 'simpleGraphImpl.dstCol'. + if not set(self.vertex_columns).issubset(set(edgelist_df.columns)): + # Get the initial column names passed by the user. + if srcCol is not None and dstCol is not None: + edgelist_df = edgelist_df.rename( + columns={ + simpleGraphImpl.srcCol: srcCol, + simpleGraphImpl.dstCol: dstCol, + } + ) + + # FIXME: When renumbered, the MG API uses renumbered col names which + # is not consistant with the SG API. + + self.properties.edge_count = len(edgelist_df) + + wgtCol = simpleGraphImpl.edgeWeightCol + edgelist_df = edgelist_df.rename( + columns={wgtCol: self.weight_column} + ).reset_index(drop=True) return edgelist_df @@ -1175,7 +1266,7 @@ def edges(self): sources and destinations. It does not return the edge weights. For viewing edges with weights use view_edge_list() """ - return self.view_edge_list()[[simpleGraphImpl.srcCol, simpleGraphImpl.dstCol]] + return self.view_edge_list()[self.vertex_columns] def nodes(self): """ diff --git a/python/cugraph/cugraph/structure/number_map.py b/python/cugraph/cugraph/structure/number_map.py index 481f99b9060..d7da20f9d84 100644 --- a/python/cugraph/cugraph/structure/number_map.py +++ b/python/cugraph/cugraph/structure/number_map.py @@ -25,6 +25,8 @@ class NumberMap: class SingleGPU: def __init__(self, df, src_col_names, dst_col_names, id_type, store_transposed): self.col_names = NumberMap.compute_vals(src_col_names) + # FIXME: rename the next two attributes to its singular conterpart as there + # is only one 'src' and 'dst' col name self.src_col_names = src_col_names self.dst_col_names = dst_col_names self.df = df @@ -141,6 +143,8 @@ def __init__( self, ddf, src_col_names, dst_col_names, id_type, store_transposed ): self.col_names = NumberMap.compute_vals(src_col_names) + self.src_col_names = src_col_names + self.dst_col_names = dst_col_names self.val_types = NumberMap.compute_vals_types(ddf, src_col_names) self.val_types["count"] = np.int32 self.id_type = id_type @@ -258,6 +262,7 @@ def __init__( # The column name 'id' contains the renumbered vertices and the other column(s) # contain the original vertices self.df_internal_to_external = None + self.internal_to_external_col_names = {} @staticmethod def compute_vals_types(df, column_names): @@ -480,7 +485,15 @@ def renumber_and_segment( # For columns with mismatch dtypes, set the renumbered # id_type to either 'int32' or 'int64' - if df.dtypes.nunique() > 1: + if isinstance(src_col_names, list): + vertex_col_names = src_col_names.copy() + else: + vertex_col_names = [src_col_names] + if isinstance(dst_col_names, list): + vertex_col_names += dst_col_names + else: + vertex_col_names += [dst_col_names] + if df[vertex_col_names].dtypes.nunique() > 1: # can't determine the edgelist input type unrenumbered_id_type = None else: @@ -503,7 +516,9 @@ def renumber_and_segment( renumbered = True renumber_map = NumberMap(renumber_id_type, unrenumbered_id_type, renumbered) - if not isinstance(src_col_names, list): + renumber_map.input_src_col_names = src_col_names + renumber_map.input_dst_col_names = dst_col_names + if not isinstance(renumber_map.input_src_col_names, list): src_col_names = [src_col_names] dst_col_names = [dst_col_names] @@ -512,6 +527,10 @@ def renumber_and_segment( # renumbered_dst_col_name) renumber_map.set_renumbered_col_names(src_col_names, dst_col_names, df.columns) + # FIXME: Remove 'src_col_names' and 'dst_col_names' from this initialization as + # those will capture 'simpleGraph.srcCol' and 'simpleGraph.dstCol'. + # In fact the input src and dst col names are already captured in + # 'renumber_map.input_src_col_names' and 'renumber_map.input_dst_col_names'. if isinstance(df, cudf.DataFrame): renumber_map.implementation = NumberMap.SingleGPU( df, @@ -648,6 +667,35 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal mapping[nm] = nm + "_" + column_name col_names = list(mapping.values()) + if isinstance(self.input_src_col_names, list): + input_src_col_names = self.input_src_col_names.copy() + input_dst_col_names = self.input_dst_col_names.copy() + else: + # Assuming the src and dst columns are of the same length + # if they are lists. + input_src_col_names = [self.input_src_col_names] + input_dst_col_names = [self.input_dst_col_names] + if not isinstance(col_names, list): + col_names = [col_names] + + if column_name in [ + self.renumbered_src_col_name, + self.implementation.src_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_src_col_names)) + ) + elif column_name in [ + self.renumbered_dst_col_name, + self.implementation.dst_col_names, + ]: + self.internal_to_external_col_names.update( + dict(zip(col_names, input_dst_col_names)) + ) + + if len(self.implementation.col_names) == 1: + col_names = col_names[0] + if preserve_order: index_name = NumberMap.generate_unused_column_name(df) df[index_name] = df.index @@ -665,6 +713,9 @@ def unrenumber(self, df, column_name, preserve_order=False, get_column_names=Fal df = df.map_partitions(lambda df: df.rename(columns=mapping, copy=False)) else: df = df.rename(columns=mapping, copy=False) + # FIXME: This parameter is not working as expected as it oesn't return + # the unrenumbered column names: leverage 'self.internal_to_external_col_names' + # instead. if get_column_names: return df, col_names else: diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 15011fa8dbc..4c00e68344d 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -230,6 +230,7 @@ def symmetrize( """ + # FIXME: Redundant check that should be done at the graph creation if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") diff --git a/python/cugraph/cugraph/testing/utils.py b/python/cugraph/cugraph/testing/utils.py index 793063eb3ed..c78e92d400a 100644 --- a/python/cugraph/cugraph/testing/utils.py +++ b/python/cugraph/cugraph/testing/utils.py @@ -409,7 +409,7 @@ def compare_mst(mst_cugraph, mst_nx): pass # check total weight - cg_sum = edgelist_df["weights"].sum() + cg_sum = edgelist_df[mst_cugraph.weight_column].sum() nx_sum = mst_nx_df["weight"].sum() print(cg_sum) print(nx_sum) diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py index 3e4dd3af4fc..db34c68a054 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality.py @@ -118,7 +118,7 @@ def calc_betweenness_centrality( ) M = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": edge_attr} ) Gnx = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index aa41f8e1c82..97e503e5428 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -16,7 +16,7 @@ import dask_cudf from pylibcugraph.testing.utils import gen_fixture_params_product -from cugraph.experimental.datasets import DATASETS_UNDIRECTED, email_Eu_core +from cugraph.experimental.datasets import DATASETS_UNDIRECTED import cugraph import cugraph.dask as dcg @@ -41,7 +41,7 @@ def setup_function(): # email_Eu_core is too expensive to test -datasets = DATASETS_UNDIRECTED + [email_Eu_core] +datasets = DATASETS_UNDIRECTED # ============================================================================= diff --git a/python/cugraph/cugraph/tests/community/test_balanced_cut.py b/python/cugraph/cugraph/tests/community/test_balanced_cut.py index 0a95a1846ce..f6c1a741011 100644 --- a/python/cugraph/cugraph/tests/community/test_balanced_cut.py +++ b/python/cugraph/cugraph/tests/community/test_balanced_cut.py @@ -102,7 +102,7 @@ def test_edge_cut_clustering_with_edgevals(graph_file, partitions): @pytest.mark.sg -@pytest.mark.parametrize("graph_file", [DEFAULT_DATASETS[2]]) +@pytest.mark.parametrize("graph_file", DEFAULT_DATASETS) @pytest.mark.parametrize("partitions", PARTITIONS) def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): gc.collect() @@ -111,7 +111,7 @@ def test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): # read_weights_in_sp=True => value column dtype is float32 G = graph_file.get_graph() NM = G.to_pandas_edgelist().rename( - columns={"src": "0", "dst": "1", "weights": "weight"} + columns={"src": "0", "dst": "1", "wgt": "weight"} ) G = nx.from_pandas_edgelist( diff --git a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py index 3a6a6e0d409..d93fa3b547d 100644 --- a/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py +++ b/python/cugraph/cugraph/tests/community/test_induced_subgraph_mg.py @@ -90,8 +90,8 @@ def input_expected_output(input_combo): # Sample k vertices from the cuGraph graph # FIXME: Leverage the method 'select_random_vertices' instead - srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()["0"] + dsts = G.view_edge_list()["1"] vertices = cudf.concat([srcs, dsts]).drop_duplicates() vertices = vertices.sample(num_seeds).astype("int32") diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index b0dcc2ede3d..c1f8f4c3546 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -39,7 +39,7 @@ def setup_function(): # currently in networkx master and will hopefully will make it to a release # soon. def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weights", float),)) + G = nx.read_edgelist(str(graph_file), nodetype=int, data=(("weight", float),)) df = nx.to_pandas_edgelist(G) return df @@ -50,18 +50,18 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): edgelist_df = k_truss_cugraph.view_edge_list() src = edgelist_df["src"] dst = edgelist_df["dst"] - wgt = edgelist_df["weights"] + wgt = edgelist_df["weight"] assert len(edgelist_df) == len(k_truss_nx) for i in range(len(src)): has_edge = ( (k_truss_nx["source"] == src[i]) & (k_truss_nx["target"] == dst[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() has_opp_edge = ( (k_truss_nx["source"] == dst[i]) & (k_truss_nx["target"] == src[i]) - & np.isclose(k_truss_nx["weights"], wgt[i]) + & np.isclose(k_truss_nx["weight"], wgt[i]) ).any() assert has_edge or has_opp_edge return True diff --git a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py index 2cf0525d2ad..0f7bb14581f 100644 --- a/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py +++ b/python/cugraph/cugraph/tests/community/test_triangle_count_mg.py @@ -69,8 +69,8 @@ def input_expected_output(dask_client, input_combo): if start_list: # sample k nodes from the cuGraph graph k = random.randint(1, 10) - srcs = G.view_edge_list()["src"] - dsts = G.view_edge_list()["dst"] + srcs = G.view_edge_list()[G.source_columns] + dsts = G.view_edge_list()[G.destination_columns] nodes = cudf.concat([srcs, dsts]).drop_duplicates() start_list = nodes.sample(k) else: diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index c68108ce241..7f4eeeb69d5 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -83,9 +83,12 @@ def input_expected_output(dask_client, input_combo): ) sg_k_core_results = sg_k_core_graph.view_edge_list() # FIXME: The result will come asymetric. Symmetrize the results + srcCol = sg_k_core_graph.source_columns + dstCol = sg_k_core_graph.destination_columns + wgtCol = sg_k_core_graph.weight_column sg_k_core_results = ( - symmetrize_df(sg_k_core_results, "src", "dst", "weights") - .sort_values(["src", "dst"]) + symmetrize_df(sg_k_core_results, srcCol, dstCol, wgtCol) + .sort_values([srcCol, dstCol]) .reset_index(drop=True) ) @@ -144,7 +147,10 @@ def test_dask_k_core(dask_client, benchmark, input_expected_output): expected_k_core_results = input_expected_output["sg_k_core_results"] k_core_results = ( - k_core_results.compute().sort_values(["src", "dst"]).reset_index(drop=True) + k_core_results.compute() + .sort_values(["src", "dst"]) + .reset_index(drop=True) + .rename(columns={"weights": "weight"}) ) assert_frame_equal( diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index 8e8ab13574d..9d9572b88b2 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -187,11 +187,11 @@ def test_pagerank( G = graph_file.get_graph(create_using=cugraph.Graph(directed=True)) if has_precomputed_vertex_out_weight == 1: - df = G.view_edge_list()[["src", "weights"]] + df = G.view_edge_list()[["src", "wgt"]] pre_vtx_o_wgt = ( df.groupby(["src"], as_index=False) .sum() - .rename(columns={"src": "vertex", "weights": "sums"}) + .rename(columns={"src": "vertex", "wgt": "sums"}) ) cugraph_pr = cugraph_call( diff --git a/python/cugraph/cugraph/tests/nx/test_nx_convert.py b/python/cugraph/cugraph/tests/nx/test_nx_convert.py index 58b89a4bda9..e20897572d0 100644 --- a/python/cugraph/cugraph/tests/nx/test_nx_convert.py +++ b/python/cugraph/cugraph/tests/nx/test_nx_convert.py @@ -25,8 +25,9 @@ def _compare_graphs(nxG, cuG, has_wt=True): assert nxG.number_of_edges() == cuG.number_of_edges() cu_df = cuG.view_edge_list().to_pandas() + cu_df = cu_df.rename(columns={"0": "src", "1": "dst"}) if has_wt is True: - cu_df = cu_df.drop(columns=["weights"]) + cu_df = cu_df.drop(columns=["weight"]) out_of_order = cu_df[cu_df["src"] > cu_df["dst"]] if len(out_of_order) > 0: @@ -72,12 +73,11 @@ def test_networkx_compatibility(graph_file): # create a cuGraph Directed Graph gdf = cudf.from_pandas(M) - gdf = gdf.rename(columns={"weight": "weights"}) cuG = cugraph.from_cudf_edgelist( gdf, source="0", destination="1", - edge_attr="weights", + edge_attr="weight", create_using=cugraph.Graph(directed=True), ) diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks.py b/python/cugraph/cugraph/tests/sampling/test_random_walks.py index 48629fa03a6..9c94e036683 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks.py @@ -76,7 +76,7 @@ def calc_random_walks(G, max_depth=None, use_padding=False, legacy_result_type=T """ assert G is not None - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") k = random.randint(1, 6) @@ -136,8 +136,9 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type e_wgt_paths = path_data[1] e_wgt_idx = 0 - G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="weights") + G, _ = ensure_cugraph_obj_for_nx(G, nx_weight_attr="wgt") df_G = G.input_df + if "weight" in df_G.columns: df_G = df_G.rename(columns={"weight": "wgt"}) @@ -176,17 +177,18 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type else: # check valid edge wgt - expected_wgt = edge["wgt"].iloc[0] - result_wgt = e_wgt_paths.iloc[e_wgt_idx] - - if expected_wgt != result_wgt: - print( - "[ERR] Invalid edge wgt: " - "The edge src {} dst {} has wgt {} but got {}".format( - src, dst, expected_wgt, result_wgt + if G.is_weighted(): + expected_wgt = edge["wgt"].iloc[0] + result_wgt = e_wgt_paths.iloc[e_wgt_idx] + + if expected_wgt != result_wgt: + print( + "[ERR] Invalid edge wgt: " + "The edge src {} dst {} has wgt {} but got {}".format( + src, dst, expected_wgt, result_wgt + ) ) - ) - invalid_edge_wgt += 1 + invalid_edge_wgt += 1 e_wgt_idx += 1 if src != -1 and dst == -1: @@ -195,9 +197,10 @@ def check_random_walks_padded(G, path_data, seeds, max_depth, legacy_result_type assert invalid_seeds == 0 assert invalid_edge == 0 - assert invalid_edge_wgt == 0 assert len(v_paths) == (max_depth) * len(seeds) - assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) + if G.is_weighted(): + assert invalid_edge_wgt == 0 + assert len(e_wgt_paths) == (max_depth - 1) * len(seeds) if legacy_result_type: sizes = path_data[2] @@ -298,11 +301,15 @@ def test_random_walks_nx(graph_file): M = G.to_pandas_edgelist() + source = G.source_columns + target = G.destination_columns + edge_attr = G.weight_column + Gnx = nx.from_pandas_edgelist( M, - source="src", - target="dst", - edge_attr="weights", + source=source, + target=target, + edge_attr=edge_attr, create_using=nx.DiGraph(), ) max_depth = random.randint(2, 10) diff --git a/python/cugraph/cugraph/tests/structure/test_graph.py b/python/cugraph/cugraph/tests/structure/test_graph.py index a80c47662e2..de306309ca4 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph.py +++ b/python/cugraph/cugraph/tests/structure/test_graph.py @@ -62,8 +62,8 @@ def compare_graphs(nx_graph, cu_graph): edgelist_df = cu_graph.view_edge_list().reset_index(drop=True) df = cudf.DataFrame() - df["source"] = edgelist_df["src"] - df["target"] = edgelist_df["dst"] + df["source"] = edgelist_df["source"] + df["target"] = edgelist_df["target"] if len(edgelist_df.columns) > 2: df["weight"] = edgelist_df["weights"] cu_to_nx_graph = nx.from_pandas_edgelist( @@ -319,10 +319,10 @@ def test_edges_for_Graph(graph_file): edges.append([edge[1], edge[0]]) else: edges.append([edge[0], edge[1]]) - nx_edge_list = cudf.DataFrame(list(edges), columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(list(edges), columns=["0", "1"]) assert_frame_equal( - nx_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), - cu_edge_list.sort_values(by=["src", "dst"]).reset_index(drop=True), + nx_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), + cu_edge_list.sort_values(by=["0", "1"]).reset_index(drop=True), check_dtype=False, ) @@ -344,7 +344,8 @@ def test_view_edge_list_for_Graph(graph_file): G = cugraph.from_cudf_edgelist( cu_M, source="0", destination="1", create_using=cugraph.Graph ) - cu_edge_list = G.view_edge_list().sort_values(["src", "dst"]) + + cu_edge_list = G.view_edge_list().sort_values(["0", "1"]) # Check if number of Edges is same assert len(nx_edges) == len(cu_edge_list) @@ -359,12 +360,12 @@ def test_view_edge_list_for_Graph(graph_file): edges.append([edge[0], edge[1]]) edges = list(edges) edges.sort() - nx_edge_list = cudf.DataFrame(edges, columns=["src", "dst"]) + nx_edge_list = cudf.DataFrame(edges, columns=["0", "1"]) # Compare nx and cugraph edges when viewing edgelist # assert cu_edge_list.equals(nx_edge_list) - assert (cu_edge_list["src"].to_numpy() == nx_edge_list["src"].to_numpy()).all() - assert (cu_edge_list["dst"].to_numpy() == nx_edge_list["dst"].to_numpy()).all() + assert (cu_edge_list["0"].to_numpy() == nx_edge_list["0"].to_numpy()).all() + assert (cu_edge_list["1"].to_numpy() == nx_edge_list["1"].to_numpy()).all() # Test @@ -682,8 +683,8 @@ def test_to_pandas_edgelist(graph_file): G = cugraph.Graph() G.from_cudf_edgelist(cu_M, source="0", destination="1") - assert "s" in G.to_pandas_edgelist("s", "d").columns - assert "s" in G.to_pandas_edgelist(source="s", destination="d").columns + assert "0" in G.to_pandas_edgelist("0", "1").columns + assert "0" in G.to_pandas_edgelist(source="0", destination="1").columns @pytest.mark.sg @@ -877,3 +878,107 @@ def test_graph_creation_edge_properties(graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +def test_graph_creation_edges(graph_file, directed, renumber): + # Verifies that the input dataframe passed the user is the same + # retrieved from the graph when the graph is directed + srcCol = "source" + dstCol = "target" + wgtCol = "weight" + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + + G = cugraph.Graph(directed=directed) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + .reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + else: + # If the graph is undirected, ensures that only the upper triangular + # matrix of the adjacency matrix is returned + if isinstance(srcCol, list): + srcCol = srcCol[0] + dstCol = dstCol[0] + is_upper_triangular = edge_list_view[srcCol] <= edge_list_view[dstCol] + is_upper_triangular = list(set(is_upper_triangular.values_host)) + assert len(is_upper_triangular) == 1 + assert is_upper_triangular[0] + + +@pytest.mark.sg +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", [True, False]) +def test_graph_creation_edges_multi_col_vertices(graph_file, directed): + srcCol = ["src_0", "src_1"] + dstCol = ["dst_0", "dst_1"] + wgtCol = "weight" + vertexCol = srcCol + dstCol + columns = vertexCol.copy() + columns.append(wgtCol) + + input_df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol[0], dstCol[0], wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + input_df["src_1"] = input_df["src_0"] + 1000 + input_df["dst_1"] = input_df["dst_0"] + 1000 + + G = cugraph.Graph(directed=directed) + G.from_cudf_edgelist(input_df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + input_df = input_df.loc[:, columns] + edge_list_view = G.view_edge_list().loc[:, columns] + edges = G.edges().loc[:, vertexCol] + + assert_frame_equal( + edge_list_view.drop(columns=wgtCol) + .sort_values(by=vertexCol) + .reset_index(drop=True), + edges.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) + if directed: + assert_frame_equal( + edge_list_view.sort_values(by=vertexCol).reset_index(drop=True), + input_df.sort_values(by=vertexCol).reset_index(drop=True), + check_dtype=False, + ) diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index 707b195dfa8..3024e50402a 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -338,7 +338,7 @@ def test_mg_select_random_vertices( assert len(join) == len(sampled_vertices) -@pytest.mark.sg +@pytest.mark.mg @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize( "edge_props", @@ -363,3 +363,54 @@ def test_graph_creation_edge_properties(dask_client, graph_file, edge_props): G = cugraph.Graph(directed=True) G.from_dask_cudf_edgelist(df, source="0", destination="1", **prop_keys) + + +@pytest.mark.parametrize("directed", [True, False]) +@pytest.mark.parametrize("renumber", [True, False]) +@pytest.mark.parametrize("graph_file", datasets) +def test_graph_creation_properties(dask_client, graph_file, directed, renumber): + srcCol = "src" + dstCol = "dst" + wgtCol = "wgt" + df = cudf.read_csv( + graph_file, + delimiter=" ", + names=[srcCol, dstCol, wgtCol], + dtype=["int32", "int32", "float32"], + header=None, + ) + ddf = dask_cudf.from_cudf(df, npartitions=2) + + if renumber: + # trigger renumbering by passing a list of vertex column + srcCol = [srcCol] + dstCol = [dstCol] + vertexCol = srcCol + dstCol + else: + vertexCol = [srcCol, dstCol] + + sG = cugraph.Graph(directed=directed) + mG = cugraph.Graph(directed=directed) + sG.from_cudf_edgelist(df, source=srcCol, destination=dstCol, edge_attr=wgtCol) + mG.from_dask_cudf_edgelist(ddf, source=srcCol, destination=dstCol, edge_attr=wgtCol) + + columns = vertexCol.copy() + columns.append(wgtCol) + + sG_edgelist_view = ( + sG.view_edge_list() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + mG_edgelist_view = ( + mG.view_edge_list() + .compute() + .sort_values(by=vertexCol) + .reset_index(drop=True) + .loc[:, columns] + ) + + assert sG.number_of_nodes() == mG.number_of_nodes() + assert sG.number_of_edges() == mG.number_of_edges() + assert_frame_equal(sG_edgelist_view, mG_edgelist_view, check_dtype=False) diff --git a/python/cugraph/cugraph/tests/structure/test_multigraph.py b/python/cugraph/cugraph/tests/structure/test_multigraph.py index af78c238d4e..a9ea617fdb8 100644 --- a/python/cugraph/cugraph/tests/structure/test_multigraph.py +++ b/python/cugraph/cugraph/tests/structure/test_multigraph.py @@ -47,7 +47,7 @@ def test_multigraph(graph_file): assert G.number_of_nodes() == Gnx.number_of_nodes() cuedges = cugraph.to_pandas_edgelist(G) cuedges.rename( - columns={"src": "source", "dst": "target", "weights": "weight"}, inplace=True + columns={"src": "source", "dst": "target", "wgt": "weight"}, inplace=True ) cuedges["weight"] = cuedges["weight"].round(decimals=3) nxedges = nx.to_pandas_edgelist(Gnx).astype( diff --git a/python/cugraph/cugraph/utilities/nx_factory.py b/python/cugraph/cugraph/utilities/nx_factory.py index 2448a511229..d07d17978d7 100644 --- a/python/cugraph/cugraph/utilities/nx_factory.py +++ b/python/cugraph/cugraph/utilities/nx_factory.py @@ -236,11 +236,15 @@ def cugraph_to_nx(G): pdf = G.view_edge_list().to_pandas() num_col = len(pdf.columns) + source = G.source_columns + target = G.destination_columns + if num_col == 2: - Gnx = nx.from_pandas_edgelist(pdf, source="src", target="dst") + Gnx = nx.from_pandas_edgelist(pdf, source=source, target=target) else: + edge_attr = G.weight_column Gnx = nx.from_pandas_edgelist( - pdf, source="src", target="dst", edge_attr="weights" + pdf, source=source, target=target, edge_attr=edge_attr ) return Gnx diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd index 4e52ed557ed..a2df44ba26e 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pxd +++ b/python/pylibcugraph/pylibcugraph/graphs.pxd @@ -25,6 +25,7 @@ from pylibcugraph._cugraph_c.graph cimport ( cdef class _GPUGraph: cdef cugraph_graph_t* c_graph_ptr cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr cdef class SGGraph(_GPUGraph): pass diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index fb4692bf3a8..33a8a09c6f4 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -166,11 +166,10 @@ cdef class SGGraph(_GPUGraph): dst_or_index_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( + + self.weights_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) - self.edge_id_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( edge_id_array @@ -187,7 +186,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -205,7 +204,7 @@ cdef class SGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -224,7 +223,7 @@ cdef class SGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_or_offsets_view_ptr) cugraph_type_erased_device_array_view_free(dsts_or_indices_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: @@ -337,7 +336,7 @@ cdef class MGGraph(_GPUGraph): create_cugraph_type_erased_device_array_view_from_py_obj( dst_array ) - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ + self.weights_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) @@ -355,7 +354,7 @@ cdef class MGGraph(_GPUGraph): &(graph_properties.c_graph_properties), srcs_view_ptr, dsts_view_ptr, - weights_view_ptr, + self.weights_view_ptr, self.edge_id_view_ptr, edge_type_view_ptr, store_transposed, @@ -369,7 +368,7 @@ cdef class MGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_view_ptr) cugraph_type_erased_device_array_view_free(dsts_view_ptr) - cugraph_type_erased_device_array_view_free(weights_view_ptr) + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx index 4a2b8a70189..1570523beb8 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx @@ -96,6 +96,8 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef uintptr_t cai_start_ptr = \ start_vertices.__cuda_array_interface__["data"][0] + cdef cugraph_type_erased_device_array_view_t* weights_ptr + cdef cugraph_type_erased_device_array_view_t* start_ptr = \ cugraph_type_erased_device_array_view_create( cai_start_ptr, @@ -113,14 +115,17 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* path_ptr = \ cugraph_random_walk_result_get_paths(result_ptr) - cdef cugraph_type_erased_device_array_view_t* weights_ptr = \ - cugraph_random_walk_result_get_weights(result_ptr) + + if input_graph.weights_view_ptr is NULL: + cupy_weights = None + else: + weights_ptr = cugraph_random_walk_result_get_weights(result_ptr) + cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) max_path_length = \ cugraph_random_walk_result_get_max_path_length(result_ptr) cupy_paths = copy_to_cupy_array(c_resource_handle_ptr, path_ptr) - cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) cugraph_random_walk_result_free(result_ptr) cugraph_type_erased_device_array_view_free(start_ptr)