From a96e933f280ffb42d15e8f3260756fed7b58bee8 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Wed, 3 Apr 2024 00:36:47 -0400 Subject: [PATCH] [BUG] Always Persist Dask DataFrames in cuGraph-DGL Graph Storage (#4296) Always persist the Dask DataFrames held by the cuGraph-DGL graph storage object when running in multi-GPU mode. This resolves a bug where the DataFrames were not persisted, causing `'TypeError("Could not construct DataFrame from ")'` to be raised when trying to access them after saving them as a dataset. Authors: - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Vibhu Jawa (https://github.com/VibhuJawa) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4296 --- python/cugraph-dgl/cugraph_dgl/cugraph_storage.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py b/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py index ded77245e57..6a1b6ee32b8 100644 --- a/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py +++ b/python/cugraph-dgl/cugraph_dgl/cugraph_storage.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -170,6 +170,13 @@ def __init__( self._edges_dict = add_node_offset_to_edges_dict( _edges_dict, self._ntype_offset_d ) + + # Persist the dataframes so they can be retrieved later + # for a multi-GPU workflow. + if not single_gpu: + for k in list(self._edges_dict.keys()): + self._edges_dict[k] = self._edges_dict[k].persist() + self._etype_id_dict = { etype: etype_id for etype_id, etype in enumerate(self.canonical_etypes) }