Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing util improvements and refactoring #3705

Merged
merged 34 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
beecde0
Use download_dir to store results files in datasets dir, instead of t…
betochimas Jul 12, 2023
5386a29
Style check
betochimas Jul 12, 2023
0d6930c
Modified results file generation, refactored traversal tests, no prec…
betochimas Jul 20, 2023
0adf443
Renaming files, style check
betochimas Jul 20, 2023
bbd48f1
Merge pull request #103 from rapidsai/branch-23.08
betochimas Jul 24, 2023
d679d56
Removed temp results files, addresses mapping changes
betochimas Jul 24, 2023
3768ac3
Merge branch 'rapidsai:branch-23.08' into branch-23.08
betochimas Jul 24, 2023
9885c85
Removed old traversal tests and kept only resultset data
betochimas Jul 24, 2023
8722798
Merge pull request #104 from betochimas/branch-23.08
betochimas Jul 24, 2023
5b921b0
Added results files for CI, can remove easily as needed
betochimas Jul 25, 2023
f4b21d7
Change resultset mappings format
betochimas Jul 31, 2023
5c1f1b1
Merge branch 'branch-23.08' into branch-23.08-resultsset
betochimas Jul 31, 2023
45d12bc
Style check and result file relabeling
betochimas Jul 31, 2023
fc90fef
Edits to generating results and utils.py
betochimas Aug 1, 2023
2159ba2
Minor comment edit
Aug 1, 2023
7a002fc
comment fix, try to verify commit via sig
betochimas Aug 1, 2023
6f71cb1
Attempt to sign commits
betochimas Aug 1, 2023
202f44e
Merge pull request #105 from rapidsai/branch-23.08
betochimas Aug 4, 2023
54258e9
Reuse datasets path env to load resultsets near datasets, testing imp…
betochimas Aug 8, 2023
95685db
Removed comments, renaming variables
betochimas Aug 9, 2023
2a92f2f
Addresses review suggestions
betochimas Aug 14, 2023
495964b
Merge branch 'branch-23.08' into branch-23.08-resultsset
betochimas Aug 14, 2023
2c336a1
change pathing to pass CI
betochimas Aug 14, 2023
7e3c078
Merge
betochimas Aug 15, 2023
f5b9ec3
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 15, 2023
0902b8b
Uncommented s3 download block
betochimas Aug 15, 2023
ae70f99
Merge request switching target
betochimas Aug 15, 2023
f0202b7
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 16, 2023
c26ed71
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 16, 2023
08b04cb
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 18, 2023
32424ea
Removed Resultset env var
betochimas Aug 18, 2023
c9df485
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 21, 2023
d95a5f1
Merge branch 'branch-23.10' into branch-23.08-resultsset
betochimas Aug 21, 2023
3ab132d
Removed duplicate constants and fixed imports
betochimas Aug 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion python/cugraph/cugraph/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH, RAPIDS_DATASET_ROOT_DIR
from cugraph.testing.utils import (
RAPIDS_DATASET_ROOT_DIR_PATH,
RAPIDS_DATASET_ROOT_DIR,
RAPIDS_RESULTSET_ROOT_DIR_PATH,
rlratzel marked this conversation as resolved.
Show resolved Hide resolved
)
from cugraph.testing.resultset import Resultset, load_resultset, get_resultset
from cugraph.datasets import (
cyber,
dolphins,
Expand Down
277 changes: 277 additions & 0 deletions python/cugraph/cugraph/testing/generate_resultsets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from tempfile import NamedTemporaryFile
import random

import numpy as np
import networkx as nx

import cudf
import cugraph
from cugraph.datasets import dolphins, netscience, karate_disjoint, karate
from cugraph.testing import utils, Resultset, SMALL_DATASETS


# Directory where generated result files are written, and the in-memory
# registry of Resultset objects keyed by the kwargs that produced them.
_results_dir = utils.RAPIDS_RESULTSET_ROOT_DIR_PATH
_resultsets = {}


def add_resultset(result_data_dictionary, **kwargs):
    """Register *result_data_dictionary* as a Resultset.

    The entry is stored in the module-level ``_resultsets`` dict under a
    hashable key built from the keyword arguments, sorted by name so the
    key is independent of the order the kwargs were passed in.
    """
    # dict keys are unique, so sorting the items sorts by argname only
    key = tuple(sorted(kwargs.items()))
    _resultsets[key] = Resultset(result_data_dictionary)


if __name__ == "__main__":
    # =============================================================================
    # Parameters
    # =============================================================================
    SEEDS = [42]

    DIRECTED_GRAPH_OPTIONS = [True, False]

    DEPTH_LIMITS = [None, 1, 5, 18]

    DATASETS = [dolphins, netscience, karate_disjoint]

    # Column layout of traversal_mappings.csv: a result-file "UUID" followed
    # by up to ten (argname, argvalue) pairs describing how the result was
    # generated.  Defined once here instead of repeating the 21-name list.
    MAPPING_COLUMNS = ["#UUID"]
    for i in range(10):
        MAPPING_COLUMNS += [f"arg{i}", f"arg{i}val"]

    # =============================================================================
    # tests/traversal/test_bfs.py
    # =============================================================================
    test_bfs_results = {}

    for ds in DATASETS + [karate]:
        for seed in SEEDS:
            for depth_limit in DEPTH_LIMITS:
                for dirctd in DIRECTED_GRAPH_OPTIONS:
                    # this is used for get_cu_graph_golden_results_and_params
                    Gnx = utils.generate_nx_graph_from_file(
                        ds.get_path(), directed=dirctd
                    )
                    # re-seed per iteration so each result is reproducible
                    # independently of loop order
                    random.seed(seed)
                    start_vertex = random.sample(list(Gnx.nodes()), 1)[0]
                    golden_values = nx.single_source_shortest_path_length(
                        Gnx, start_vertex, cutoff=depth_limit
                    )
                    vertices = cudf.Series(golden_values.keys())
                    distances = cudf.Series(golden_values.values())
                    add_resultset(
                        {"vertex": vertices, "distance": distances},
                        graph_dataset=ds.metadata["name"],
                        graph_directed=str(dirctd),
                        algo="single_source_shortest_path_length",
                        start_vertex=str(start_vertex),
                        cutoff=str(depth_limit),
                    )

    # these are pandas dataframes
    for dirctd in DIRECTED_GRAPH_OPTIONS:
        Gnx = utils.generate_nx_graph_from_file(karate.get_path(), directed=dirctd)
        golden_result = cugraph.bfs_edges(Gnx, source=7)
        cugraph_df = cudf.from_pandas(golden_result)
        add_resultset(
            cugraph_df,
            graph_dataset="karate",
            graph_directed=str(dirctd),
            algo="bfs_edges",
            source="7",
        )

    # =============================================================================
    # tests/traversal/test_sssp.py
    # =============================================================================
    test_sssp_results = {}

    SOURCES = [1]

    for ds in SMALL_DATASETS:
        for source in SOURCES:
            Gnx = utils.generate_nx_graph_from_file(ds.get_path(), directed=True)
            golden_paths = nx.single_source_dijkstra_path_length(Gnx, source)
            vertices = cudf.Series(golden_paths.keys())
            distances = cudf.Series(golden_paths.values())
            add_resultset(
                {"vertex": vertices, "distance": distances},
                graph_dataset=ds.metadata["name"],
                graph_directed="True",
                algo="single_source_dijkstra_path_length",
                source=str(source),
            )

            # data-type-conversion variant: build the graph with int32
            # weights.  (The original code also built a float-weight graph
            # here that was immediately overwritten and never used, so only
            # the int32-weight graph is constructed.)
            M = utils.read_csv_for_nx(ds.get_path(), read_weights_in_sp=True)
            M["weight"] = M["weight"].astype(np.int32)
            Gnx = nx.from_pandas_edgelist(
                M,
                source="0",
                target="1",
                edge_attr="weight",
                create_using=nx.DiGraph(),
            )
            golden_paths_datatypeconv = nx.single_source_dijkstra_path_length(
                Gnx, source
            )
            vertices_datatypeconv = cudf.Series(golden_paths_datatypeconv.keys())
            distances_datatypeconv = cudf.Series(golden_paths_datatypeconv.values())
            add_resultset(
                {"vertex": vertices_datatypeconv, "distance": distances_datatypeconv},
                graph_dataset=ds.metadata["name"],
                graph_directed="True",
                algo="single_source_dijkstra_path_length",
                test="data_type_conversion",
                source=str(source),
            )

    for dirctd in DIRECTED_GRAPH_OPTIONS:
        for source in SOURCES:
            Gnx = utils.generate_nx_graph_from_file(
                karate.get_path(), directed=dirctd, edgevals=True
            )
            add_resultset(
                cugraph.sssp(Gnx, source),
                graph_dataset="karate",
                graph_directed=str(dirctd),
                algo="sssp_nonnative",
                source=str(source),
            )

    # tiny hand-built graph exercising a non-default edge attribute name
    Gnx = nx.Graph()
    Gnx.add_edge(0, 1, other=10)
    Gnx.add_edge(1, 2, other=20)
    df = cugraph.sssp(Gnx, 0, edge_attr="other")
    add_resultset(df, algo="sssp_nonnative", test="network_edge_attr")

    # =============================================================================
    # tests/traversal/test_paths.py
    # =============================================================================
    CONNECTED_GRAPH = """1,5,3
1,4,1
1,2,1
1,6,2
1,7,2
4,5,1
2,3,1
7,6,2
"""

    DISCONNECTED_GRAPH = CONNECTED_GRAPH + "8,9,4"

    paths = [("1", "1"), ("1", "5"), ("1", "3"), ("1", "6")]
    invalid_paths = {
        "connected": [("-1", "1"), ("0", "42")],
        "disconnected": [("1", "10"), ("1", "8")],
    }

    # write the edgelist to a temp file so networkx can parse it
    with NamedTemporaryFile(mode="w+", suffix=".csv") as graph_tf:
        graph_tf.writelines(DISCONNECTED_GRAPH)
        graph_tf.seek(0)
        Gnx_DIS = nx.read_weighted_edgelist(graph_tf.name, delimiter=",")

    res1 = nx.shortest_path_length(Gnx_DIS, source="1", weight="weight")
    vertices = cudf.Series(res1.keys())
    distances = cudf.Series(res1.values())
    add_resultset(
        {"vertex": vertices, "distance": distances},
        algo="shortest_path_length",
        graph_dataset="DISCONNECTED",
        graph_directed="True",
        source="1",
        weight="weight",
    )

    # NOTE: Currently, only traversal result files are generated
    # Fixed seed so the generated result-file names are reproducible.
    random.seed(24)

    # Generating ALL results files
    _results_dir.mkdir(parents=True, exist_ok=True)

    # Build one mapping row per resultset, writing each result to its own
    # CSV; rows are collected in a list and concatenated once at the end
    # instead of concatenating inside the loop (which is quadratic).
    mapping_rows = []
    for argtuple, resultset in _resultsets.items():
        res = resultset.get_cudf_dataframe()
        # 50 random bits rendered as a decimal string serve as the "UUID"
        temp_filename = str(random.getrandbits(50))

        single_mapping = np.empty(len(MAPPING_COLUMNS), dtype=object)
        single_mapping[0] = temp_filename
        # argtuple is already a sorted tuple of (argname, argval) pairs
        for i, (argname, argval) in enumerate(argtuple):
            single_mapping[2 * i + 1] = argname
            single_mapping[2 * i + 2] = argval

        mapping_rows.append(
            cudf.DataFrame([single_mapping], columns=MAPPING_COLUMNS)
        )
        res.to_csv(_results_dir / (temp_filename + ".csv"), index=False)

    traversal_mappings = cudf.concat(
        [cudf.DataFrame(columns=MAPPING_COLUMNS)] + mapping_rows,
        axis=0,
        ignore_index=True,
    )
    traversal_mappings.to_csv(
        _results_dir / "traversal_mappings.csv", index=False, sep=" "
    )
Loading
Loading