diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bfa50e69b1d..2702dcad0b5 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -30,6 +30,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -39,6 +40,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -61,7 +63,11 @@ jobs: build_type: branch node_type: "gpu-latest-1" arch: "amd64" + branch: ${{ inputs.branch }} + build_type: ${{ inputs.build_type || 'branch' }} container_image: "rapidsai/ci:latest" + date: ${{ inputs.date }} + node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" wheel-build-pylibcugraph: secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2447863ab86..12d49520d35 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -35,6 +35,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request node_type: cpu16 conda-cpp-tests: @@ -42,18 +43,21 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: pull-request conda-notebook-tests: needs: conda-python-build @@ -63,7 +67,7 @@ jobs: build_type: pull-request node_type: "gpu-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci:cuda11.8.0-ubuntu22.04-py3.10" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build @@ -73,7 +77,7 @@ jobs: build_type: pull-request node_type: "gpu-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci:cuda11.8.0-ubuntu22.04-py3.10" run_script: "ci/build_docs.sh" wheel-build-pylibcugraph: needs: checks @@ -97,7 +101,7 @@ jobs: package-name: pylibcugraph # On arm also need to install cupy from the specific webpage. 
test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest -v ./python/pylibcugraph/pylibcugraph/tests" test-smoketest: "python ci/wheel_smoke_test_pylibcugraph.py" wheel-build-cugraph: needs: wheel-tests-pylibcugraph @@ -124,6 +128,6 @@ jobs: test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" # Skip dataset downloads on arm to save CI time -- arm only runs smoke tests. # On arm also need to install cupy from the specific site. - test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_cu11 rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-arm64: "RAPIDS_PY_WHEEL_NAME=pylibcugraph_${{ '${PIP_CU_VERSION}' }} rapids-download-wheels-from-s3 ./local-pylibcugraph-dep && pip install --no-deps ./local-pylibcugraph-dep/*.whl && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -v -m sg ./python/cugraph/cugraph/tests" test-smoketest: "python ci/wheel_smoke_test_cugraph.py" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 693a45afd25..d4ba92b0dae 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -18,6 +18,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-cpp-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -26,6 +27,7 @@ jobs: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-120 with: + matrix_filter: map(select(.CUDA_VER | startswith("11"))) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} @@ -41,7 +43,7 @@ jobs: package-name: pylibcugraph # On arm also need to install cupy from the specific webpage. test-before-arm64: "pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets pytest -v ./python/pylibcugraph/pylibcugraph/tests" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=./datasets python -m pytest -v ./python/pylibcugraph/pylibcugraph/tests" wheel-tests-cugraph: secrets: inherit uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux-test.yml@cuda-120 @@ -52,7 +54,7 @@ jobs: sha: ${{ inputs.sha }} package-name: cugraph # Always want to test against latest dask/distributed. 
- test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" + test-before-amd64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" # On arm also need to install cupy from the specific webpage. - test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.06" - test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets pytest -v -m sg ./python/cugraph/cugraph/tests" + test-before-arm64: "cd ./datasets && bash ./get_test_data.sh && cd - && pip install 'cupy-cuda11x<12.0.0' -f https://pip.cupy.dev/aarch64 && pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.08" + test-unittest: "RAPIDS_DATASET_ROOT_DIR=/__w/cugraph/cugraph/datasets python -m pytest -v -m sg ./python/cugraph/cugraph/tests" diff --git a/benchmarks/cugraph/standalone/bulk_sampling/README.md b/benchmarks/cugraph/standalone/bulk_sampling/README.md new file mode 100644 index 00000000000..f48eea5c556 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/README.md @@ -0,0 +1,116 @@ +# cuGraph Bulk Sampling + +## Overview +The `cugraph_bulk_sampling.py` script runs the bulk sampler for a variety of datasets, including +both generated (rmat) datasets and disk (ogbn_papers100M, etc.) datasets. It can also load +replicas of these datasets to create a larger benchmark (i.e. ogbn_papers100M x2). + +## Arguments +The script takes a variety of arguments to control sampling behavior. +Required: + --output_root + The output root directory. File/folder names are auto-generated. + For instance, if the output root directory is /home/samples, + the samples will be written to a new folder in /home/samples that + contains information about the sampling run as well as the time + of the run. + + --dataset_root + The folder where datasets are stored. Uses the format described + in the input format section. + + --datasets + Comma-separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16). + For ogb datasets, can provide replication factor using brackets. + Will attempt to read from dataset_root/. + +Optional: + --fanouts + Comma-separated list of fanout values (i.e. [10, 25]). + The default fanout is [10, 25]. + + --batch_sizes + Comma-separated list of batch sizes (i.e. 500, 1000). + Defaults to "512,1024" + + --seeds_per_call_opts + Comma-separated list of seeds per call. Controls the number of input seed vertices processed + in a single sampling call. + Defaults to 524288 + + --reverse_edges + Whether to reverse the edges of the input edgelist. Should be set to False for PyG and True for DGL. + Defaults to False (PyG). + + --dask_worker_devices + Comma-separated list of the GPUs to assign to dask (i.e. "0,1,2"). + Defaults to just the default GPU (0). + Changing this is strongly recommended in order to take advantage of all GPUs on the system. + + --random_seed + Seed for random number generation. 
+ Defaults to '62' + + --persist + Whether to aggressively use persist() in dask to make the ETL steps (NOT PART OF SAMPLING) faster. + Will probably make this script finish sooner at the expense of memory usage, but won't affect + sampling time. + Changing this is not recommended unless you know what you are doing. + Defaults to False. + +## Input Format +The script expects its input data in the following format: +``` + +| +|------ meta.json +|------ parquet +|------ |---------- +|------ |---------- |---------------------------- [node_label.parquet] +|------ |---------- +|------ |---------- |---------------------------- [node_label.parquet] +... +|------ |---------- +|------ |---------- |------------------------------------------ edge_index.parquet +|------ |---------- +|------ |---------- |------------------------------------------ edge_index.parquet +... + +``` + +`node_label.parquet` only needs to be present for vertex types that have labeled +nodes. It consists of two columns, "node" which contains node ids, and "label", +which contains the labeled class of the node. + +`edge_index.parquet` is required for all edge types. It has two columns, `src` +and `dst`, representing the source and destination vertices of the edges in that +edge type's COO edge index. + +`meta.json` is a json file containing metadata needed to properly process +the parquet files. It must have the following format: +``` +{ + "num_nodes": { + ": <# nodes of node type 0>, + ": <# nodes of node type 1>, + ... + }, + "num_edges": { + : <# edges of edge type 0>, + : <# edges of edge type 1>, + ... + } +} +``` + +## Output Meta +The script, in addition to the samples, will also output a file named `output_meta.json`. +This file contains various statistics about the sampling run, including the runtime, +as well as information about the dataset and system that the samples were produced from. + +This metadata file can be used to gather the results from the sampling and training stages +together. + +## Other Notes +For rmat datasets, you will need to generate your own bogus features in the training stage. +Since that is trivial, that is not done in this sampling script. 
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb new file mode 100644 index 00000000000..3ea158d1f61 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/benchmarking_script.ipynb @@ -0,0 +1,1860 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9b8d43d5-3005-4b0b-b418-b84af104bc3b", + "metadata": {}, + "outputs": [], + "source": [ + "!export RAPIDS_NO_INITIALIZE=\"1\"\n", + "!export CUDF_SPILL=\"1\"\n", + "!export LIBCUDF_CUFILE_POLICY=\"OFF\"\n", + "\n", + "from cugraph_bulk_sampling import start_dask_client, benchmark_cugraph_bulk_sampling, load_disk_dataset, construct_graph\n", + "from cugraph_bulk_sampling import sample_graph\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "f3f04da7-c937-4dab-b432-fc569522f411", + "metadata": {}, + "source": [ + "# Setup Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26324a75-1b34-4c7b-8a26-23bac23e91b4", + "metadata": {}, + "outputs": [], + "source": [ + "dask_worker_devices='0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fc8d56ef-4036-4105-9764-1c6cbb2bdb15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dask client/cluster created using LocalCUDACluster\n" + ] + } + ], + "source": [ + "client, cluster = start_dask_client(dask_worker_devices=dask_worker_devices,\n", + " jit_unspill=False,\n", + " rmm_pool_size=28e9,\n", + " rmm_async=True)" + ] + }, + { + "cell_type": "markdown", + "id": "5335b115-eeb0-470d-9884-79990506ead7", + "metadata": {}, + "source": [ + "# Setup Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c9c8fb66-6bdd-45d7-8564-cc28e383d966", + "metadata": {}, + "outputs": [], + "source": [ + "dataset='ogbn_papers100M'\n", + "dataset_root=\".\"\n", + "output_root=\".\"\n", + "reverse_edges=True\n", + "add_edge_types=False\n", + "batch_size=512\n", + "seeds_per_call=524288\n", + "fanout=[25,25]\n", + "replication_factor=4\n", + "seed=123\n", + "\n", + "dataset_dir=dataset_root\n", + "output_path=output_root\n", + "persist=False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37ed06f6-ad06-443a-be12-61800d59d221", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading edge index for edge type paper__cites__paper\n", + "Loading node labels for node type paper (offset=0)\n", + "Number of input edges = 6,462,743,488\n", + "constructed graph\n" + ] + } + ], + "source": [ + "dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \\\n", + " load_disk_dataset(\n", + " dataset,\n", + " dataset_dir=dataset_dir,\n", + " reverse_edges=reverse_edges,\n", + " replication_factor=replication_factor,\n", + " persist=False,\n", + " add_edge_types=add_edge_types\n", + " )\n", + "num_input_edges = len(dask_edgelist_df)\n", + "print(\n", + "f\"Number of input edges = {num_input_edges:,}\"\n", + ")\n", + "\n", + "G = construct_graph(\n", + "dask_edgelist_df\n", + ")\n", + "del dask_edgelist_df\n", + "print('constructed graph')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f71cf5a3-7e4b-4497-9c14-a342cc5abbcd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: 
FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n", + "/opt/conda/envs/rapids/lib/python3.10/site-packages/cudf/core/index.py:3139: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input memory: 103403895808\n" + ] + } + ], + "source": [ + "input_memory = G.edgelist.edgelist_df.memory_usage().sum().compute()\n", + "print(f'input memory: {input_memory}')\n", + "\n", + "output_subdir = os.path.join(output_path, f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}')\n", + "os.makedirs(output_subdir, exist_ok=True)\n", + "\n", + "output_sample_path = os.path.join(output_subdir, 'samples')\n", + "os.makedirs(output_sample_path, exist_ok=True)\n", + "\n", + "batches_per_partition = 200_000 // batch_size" + ] + }, + { + "cell_type": "markdown", + "id": "3d276c5c-65d6-4191-b2a5-37b30d2cd44b", + "metadata": {}, + "source": [ + "# Benchmarking Sample Graph" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "675b580c-6a7a-4571-88dd-0d4429f9e5ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 9.981931209564209\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '51.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '58.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '59.0MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '149.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '82.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '98.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '73.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '310.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '267.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '80.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '131.8MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '205.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '288.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 
'tcp://127.0.0.1:46757': { 'current_bytes': '303.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '130.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.299846172332764\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '278.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '243.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '256.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '330.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '239.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '239.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '278.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '68.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '397.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '79.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '127.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '90.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '303.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '53.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.2623701095581055\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '73.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '179.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '253.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '366.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 
'tcp://127.0.0.1:36543': { 'current_bytes': '98.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '130.1MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '152.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '435.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '463.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '151.5MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '379.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '192.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '150.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '416.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.487639665603638\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '176.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '292.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '118.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '60.2MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '204.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '208.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '185.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '254.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '175.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '102.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '83.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '197.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': 
'62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '142.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '227.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.208818197250366\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '261.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '360.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '225.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '428.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '288.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '202.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '258.3MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '203.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '278.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '102.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '596.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '301.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '203.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.344887971878052\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '171.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '225.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '245.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': 
'61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '315.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '248.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '147.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '64.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '301.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '152.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '276.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '240.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '625.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '117.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '192.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.830034255981445\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '202.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '101.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '223.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '115.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '59.8MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '260.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '82.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '194.4MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '82.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '168.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '181.7MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '157.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.967972278594971\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '65.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '339.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '208.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '59.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '440.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '159.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '228.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '261.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '129.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '579.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.228902339935303\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '169.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '179.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '214.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '57.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '85.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '283.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '81.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '240.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '83.1MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '193.2MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '117.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '408.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '309.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '237.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.624476432800293\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '159.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '181.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '134.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '214.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '281.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '201.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '399.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '291.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '228.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '125.5MB',\n", + " 'peak_bytes': 
'2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '383.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '313.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '152.3MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '100.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.998133420944214\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '209.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '211.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '115.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '239.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '40.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '274.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '190.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '61.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '496.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '242.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '164.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '276.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '341.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '295.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '131.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.723633289337158\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.4MB',\n", + " 'peak_bytes': 
'2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '220.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '375.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '246.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '291.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '179.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '202.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '259.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '241.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '283.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '261.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '124.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '252.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '134.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '277.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '100.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.884088754653931\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '544.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '65.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '222.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '136.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '257.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '86.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '233.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '191.8MB',\n", + " 
'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '385.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '365.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '288.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '263.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '257.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.929041385650635\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '106.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '89.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '198.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '77.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '393.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '285.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '247.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '316.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '86.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '244.2MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '255.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '218.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '317.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '297.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + 
"execution_time: 5.864148378372192\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '176.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '205.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '67.0MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '250.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '197.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '201.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '252.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '359.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '280.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '309.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '371.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '256.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '197.4MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '297.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.87973165512085\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '237.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '210.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '182.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '109.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '256.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '289.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '144.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': 
'456.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '230.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '271.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '290.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '308.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '309.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '206.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '82.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 5.986347436904907\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '264.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '33.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '91.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '116.7MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '170.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '154.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '109.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '71.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '116.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '280.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '271.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '561.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '277.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '350.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '296.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '93.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') 
kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.348597764968872\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '209.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '254.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '236.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '121.5MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '68.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '344.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '75.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '52.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '146.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '127.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '267.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '218.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '466.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '279.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '262.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '211.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.334516286849976\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '252.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '40.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '99.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '211.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '295.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '556.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 
'current_bytes': '265.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '264.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '129.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '83.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '250.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '254.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '249.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '152.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.598327398300171\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '226.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '227.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '230.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '90.4MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '190.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '278.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '167.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '186.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '449.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '96.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '406.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '110.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '249.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '594.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '278.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '142.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created 
batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.590704679489136\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '257.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '339.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '221.2MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '158.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '455.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '144.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '196.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '159.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '225.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '726.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '134.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '191.8MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '263.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '170.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.666577577590942\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '536.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '19.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '113.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '138.4MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '197.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 
'tcp://127.0.0.1:40517': { 'current_bytes': '144.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '128.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '307.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '150.0MB',\n", + " 'peak_bytes': '2.3GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '164.7MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '213.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '246.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '106.5MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '309.1MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '268.1MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.439242839813232\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '106.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '222.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '81.3MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '66.6MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '199.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '72.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '240.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '232.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '150.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '256.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '200.5MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '466.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '312.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '134.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': 
'61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '493.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.422755718231201\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '241.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '220.2MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '153.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '50.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '339.5MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '92.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '190.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '236.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '274.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '201.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '122.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '144.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '602.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '510.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '78.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.785901784896851\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '258.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '181.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '217.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '167.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': 
'59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '436.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '164.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '205.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '204.9MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '172.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '297.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '61.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '282.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '259.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '171.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '143.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.363157033920288\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '536.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '250.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '225.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.1GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '220.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '181.3MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '252.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '240.6MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '134.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '228.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '405.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '78.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '284.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '174.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 
'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '276.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '437.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.568510055541992\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '171.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '224.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '117.2MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '254.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '427.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '116.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '128.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '265.7MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '247.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.2GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '139.3MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '116.4MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '590.2MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '207.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '182.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.5GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '133.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '214.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.794158220291138\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '235.0MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '225.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '227.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '239.0MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 
'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '230.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '212.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '93.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '195.4MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '231.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '386.9MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '256.8MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '86.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '659.0MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '811.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '265.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '322.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.1GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 7.261108636856079\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '355.5MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '251.8MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '238.9MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '336.4MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '166.0MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '59.6GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '236.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '228.9MB',\n", + " 'peak_bytes': '2.4GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '259.5MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '245.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '562.3MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '232.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '206.6MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '181.0MB',\n", + " 'peak_bytes': 
'2.7GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '177.0MB',\n", + " 'peak_bytes': '2.9GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '185.8MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '224.9MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "created batches\n", + "flushed all batches\n", + "function: sample_graph\n", + "function args: (, , '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}\n", + "execution_time: 6.985189437866211\n", + "allocation_counts:\n", + "{ 'tcp://127.0.0.1:33343': { 'current_bytes': '66.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '60.4GB'},\n", + " 'tcp://127.0.0.1:33565': { 'current_bytes': '256.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:33977': { 'current_bytes': '168.4MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.4GB'},\n", + " 'tcp://127.0.0.1:34603': { 'current_bytes': '413.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '60.2GB'},\n", + " 'tcp://127.0.0.1:36543': { 'current_bytes': '141.7MB',\n", + " 'peak_bytes': '2.5GB',\n", + " 'total_bytes': '59.5GB'},\n", + " 'tcp://127.0.0.1:39379': { 'current_bytes': '324.9MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '61.5GB'},\n", + " 'tcp://127.0.0.1:40517': { 'current_bytes': '368.7MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.2GB'},\n", + " 'tcp://127.0.0.1:40547': { 'current_bytes': '452.6MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '61.8GB'},\n", + " 'tcp://127.0.0.1:40565': { 'current_bytes': '154.8MB',\n", + " 'peak_bytes': '2.7GB',\n", + " 'total_bytes': '62.3GB'},\n", + " 'tcp://127.0.0.1:40769': { 'current_bytes': '181.3MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:42093': { 'current_bytes': '118.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:42897': { 'current_bytes': '280.7MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:43245': { 'current_bytes': '236.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.9GB'},\n", + " 'tcp://127.0.0.1:46157': { 'current_bytes': '275.1MB',\n", + " 'peak_bytes': '2.6GB',\n", + " 'total_bytes': '62.4GB'},\n", + " 'tcp://127.0.0.1:46757': { 'current_bytes': '183.6MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '61.7GB'},\n", + " 'tcp://127.0.0.1:46883': { 'current_bytes': '167.1MB',\n", + " 'peak_bytes': '2.8GB',\n", + " 'total_bytes': '62.0GB'}}\n", + "6.32 s ± 0 ns per loop (mean ± std. dev. 
of 1 run, 30 loops each)\n"
+   ]
+  }
+ ],
+ "source": [
+  "%%timeit -n30 -r1\n",
+  "\n",
+  "\n",
+  "execution_time, allocation_counts = sample_graph(\n",
+  "    G,\n",
+  "    dask_label_df,\n",
+  "    output_sample_path,\n",
+  "    seed=seed,\n",
+  "    batch_size=batch_size,\n",
+  "    seeds_per_call=seeds_per_call,\n",
+  "    batches_per_partition=batches_per_partition,\n",
+  "    fanout=fanout,\n",
+  "    persist=persist,\n",
+  ")\n"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "27066cf3",
+  "metadata": {},
+  "outputs": [],
+  "source": []
+ }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.11 ('base')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "f708a36acfaef0acf74ccd43dfb58100269bf08fb79032a1e0a6f35bd9856f51"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
new file mode 100755
index 00000000000..e62cb3cda29
--- /dev/null
+++ b/benchmarks/cugraph/standalone/bulk_sampling/bulk_sampling.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: bulk_sampling.sh <dataset_name> <dataset_root> <output_root> <batch_sizes> <fanouts> <reverse_edges>
+
+export RAPIDS_NO_INITIALIZE="1"
+export CUDF_SPILL="1"
+export LIBCUDF_CUFILE_POLICY=OFF
+
+
+dataset_name=$1
+dataset_root=$2
+output_root=$3
+batch_sizes=$4
+fanouts=$5
+reverse_edges=$6
+
+rm -rf "$output_root"
+mkdir -p "$output_root"
+
+# Change to 2 in Selene
+gpu_per_replica=4
+#--add_edge_ids \
+
+# Expand to 1, 4, 8 in Selene
+for i in 1 2 3 4
+do
+    for replication in 2;
+    do
+        dataset_name_with_replication="${dataset_name}[${replication}]"
+        dask_worker_devices=$(seq -s, 0 $((gpu_per_replica*replication-1)))
+        echo "Sampling dataset = $dataset_name_with_replication on devices = $dask_worker_devices"
+        python3 cugraph_bulk_sampling.py --datasets $dataset_name_with_replication \
+            --dataset_root $dataset_root \
+            --batch_sizes $batch_sizes \
+            --output_root $output_root \
+            --dask_worker_devices $dask_worker_devices \
+            --fanouts $fanouts \
+            --reverse_edges
+    done
+done
\ No newline at end of file
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
new file mode 100644
index 00000000000..d2a3716da8a
--- /dev/null
+++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py
@@ -0,0 +1,740 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import warnings
+import argparse
+import traceback
+
+from cugraph.testing.mg_utils import (
+    generate_edgelist_rmat,
+    # get_allocation_counts_dask_persist,
+    get_allocation_counts_dask_lazy,
+    sizeof_fmt,
+    get_peak_output_ratio_across_workers,
+    restart_client,
+    start_dask_client,
+    stop_dask_client,
+    enable_spilling,
+)
+
+from cugraph.structure.symmetrize import symmetrize
+from cugraph.experimental.gnn import BulkSampler
+
+import cugraph
+
+import json
+import re
+import os
+import gc
+from time import sleep, perf_counter
+from math import ceil
+
+import pandas as pd
+import numpy as np
+import cupy
+import cudf
+
+import dask_cudf
+import dask.dataframe as ddf
+from dask.distributed import default_client
+from cugraph.dask import get_n_workers
+
+from typing import Optional, Union, Dict
+
+
+def construct_graph(dask_dataframe):
+    """
+    Args:
+        dask_dataframe:
+            dask_cudf.DataFrame containing weighted and undirected edges with
+            self loops. Multiple edges will likely be present as well. May
+            optionally contain an 'etp' (edge type) column of dtype int32.
+    Returns:
+        G: cugraph.MultiGraph (directed, with renumbering disabled)
+    """
+    assert dask_dataframe['src'].dtype == 'int64'
+    assert dask_dataframe['dst'].dtype == 'int64'
+
+    if 'etp' in dask_dataframe.columns:
+        assert dask_dataframe['etp'].dtype == 'int32'
+
+    G = cugraph.MultiGraph(directed=True)
+    G.from_dask_cudf_edgelist(
+        dask_dataframe,
+        source="src",
+        destination="dst",
+        edge_type='etp' if 'etp' in dask_dataframe.columns else None,
+        renumber=False
+    )
+    return G
+
+
+def symmetrize_ddf(dask_dataframe):
+    source_col, dest_col = symmetrize(
+        dask_dataframe,
+        'src',
+        'dst',
+        multi=True,
+        symmetrize=True,
+    )
+
+    new_ddf = source_col.to_frame()
+    new_ddf['dst'] = dest_col
+
+    return new_ddf
+
+
+def renumber_ddf(dask_df, persist=False):
+    vertices = dask_cudf.concat([dask_df['src'], dask_df['dst']]).unique().reset_index(drop=True)
+    if persist:
+        vertices = vertices.persist()
+
+    vertices.name = 'v'
+    vertices = vertices.reset_index().set_index('v').rename(columns={'index': 'm'})
+    if persist:
+        vertices = vertices.persist()
+
+    src = dask_df.merge(vertices, left_on='src', right_on='v', how='left').m.rename('src')
+    dst = dask_df.merge(vertices, left_on='dst', right_on='v', how='left').m.rename('dst')
+    df = src.to_frame()
+    df['dst'] = dst
+
+    return df.reset_index(drop=True)
+
+
+def _make_batch_ids(bdf: cudf.DataFrame, batch_size: int, num_workers: int, partition_info: Optional[Union[dict, str]] = None):
+    # Required by dask; need to skip dummy partitions.
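+    # (map_partitions may call this function with a zero-length "dummy" frame and
+    # partition_info=None while dask validates metadata; returning an empty frame
+    # with the expected schema keeps that validation happy.)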
+    if partition_info is None:
+        # Dummy partition: return an empty frame matching the declared `meta`.
+        return cudf.DataFrame({
+            'node': cudf.Series(dtype='int64'),
+            'batch': cudf.Series(dtype='int32')
+        })
+
+    partition = partition_info['number']
+    if partition is None:
+        raise ValueError('partition_info is missing the partition number')
+
+    num_batches = int(ceil(len(bdf) / batch_size))
+
+    batch_ids = cupy.repeat(
+        cupy.arange(num_batches * partition, num_batches * (partition + 1), dtype='int32'),
+        batch_size
+    )[:len(bdf)]
+
+    bdf = bdf.reset_index(drop=True)
+    bdf['batch'] = cudf.Series(batch_ids)
+
+    return bdf
+
+
+def _replicate_df(df: cudf.DataFrame, replication_factor: int, col_item_counts: Dict[str, int], partition_info: Optional[Union[dict, str]] = None):
+    # Required by dask; need to skip dummy partitions.
+    if partition_info is None:
+        return cudf.DataFrame({
+            col: cudf.Series(dtype=df[col].dtype) for col in col_item_counts.keys()
+        })
+
+    original_df = df.copy()
+
+    if replication_factor > 1:
+        for r in range(1, replication_factor):
+            # Copy so each replica is offset from the original values rather
+            # than accumulating offsets across iterations.
+            df_replicated = original_df.copy()
+            for col, offset in col_item_counts.items():
+                df_replicated[col] += offset * r
+
+            df = cudf.concat([df, df_replicated], ignore_index=True)
+
+    return df
+
+
+@get_allocation_counts_dask_lazy(return_allocations=True, logging=True)
+def sample_graph(G, label_df, output_path, seed=42, batch_size=500, seeds_per_call=200000, batches_per_partition=100, fanout=[5, 5, 5], persist=False):
+    cupy.random.seed(seed)
+
+    sampler = BulkSampler(
+        batch_size=batch_size,
+        output_path=output_path,
+        graph=G,
+        fanout_vals=fanout,
+        with_replacement=False,
+        random_state=seed,
+        seeds_per_call=seeds_per_call,
+        batches_per_partition=batches_per_partition,
+        log_level=logging.INFO
+    )
+
+    n_workers = len(default_client().scheduler_info()['workers'])
+
+    meta = cudf.DataFrame({
+        'node': cudf.Series(dtype='int64'),
+        'batch': cudf.Series(dtype='int32')
+    })
+
+    batch_df = label_df.map_partitions(_make_batch_ids, batch_size, n_workers, meta=meta)
+    #batch_df = batch_df.sort_values(by='node')
+
+    # Should always persist the batch dataframe or performance may be suboptimal.
+    batch_df = batch_df.persist()
+
+    del label_df
+    print('created batches')
+
+    start_time = perf_counter()
+    sampler.add_batches(batch_df, start_col_name='node', batch_col_name='batch')
+    sampler.flush()
+    end_time = perf_counter()
+    print('flushed all batches')
+    return (end_time - start_time)
+
+
+def assign_offsets_pyg(node_counts: Dict[str, int], replication_factor: int = 1):
+    # cuGraph-PyG assigns offsets based on lexicographic order
+    node_offsets = {}
+    node_offsets_replicated = {}
+    count = 0
+    count_replicated = 0
+    for node_type in sorted(node_counts.keys()):
+        node_offsets[node_type] = count
+        node_offsets_replicated[node_type] = count_replicated
+
+        count += node_counts[node_type]
+        count_replicated += node_counts[node_type] * replication_factor
+
+    return node_offsets, node_offsets_replicated, count_replicated
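+
+# Worked example for assign_offsets_pyg (hypothetical counts): with
+# node_counts = {'author': 10, 'paper': 20} and replication_factor = 2,
+#     node_offsets            == {'author': 0, 'paper': 10}
+#     node_offsets_replicated == {'author': 0, 'paper': 20}
+#     total replicated node count == 60
+# because offsets are assigned in sorted (lexicographic) node type order.
+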
+def generate_rmat_dataset(dataset, seed=62, labeled_percentage=0.01, num_labels=256, reverse_edges=False, persist=False, add_edge_types=False):
+    """
+    Generates an rmat dataset.  Currently does not support heterogeneous datasets.
+
+    Parameters
+    ----------
+    dataset: The specifier of the rmat dataset (i.e. rmat_20_16)
+    seed: The seed to use for random number generation
+    labeled_percentage: The percentage of nodes that are given a training label
+    num_labels: The number of classes for the labeled nodes
+    reverse_edges: Whether to reverse the edges in the edgelist (should be True for DGL, False for PyG)
+    """
+
+    dataset = dataset.split('_')
+    scale = int(dataset[1])
+    edgefactor = int(dataset[2])
+
+    dask_edgelist_df = generate_edgelist_rmat(
+        scale=scale, edgefactor=edgefactor, seed=seed, unweighted=True, mg=True,
+    )
+    dask_edgelist_df = dask_edgelist_df.astype("int64")
+    dask_edgelist_df = dask_edgelist_df.reset_index(drop=True)
+
+    dask_edgelist_df = renumber_ddf(dask_edgelist_df).persist()
+    if persist:
+        dask_edgelist_df = dask_edgelist_df.persist()
+
+    dask_edgelist_df = symmetrize_ddf(dask_edgelist_df).persist()
+    if persist:
+        dask_edgelist_df = dask_edgelist_df.persist()
+
+    if add_edge_types:
+        dask_edgelist_df['etp'] = cupy.int32(0)  # doesn't matter what the value is, really
+
+    # generator = np.random.default_rng(seed=seed)
+    num_labeled_nodes = int(2**(scale+1) * labeled_percentage)
+    label_df = pd.DataFrame({
+        'node': np.arange(num_labeled_nodes),
+        # 'label': generator.integers(0, num_labels - 1, num_labeled_nodes).astype('float32')
+    })
+
+    n_workers = len(default_client().scheduler_info()['workers'])
+    dask_label_df = ddf.from_pandas(label_df, npartitions=n_workers*2)
+    del label_df
+    gc.collect()
+
+    dask_label_df = dask_cudf.from_dask_dataframe(dask_label_df)
+
+    node_offsets = {'paper': 0}
+    edge_offsets = {('paper', 'cites', 'paper'): 0}
+    total_num_nodes = int(dask_cudf.concat([dask_edgelist_df.src, dask_edgelist_df.dst]).nunique().compute())
+
+    if reverse_edges:
+        dask_edgelist_df = dask_edgelist_df.rename(columns={'src': 'dst', 'dst': 'src'})
+
+    return dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes
+
+
+def load_disk_dataset(dataset, dataset_dir='.', reverse_edges=True, replication_factor=1, persist=False, add_edge_types=False):
+    from pathlib import Path
+    path = Path(dataset_dir) / dataset
+    parquet_path = path / 'parquet'
+
+    n_workers = get_n_workers()
+
+    with open(os.path.join(path, 'meta.json')) as meta_file:
+        meta = json.load(meta_file)
+
+    node_offsets, node_offsets_replicated, total_num_nodes = \
+        assign_offsets_pyg(meta['num_nodes'], replication_factor=replication_factor)
+
+    edge_index_dict = {}
+    for edge_type in meta['num_edges'].keys():
+        print(f'Loading edge index for edge type {edge_type}')
+
+        can_edge_type = tuple(edge_type.split('__'))
+        edge_index_dict[can_edge_type] = dask_cudf.read_parquet(
+            Path(parquet_path) / edge_type / 'edge_index.parquet'
+        ).repartition(n_workers*2)
+
+        edge_index_dict[can_edge_type]['src'] += node_offsets_replicated[can_edge_type[0]]
+        edge_index_dict[can_edge_type]['dst'] += node_offsets_replicated[can_edge_type[-1]]
+
+        if persist:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist()
+
+        if replication_factor > 1:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].map_partitions(
+                _replicate_df,
+                replication_factor,
+                {
+                    'src': meta['num_nodes'][can_edge_type[0]],
+                    'dst': meta['num_nodes'][can_edge_type[2]],
+                },
+                meta=cudf.DataFrame({'src': cudf.Series(dtype='int64'), 'dst': cudf.Series(dtype='int64')})
+            )
+
+            if persist:
+                edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist()
+
+        gc.collect()
+
+        if reverse_edges:
+            edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].rename(columns={'src': 'dst', 'dst': 'src'})
+
+        if persist:
edge_index_dict[can_edge_type] = edge_index_dict[can_edge_type].persist() + + # Assign numeric edge type ids based on lexicographic order + edge_offsets = {} + edge_count = 0 + for num_edge_type, can_edge_type in enumerate(sorted(edge_index_dict.keys())): + if add_edge_types: + edge_index_dict[can_edge_type]['etp'] = cupy.int32(num_edge_type) + edge_offsets[can_edge_type] = edge_count + edge_count += len(edge_index_dict[can_edge_type]) + + all_edges_df = dask_cudf.concat( + list(edge_index_dict.values()) + ) + + if persist: + all_edges_df = all_edges_df.persist() + + del edge_index_dict + gc.collect() + + node_labels = {} + for node_type, offset in node_offsets_replicated.items(): + print(f'Loading node labels for node type {node_type} (offset={offset})') + node_label_path = os.path.join(os.path.join(parquet_path, node_type), 'node_label.parquet') + if os.path.exists(node_label_path): + node_labels[node_type] = dask_cudf.read_parquet(node_label_path).repartition(n_workers).drop('label',axis=1).persist() + node_labels[node_type]['node'] += offset + node_labels[node_type] = node_labels[node_type].persist() + + if replication_factor > 1: + node_labels[node_type] = node_labels[node_type].map_partitions( + _replicate_df, + replication_factor, + { + 'node': meta['num_nodes'][node_type] + }, + meta=cudf.DataFrame({'node':cudf.Series(dtype='int64')}) + ) + + if persist: + node_labels[node_type] = node_labels[node_type].persist() + + gc.collect() + + node_labels_df = dask_cudf.concat( + list(node_labels.values()) + ) + + if persist: + node_labels_df = node_labels_df.persist() + + del node_labels + gc.collect() + + return all_edges_df, node_labels_df, node_offsets_replicated, edge_offsets, total_num_nodes + + +def benchmark_cugraph_bulk_sampling( + dataset, + output_path, + seed, + batch_size, + seeds_per_call, + fanout, + reverse_edges=True, + dataset_dir='.', + replication_factor=1, + num_labels=256, + labeled_percentage=0.001, + persist=False, + add_edge_types=False): + """ + Entry point for the benchmark. + + Parameters + ---------- + dataset: str + The dataset to sample. Can be rmat_{scale}_{edgefactor}, or the name of an ogb dataset. + output_path: str + The output path, where samples and metadata will be stored. + seed: int + The random seed. + batch_size: int + The batch size (number of input seeds in a single sampling batch). + seeds_per_call: int + The number of input seeds in a single sampling call. + fanout: list[int] + The fanout. + reverse_edges: bool + Whether to reverse edges when constructing the graph. + dataset_dir: str + The directory where datasets are stored (only for ogb datasets) + replication_factor: int + The number of times to replicate the dataset. + num_labels: int + The number of random labels to generate (only for rmat datasets) + labeled_percentage: float + The percentage of the data that is labeled (only for rmat datasets) + Defaults to 0.001 to match papers100M + persist: bool + Whether to aggressively persist data in dask in attempt to speed up ETL. + Defaults to False. + add_edge_types: bool + Whether to add edge types to the edgelist. + Defaults to False. 
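+
+    Returns
+    -------
+    tuple of (num_input_edges, input_to_peak_ratio, output_to_peak_ratio,
+    input_memory_per_worker, peak_allocation_across_workers)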
+ """ + print(dataset) + if dataset[0:4] == 'rmat': + dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \ + generate_rmat_dataset( + dataset, + reverse_edges=reverse_edges, + seed=seed, + labeled_percentage=labeled_percentage, + num_labels=num_labels, + persist=persist, + add_edge_types=add_edge_types + ) + + else: + dask_edgelist_df, dask_label_df, node_offsets, edge_offsets, total_num_nodes = \ + load_disk_dataset( + dataset, + dataset_dir=dataset_dir, + reverse_edges=reverse_edges, + replication_factor=replication_factor, + persist=persist, + add_edge_types=add_edge_types + ) + + num_input_edges = len(dask_edgelist_df) + print( + f"Number of input edges = {num_input_edges:,}" + ) + + G = construct_graph( + dask_edgelist_df + ) + del dask_edgelist_df + print('constructed graph') + + input_memory = G.edgelist.edgelist_df.memory_usage().sum().compute() + print(f'input memory: {input_memory}') + + output_subdir = os.path.join(output_path, f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}') + os.makedirs(output_subdir) + + output_sample_path = os.path.join(output_subdir, 'samples') + os.makedirs(output_sample_path) + + batches_per_partition = 200_000 // batch_size + execution_time, allocation_counts = sample_graph( + G, + dask_label_df, + output_sample_path, + seed=seed, + batch_size=batch_size, + seeds_per_call=seeds_per_call, + batches_per_partition=batches_per_partition, + fanout=fanout, + persist=persist, + ) + + output_meta = { + 'dataset': dataset, + 'dataset_dir': dataset_dir, + 'seed': seed, + 'node_offsets': node_offsets, + 'edge_offsets': {'__'.join(k): v for k, v in edge_offsets.items()}, + 'total_num_nodes': total_num_nodes, + 'total_num_edges': num_input_edges, + 'batch_size': batch_size, + 'seeds_per_call': seeds_per_call, + 'batches_per_partition': batches_per_partition, + 'fanout': fanout, + 'replication_factor': replication_factor, + 'num_sampling_gpus': len(G._plc_graph), + 'execution_time': execution_time, + } + + with open(os.path.join(output_subdir, 'output_meta.json'), 'w') as f: + json.dump( + output_meta, + f, + indent='\t' + ) + + print('allocation counts b:') + print(allocation_counts.values()) + + ( + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) = get_memory_statistics( + allocation_counts=allocation_counts, input_memory=input_memory + ) + print(f"Number of edges in final graph = {G.number_of_edges():,}") + print("-" * 80) + return ( + num_input_edges, + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) + + +def get_memory_statistics(allocation_counts, input_memory): + """ + Get memory statistics for the benchmark. 
+ """ + output_to_peak_ratio = get_peak_output_ratio_across_workers(allocation_counts) + peak_allocation_across_workers = max( + [a["peak_bytes"] for a in allocation_counts.values()] + ) + input_memory_per_worker = input_memory / len(allocation_counts.keys()) + input_to_peak_ratio = peak_allocation_across_workers / input_memory_per_worker + print(f"Edge List Memory = {sizeof_fmt(input_memory_per_worker)}") + print(f"Peak Memory across workers = {sizeof_fmt(peak_allocation_across_workers)}") + print(f"Max Peak to output graph ratio across workers = {output_to_peak_ratio:.2f}") + print( + f"Max Peak to avg input graph ratio across workers = {input_to_peak_ratio:.2f}" + ) + return ( + input_to_peak_ratio, + output_to_peak_ratio, + input_memory_per_worker, + peak_allocation_across_workers, + ) + + +def get_args(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--output_root', + type=str, + help='The output root directory. File/folder names are auto-generated.', + required=True, + ) + + parser.add_argument( + '--dataset_root', + type=str, + help='The dataset root directory containing ogb datasets.', + required=True, + ) + + parser.add_argument( + '--datasets', + type=str, + help=( + 'Comma separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16).' + ' For ogb datasets, can provide replication factor using brackets.' + ), + required=True, + ) + + parser.add_argument( + '--fanouts', + type=str, + help='Comma separated list of fanouts (i.e. 10_25,5_5_5)', + required=False, + default='10_25', + ) + + parser.add_argument( + '--batch_sizes', + type=str, + help='Comma separated list of batch sizes (i.e. 500,1000)', + required=False, + default='512,1024' + ) + + parser.add_argument( + '--seeds_per_call_opts', + type=str, + help='Comma separated list of seeds per call (i.e. 1000000,2000000)', + required=False, + default='524288', + ) + + parser.add_argument( + '--reverse_edges', + action='store_true', + help='Whether to reverse the edges for DGL (defaults to False). Should be True for DGL, False for PyG.', + required=False, + default=False, + ) + + parser.add_argument( + '--dask_worker_devices', + type=str, + help='Comma separated list of dask worker devices', + required=False, + default="0" + ) + + parser.add_argument( + '--random_seed', + type=int, + help='Random seed', + required=False, + default=62 + ) + + parser.add_argument( + '--persist', + action='store_true', + help='Will add additional persist() calls to speed up ETL. Does not affect sampling runtime.', + required=False, + default=False, + ) + + parser.add_argument( + '--add_edge_types', + action='store_true', + help='Adds edge types to the edgelist. 
Required for PyG if not providing edge ids.',
+        required=False,
+        default=False,
+    )
+
+    return parser.parse_args()
+
+
+# Script entry point.
+if __name__ == "__main__":
+    logging.basicConfig()
+
+    args = get_args()
+    fanouts = [[int(f) for f in fanout.split('_')] for fanout in args.fanouts.split(',')]
+    datasets = args.datasets.split(',')
+    batch_sizes = [int(b) for b in args.batch_sizes.split(',')]
+    seeds_per_call_opts = [int(s) for s in args.seeds_per_call_opts.split(',')]
+    dask_worker_devices = [int(d) for d in args.dask_worker_devices.split(',')]
+
+    client, cluster = start_dask_client(dask_worker_devices=dask_worker_devices, jit_unspill=False, rmm_pool_size=28e9, rmm_async=True)
+    enable_spilling()
+    stats_ls = []
+    client.run(enable_spilling)
+    for dataset in datasets:
+        # Parse an optional replication factor suffix (i.e. ogbn_papers100M[4]);
+        # using capture groups also handles multi-digit factors such as [16].
+        m = re.match(r'(\w+)\[([0-9]+)\]$', dataset)
+        if m:
+            dataset = m.group(1)
+            replication_factor = int(m.group(2))
+        else:
+            replication_factor = 1
+
+        for fanout in fanouts:
+            for batch_size in batch_sizes:
+                for seeds_per_call in seeds_per_call_opts:
+                    print(f'dataset: {dataset}')
+                    print(f'batch size: {batch_size}')
+                    print(f'fanout: {fanout}')
+                    print(f'seeds_per_call: {seeds_per_call}')
+
+                    try:
+                        stats_d = {}
+                        (
+                            num_input_edges,
+                            input_to_peak_ratio,
+                            output_to_peak_ratio,
+                            input_memory_per_worker,
+                            peak_allocation_across_workers,
+                        ) = benchmark_cugraph_bulk_sampling(
+                            dataset=dataset,
+                            output_path=args.output_root,
+                            seed=args.random_seed,
+                            batch_size=batch_size,
+                            seeds_per_call=seeds_per_call,
+                            fanout=fanout,
+                            dataset_dir=args.dataset_root,
+                            reverse_edges=args.reverse_edges,
+                            replication_factor=replication_factor,
+                            persist=args.persist,
+                            add_edge_types=args.add_edge_types,
+                        )
+                        stats_d["dataset"] = dataset
+                        stats_d["num_input_edges"] = num_input_edges
+                        stats_d["batch_size"] = batch_size
+                        stats_d["fanout"] = fanout
+                        stats_d["seeds_per_call"] = seeds_per_call
+                        stats_d["input_memory_per_worker"] = sizeof_fmt(input_memory_per_worker)
+                        stats_d["peak_allocation_across_workers"] = sizeof_fmt(
+                            peak_allocation_across_workers
+                        )
+                        stats_d["input_to_peak_ratio"] = input_to_peak_ratio
+                        stats_d["output_to_peak_ratio"] = output_to_peak_ratio
+                        stats_ls.append(stats_d)
+                    except Exception as e:
+                        warnings.warn('An exception occurred!')
+                        print(e)
+                        traceback.print_exc()
+                        restart_client(client)
+                        sleep(10)
+
+        stats_df = pd.DataFrame(
+            stats_ls,
+            columns=[
+                "dataset",
+                "num_input_edges",
+                "directed",
+                "renumber",
+                "input_memory_per_worker",
+                "peak_allocation_across_workers",
+                "input_to_peak_ratio",
+                "output_to_peak_ratio",
+            ],
+        )
+        stats_df.to_csv("cugraph_sampling_stats.csv")
+        print("-" * 40 + f"dataset = {dataset} completed" + "-" * 40)
+
+    # Cleanup Dask Cluster
+    stop_dask_client(client, cluster)
diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
index 3bb98ce4150..cf9cba2af4d 100644
--- a/cpp/include/cugraph/algorithms.hpp
+++ b/cpp/include/cugraph/algorithms.hpp
@@ -378,10 +378,11 @@ rmm::device_uvector<weight_t> betweenness_centrality(
  * @param normalized A flag indicating whether or not to normalize the result
  * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
  *
- * @return device vector containing the centralities.
+ * @return edge_property_t containing the centralities.
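+ * (One value per edge, replacing the flat device vector returned previously.)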
 */
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
-rmm::device_uvector<weight_t> edge_betweenness_centrality(
+edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, weight_t>
+edge_betweenness_centrality(
   const raft::handle_t& handle,
   graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
   std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
@@ -1181,6 +1182,9 @@ void sssp(raft::handle_t const& handle,
 /**
  * @brief Compute PageRank scores.
  *
+ * @deprecated This API is deprecated and will be replaced by the new version below,
+ * which returns metadata about the algorithm.
+ *
  * This function computes general (if @p personalization_vertices is `nullptr`) or personalized (if
  * @p personalization_vertices is not `nullptr`.) PageRank scores.
  *
@@ -1236,6 +1240,74 @@ void pagerank(raft::handle_t const& handle,
              bool has_initial_guess = false,
              bool do_expensive_check = false);
 
+/**
+ * @brief Metadata about the execution of one of the centrality algorithms
+ */
+// FIXME: This structure should be propagated to other algorithms that converge
+// (eigenvector centrality, hits and katz centrality)
+//
+struct centrality_algorithm_metadata_t {
+  size_t number_of_iterations_{};
+  bool converged_{};
+};
+
+/**
+ * @brief Compute PageRank scores.
+ *
+ * This function computes general (if @p personalization is `std::nullopt`) or personalized (if
+ * @p personalization is not `std::nullopt`) PageRank scores.
+ *
+ * @throws cugraph::logic_error on erroneous input arguments or if fails to converge before @p
+ * max_iterations.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam result_t Type of PageRank scores.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == false, edge weights are assumed to be 1.0.
+ * @param precomputed_vertex_out_weight_sums Pointer to an array storing sums of out-going edge
+ * weights for the vertices (for re-use) or `std::nullopt`. If `std::nullopt`, these values are
+ * freshly computed. Computing these values outside this function reduces the number of memory
+ * allocations/deallocations and avoids redundant computation when a user repeatedly computes
+ * PageRank scores using the same graph with different personalization vectors.
+ * @param personalization Optional tuple containing device spans of vertex identifiers and
+ * personalization values for the vertices (compute personalized PageRank) or `std::nullopt`
+ * (compute general PageRank).
+ * @param initial_pageranks Optional device span containing initial PageRank values. If
+ * specified this array will be used as the initial values and the PageRank values will be
+ * updated in place. If not specified then the initial values will be set to 1.0 divided by
+ * the number of vertices in the graph and the return value will contain an `rmm::device_uvector`
+ * containing the resulting PageRank values.
+ * @param alpha PageRank damping factor.
+ * @param epsilon Error tolerance to check convergence. Convergence is assumed if the sum of the
+ * differences in PageRank values between two consecutive iterations is less than the number of
+ * vertices in the graph multiplied by @p epsilon.
+ * @param max_iterations Maximum number of PageRank iterations.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing the optional PageRank results (populated if @p initial_pageranks is
+ * set to `std::nullopt`) and a metadata structure indicating how many iterations were run and
+ * whether the algorithm converged.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<result_t>, centrality_algorithm_metadata_t> pagerank(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+  std::optional<raft::device_span<weight_t const>> precomputed_vertex_out_weight_sums,
+  std::optional<std::tuple<raft::device_span<vertex_t const>, raft::device_span<result_t const>>>
+    personalization,
+  std::optional<raft::device_span<result_t const>> initial_pageranks,
+  result_t alpha,
+  result_t epsilon,
+  size_t max_iterations = 500,
+  bool do_expensive_check = false);
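+
+// Illustrative usage sketch (not part of this header; assumes an existing handle,
+// graph_view, and edge_weight_view, with result_t == weight_t):
+//
+//   auto [pageranks, metadata] =
+//     cugraph::pagerank<vertex_t, edge_t, weight_t, weight_t, multi_gpu>(
+//       handle, graph_view, edge_weight_view,
+//       std::nullopt /* precomputed_vertex_out_weight_sums */,
+//       std::nullopt /* personalization */,
+//       std::nullopt /* initial_pageranks */,
+//       weight_t{0.85} /* alpha */, weight_t{1e-6} /* epsilon */);
+//   if (!metadata.converged_) {
+//     // inspect metadata.number_of_iterations_ before trusting the scores
+//   }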
Convergence is assumed if the sum of the + * differences in PageRank values between two consecutive iterations is less than the number of + * vertices in the graph multiplied by @p epsilon. + * @param max_iterations Maximum number of PageRank iterations. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing the optional pagerank results (populated if @p initial_pageranks is + * set to `std::nullopt`) and a metadata structure indicating how many iterations + * were run and whether the algorithm converged. + */ +template +std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations = 500, + bool do_expensive_check = false); + /** * @brief Compute Eigenvector Centrality scores. * diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index 81ece768edb..cd8739114f2 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -190,9 +190,12 @@ void decompress_edge_partition_to_edgelist( edge_partition_device_view_t edge_partition, std::optional> edge_partition_weight_view, + std::optional> + edge_partition_id_view, vertex_t* edgelist_majors /* [OUT] */, vertex_t* edgelist_minors /* [OUT] */, std::optional edgelist_weights /* [OUT] */, + std::optional edgelist_ids /* [OUT] */, std::optional> const& segment_offsets) { auto number_of_edges = edge_partition.number_of_edges(); @@ -203,6 +206,13 @@ void decompress_edge_partition_to_edgelist( edge_partition.indices(), edge_partition.indices() + number_of_edges, edgelist_minors); + if (edge_partition_id_view) { + assert(edgelist_ids.has_value()); + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edgelist_ids)); + } if (edge_partition_weight_view) { assert(edgelist_weights.has_value()); thrust::copy(handle.get_thrust_policy(), diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 1c01568ae17..017b32d0470 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -350,12 +350,14 @@ void renumber_local_ext_vertices(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object of the graph to be decompressed. * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. * @param renumber_map If valid, return the renumbered edge list based on the provided @p * renumber_map * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
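+ *
+ * Example (illustrative sketch; assumes `handle`, `graph_view`, and the
+ * optional property views already exist):
+ * @code
+ * auto [srcs, dsts, weights, edge_ids] = cugraph::decompress_to_edgelist(
+ *   handle, graph_view, edge_weight_view, edge_id_view, std::nullopt);
+ * @endcode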
- * @return Tuple of edge sources, destinations, and (optional) edge weights (if @p - * edge_weight_view.has_value() is true). + * @return Tuple of edge sources, destinations, (optional) edge weights (if + * @p edge_weight_view.has_value() is true) and (optional) edge ids (if + * @p edge_id_view.has_value() is true). */ template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check = false); diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index d29e7c47d14..501e74cf47b 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -57,16 +57,28 @@ struct pack_bool_t { } }; -template +template struct indirection_t { Iterator first{}; - __device__ typename thrust::iterator_traits::value_type operator()(size_t i) const + __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const { return *(first + i); } }; +template +struct indirection_if_idx_valid_t { + Iterator first{}; + index_t invalid_idx{}; + typename thrust::iterator_traits::value_type invalid_value{}; + + __device__ typename thrust::iterator_traits::value_type operator()(index_t i) const + { + return (i != invalid_idx) ? *(first + i) : invalid_value; + } +}; + template struct not_equal_t { T compare{}; diff --git a/cpp/include/cugraph_c/centrality_algorithms.h b/cpp/include/cugraph_c/centrality_algorithms.h index 5fa3520a9cb..0ac0e58540f 100644 --- a/cpp/include/cugraph_c/centrality_algorithms.h +++ b/cpp/include/cugraph_c/centrality_algorithms.h @@ -56,6 +56,22 @@ cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_vertices( cugraph_type_erased_device_array_view_t* cugraph_centrality_result_get_values( cugraph_centrality_result_t* result); +/** + * @brief Get the number of iterations executed from the algorithm metadata + * + * @param [in] result The result from a centrality algorithm + * @return the number of iterations + */ +size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t* result); + +/** + * @brief Returns true if the centrality algorithm converged + * + * @param [in] result The result from a centrality algorithm + * @return True if the centrality algorithm converged, false otherwise + */ +bool_t cugraph_centrality_result_converged(cugraph_centrality_result_t* result); + /** * @brief Free centrality result * @@ -114,9 +130,68 @@ cugraph_error_code_t cugraph_pagerank( cugraph_centrality_result_t** result, cugraph_error_t** error); +/** + * @brief Compute pagerank + * + * @note This function supersedes the deprecated cugraph_pagerank and will + * eventually be renamed to cugraph_pagerank once that version is dropped. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. + * @param [in] precomputed_vertex_out_weight_sums + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). Set to NULL if + * no value is passed.
+ * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph. + * @param [in] alpha PageRank damping factor. + * @param [in] epsilon Error tolerance to check convergence. Convergence is assumed + * if the sum of the differences in PageRank values between two + * consecutive iterations is less than the number of vertices + * in the graph multiplied by @p epsilon. + * @param [in] max_iterations Maximum number of PageRank iterations. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to pagerank results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error); + /** * @brief Compute personalized pagerank * + * @deprecated This version of personalized pagerank should be dropped in favor + * of the cugraph_personalized_pagerank_allow_nonconvergence version. + * Eventually that version will be renamed to this version. + * * @param [in] handle Handle for accessing resources * @param [in] graph Pointer to graph * @param [in] precomputed_vertex_out_weight_vertices @@ -171,6 +246,63 @@ cugraph_error_code_t cugraph_personalized_pagerank( cugraph_centrality_result_t** result, cugraph_error_t** error); +/** + * @brief Compute personalized pagerank + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] precomputed_vertex_out_weight_vertices + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. + * @param [in] precomputed_vertex_out_weight_sums + * Optionally send in precomputed sum of vertex out weights + * (a performance optimization). Set to NULL if + * no value is passed. + * @param [in] initial_guess_vertices + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). This defines the vertices. + * Set to NULL if no value is passed. If NULL, initial PageRank + * values are set to 1.0 divided by the number of vertices in + * the graph. + * @param [in] initial_guess_values + * Optionally send in an initial guess of the pagerank values + * (a performance optimization). Set to NULL if + * no value is passed. 
If NULL, initial PageRank values are set + * to 1.0 divided by the number of vertices in the graph. + * @param [in] personalization_vertices Pointer to an array storing personalization vertex + * identifiers (compute personalized PageRank). + * @param [in] personalization_values Pointer to an array storing personalization values for the + * vertices in the personalization set. + * @param [in] alpha PageRank damping factor. + * @param [in] epsilon Error tolerance to check convergence. Convergence is assumed + * if the sum of the differences in PageRank values between two + * consecutive iterations is less than the number of vertices + * in the graph multiplied by @p epsilon. + * @param [in] max_iterations Maximum number of PageRank iterations. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to pagerank results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error); + /** * @brief Compute eigenvector centrality * @@ -294,6 +426,15 @@ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_src_ cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_dst_vertices( cugraph_edge_centrality_result_t* result); +/** + * @brief Get the edge ids from an edge centrality result + * + * @param [in] result The result from an edge centrality algorithm + * @return type erased array of edge ids + */ +cugraph_type_erased_device_array_view_t* cugraph_edge_centrality_result_get_edge_ids( + cugraph_edge_centrality_result_t* result); + /** * @brief Get the centrality values from an edge centrality algorithm result * diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index f097f9c43a2..ab14ff6c685 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -27,7 +27,8 @@ #include #include -#include +#include +#include #include #include @@ -118,7 +119,7 @@ union pair_packer()>> { */ template , + typename Hasher = cudf::hashing::detail::default_hash, typename Equality = equal_to, typename Allocator = default_allocator>> class concurrent_unordered_map { diff --git a/cpp/src/c_api/betweenness_centrality.cpp b/cpp/src/c_api/betweenness_centrality.cpp index 0387b050262..3cf3e92e960 100644 --- a/cpp/src/c_api/betweenness_centrality.cpp +++ b/cpp/src/c_api/betweenness_centrality.cpp @@ -144,7 +144,7 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun cugraph::c_api::cugraph_type_erased_device_array_view_t const* 
vertex_list_{}; bool_t normalized_{}; bool do_expensive_check_{}; - cugraph::c_api::cugraph_centrality_result_t* result_{}; + cugraph::c_api::cugraph_edge_centrality_result_t* result_{}; edge_betweenness_centrality_functor(cugraph_resource_handle_t const* handle, cugraph_graph_t* graph, @@ -190,6 +190,10 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun cugraph::edge_property_t, weight_t>*>(graph_->edge_weights_); + auto edge_ids = reinterpret_cast< + cugraph::edge_property_t, + edge_t>*>(graph_->edge_ids_); + auto number_map = reinterpret_cast*>(graph_->number_map_); rmm::device_uvector local_vertices(0, handle_.get_stream()); @@ -230,14 +234,24 @@ struct edge_betweenness_centrality_functor : public cugraph::c_api::abstract_fun normalized_, do_expensive_check_); - CUGRAPH_FAIL("Need to clean up return type"); + auto [src_ids, dst_ids, output_centralities, output_edge_ids] = + cugraph::decompress_to_edgelist( + handle_, + graph_view, + std::make_optional(centralities.view()), + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (number_map != nullptr) ? std::make_optional(raft::device_span{ + number_map->data(), number_map->size()}) + : std::nullopt); -#if 0 result_ = new cugraph::c_api::cugraph_edge_centrality_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src_ids, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst_ids, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(centralities, graph_->weight_type_)}; -#endif + output_edge_ids ? new cugraph::c_api::cugraph_type_erased_device_array_t(*output_edge_ids, + graph_->edge_type_) + : nullptr, + new cugraph::c_api::cugraph_type_erased_device_array_t(*output_centralities, + graph_->weight_type_)}; } } }; diff --git a/cpp/src/c_api/centrality_result.cpp b/cpp/src/c_api/centrality_result.cpp index c3ded9fbd89..08e7c0341f2 100644 --- a/cpp/src/c_api/centrality_result.cpp +++ b/cpp/src/c_api/centrality_result.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,18 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_centrality_result_ge internal_pointer->values_->view()); } +size_t cugraph_centrality_result_get_num_iterations(cugraph_centrality_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->num_iterations_; +} + +bool_t cugraph_centrality_result_converged(cugraph_centrality_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->converged_ ? bool_t::TRUE : bool_t::FALSE; +} + extern "C" void cugraph_centrality_result_free(cugraph_centrality_result_t* result) { auto internal_pointer = reinterpret_cast(result); diff --git a/cpp/src/c_api/centrality_result.hpp b/cpp/src/c_api/centrality_result.hpp index e39db686152..e0acde9cce3 100644 --- a/cpp/src/c_api/centrality_result.hpp +++ b/cpp/src/c_api/centrality_result.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,11 +24,14 @@ namespace c_api { struct cugraph_centrality_result_t { cugraph_type_erased_device_array_t* vertex_ids_{}; cugraph_type_erased_device_array_t* values_{}; + size_t num_iterations_{0}; + bool converged_{false}; }; struct cugraph_edge_centrality_result_t { cugraph_type_erased_device_array_t* src_ids_{}; cugraph_type_erased_device_array_t* dst_ids_{}; + cugraph_type_erased_device_array_t* edge_ids_{}; cugraph_type_erased_device_array_t* values_{}; }; diff --git a/cpp/src/c_api/pagerank.cpp b/cpp/src/c_api/pagerank.cpp index 2565a1aebe2..50eda152c67 100644 --- a/cpp/src/c_api/pagerank.cpp +++ b/cpp/src/c_api/pagerank.cpp @@ -120,9 +120,7 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { auto number_map = reinterpret_cast*>(graph_->number_map_); - rmm::device_uvector pageranks(graph_view.local_vertex_partition_range_size(), - handle_.get_stream()); - + rmm::device_uvector initial_pageranks(0, handle_.get_stream()); rmm::device_uvector personalization_vertices(0, handle_.get_stream()); rmm::device_uvector personalization_values(0, handle_.get_stream()); @@ -201,7 +199,7 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { initial_guess_values.size(), handle_.get_stream()); - pageranks = cugraph::detail:: + initial_pageranks = cugraph::detail:: collect_local_vertex_values_from_ext_vertex_value_pairs( handle_, std::move(initial_guess_vertices), @@ -213,25 +211,30 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { do_expensive_check_); } - cugraph::pagerank( - handle_, - graph_view, - (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, - precomputed_vertex_out_weight_sums_ - ? std::make_optional(precomputed_vertex_out_weight_sums.data()) - : std::nullopt, - personalization_vertices_ ? std::make_optional(personalization_vertices.data()) - : std::nullopt, - personalization_values_ ? std::make_optional(personalization_values.data()) : std::nullopt, - personalization_vertices_ - ? std::make_optional(static_cast(personalization_vertices.size())) - : std::nullopt, - pageranks.data(), - static_cast(alpha_), - static_cast(epsilon_), - max_iterations_, - initial_guess_values_ != nullptr, - do_expensive_check_); + auto [pageranks, metadata] = + cugraph::pagerank( + handle_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + precomputed_vertex_out_weight_sums_ + ? std::make_optional( + raft::device_span{precomputed_vertex_out_weight_sums.data(), + precomputed_vertex_out_weight_sums.size()}) + : std::nullopt, + personalization_vertices_ + ? std::make_optional( + std::make_tuple(raft::device_span{personalization_vertices.data(), + personalization_vertices.size()}, + raft::device_span{personalization_values.data(), + personalization_values.size()})) + : std::nullopt, + initial_guess_values_ != nullptr ? 
std::make_optional(raft::device_span{ + initial_pageranks.data(), initial_pageranks.size()}) + : std::nullopt, + static_cast(alpha_), + static_cast(epsilon_), + max_iterations_, + do_expensive_check_); rmm::device_uvector vertex_ids(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); @@ -239,7 +242,9 @@ struct pagerank_functor : public cugraph::c_api::abstract_functor { result_ = new cugraph::c_api::cugraph_centrality_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(vertex_ids, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(pageranks, graph_->weight_type_)}; + new cugraph::c_api::cugraph_type_erased_device_array_t(pageranks, graph_->weight_type_), + metadata.number_of_iterations_, + metadata.converged_}; } } }; @@ -305,6 +310,75 @@ extern "C" cugraph_error_code_t cugraph_pagerank( max_iterations, do_expensive_check); + auto return_value = cugraph::c_api::run_algorithm(graph, functor, result, error); + + CAPI_EXPECTS(cugraph_centrality_result_converged(*result) == bool_t::TRUE, + CUGRAPH_UNKNOWN_ERROR, + "PageRank failed to converge.", + *error); + + return return_value; +} + +extern "C" cugraph_error_code_t cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error) +{ + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and initial_guess_values must match", + *error); + } + pagerank_functor functor(handle, + graph, + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + nullptr, + nullptr, + alpha, + epsilon, + max_iterations, + do_expensive_check); + return cugraph::c_api::run_algorithm(graph, functor, result, error); } @@ -373,6 +447,94 @@ extern "C" cugraph_error_code_t cugraph_personalized_pagerank( *error); } + pagerank_functor functor(handle, + graph, + precomputed_vertex_out_weight_vertices, + precomputed_vertex_out_weight_sums, + initial_guess_vertices, + initial_guess_values, + personalization_vertices, + personalization_values, + alpha, + epsilon, + max_iterations, + do_expensive_check); + + auto return_value =
cugraph::c_api::run_algorithm(graph, functor, result, error); + + CAPI_EXPECTS(cugraph_centrality_result_converged(*result) == bool_t::TRUE, + CUGRAPH_UNKNOWN_ERROR, + "PageRank failed to converge.", + *error); + + return return_value; +} + +extern "C" cugraph_error_code_t cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error) +{ + if (precomputed_vertex_out_weight_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and precomputed_vertex_out_weight_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + precomputed_vertex_out_weight_sums) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and precomputed_vertex_out_weight_sums must match", + *error); + } + if (initial_guess_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + initial_guess_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and initial_guess_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + initial_guess_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and initial_guess_values must match", + *error); + } + if (personalization_vertices != nullptr) { + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + personalization_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and personalization_vertices must match", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->weight_type_ == + reinterpret_cast( + personalization_values) + ->type_, + CUGRAPH_INVALID_INPUT, + "weight type of graph and personalization_values must match", + *error); + } + pagerank_functor functor(handle, graph, precomputed_vertex_out_weight_vertices, diff --git a/cpp/src/centrality/betweenness_centrality_impl.cuh b/cpp/src/centrality/betweenness_centrality_impl.cuh index 5631fadde96..0a87531d6ca 100644 --- a/cpp/src/centrality/betweenness_centrality_impl.cuh +++ b/cpp/src/centrality/betweenness_centrality_impl.cuh @@ -16,8 +16,12 @@ #pragma once #include +#include +#include #include +#include #include +#include #include #include #include @@ -55,6 +59,24 @@ struct brandes_e_op_t { } }; +template +struct extract_edge_e_op_t { + vertex_t d{}; + + template + __device__ thrust::optional> operator()( + vertex_t src, + vertex_t dst, + thrust::tuple src_props, + thrust::tuple dst_props, + weight_t edge_centrality) + { + return ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) + ?
thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; + } +}; + } // namespace namespace cugraph { @@ -77,16 +99,16 @@ std::tuple, rmm::device_uvector> brandes_b constexpr int bucket_idx_cur{0}; constexpr int bucket_idx_next{1}; - rmm::device_uvector sigma(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - rmm::device_uvector distance(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - detail::scalar_fill(handle, distance.data(), distance.size(), invalid_distance); - detail::scalar_fill(handle, sigma.data(), sigma.size(), edge_t{0}); + rmm::device_uvector sigmas(graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + rmm::device_uvector distances(graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + detail::scalar_fill(handle, distances.data(), distances.size(), invalid_distance); + detail::scalar_fill(handle, sigmas.data(), sigmas.size(), edge_t{0}); - edge_src_property_t, edge_t> src_sigma( + edge_src_property_t, edge_t> src_sigmas( handle, graph_view); - edge_dst_property_t, vertex_t> dst_distance( + edge_dst_property_t, vertex_t> dst_distances( handle, graph_view); auto vertex_partition = @@ -97,7 +119,7 @@ std::tuple, rmm::device_uvector> brandes_b handle.get_thrust_policy(), vertex_frontier.bucket(bucket_idx_cur).begin(), vertex_frontier.bucket(bucket_idx_cur).end(), - [d_sigma = sigma.begin(), d_distance = distance.begin(), vertex_partition] __device__( + [d_sigma = sigmas.begin(), d_distance = distances.begin(), vertex_partition] __device__( auto v) { auto offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); d_distance[offset] = 0; @@ -108,15 +130,15 @@ std::tuple, rmm::device_uvector> brandes_b edge_t hop{0}; while (true) { - update_edge_src_property(handle, graph_view, sigma.begin(), src_sigma); - update_edge_dst_property(handle, graph_view, distance.begin(), dst_distance); + update_edge_src_property(handle, graph_view, sigmas.begin(), src_sigmas); + update_edge_dst_property(handle, graph_view, distances.begin(), dst_distances); auto [new_frontier, new_sigma] = transform_reduce_v_frontier_outgoing_e_by_dst(handle, graph_view, vertex_frontier.bucket(bucket_idx_cur), - src_sigma.view(), - dst_distance.view(), + src_sigmas.view(), + dst_distances.view(), cugraph::edge_dummy_property_t{}.view(), brandes_e_op_t{}, reduce_op::plus()); @@ -127,8 +149,8 @@ std::tuple, rmm::device_uvector> brandes_b std::move(new_sigma), vertex_frontier, std::vector{bucket_idx_next}, - thrust::make_zip_iterator(distance.begin(), sigma.begin()), - thrust::make_zip_iterator(distance.begin(), sigma.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin()), [hop] __device__(auto v, auto old_values, auto v_sigma) { return thrust::make_tuple( thrust::make_optional(bucket_idx_next), @@ -143,7 +165,7 @@ std::tuple, rmm::device_uvector> brandes_b ++hop; } - return std::make_tuple(std::move(distance), std::move(sigma)); + return std::make_tuple(std::move(distances), std::move(sigmas)); } template @@ -152,8 +174,8 @@ void accumulate_vertex_results( graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span centralities, - rmm::device_uvector&& distance, - rmm::device_uvector&& sigma, + rmm::device_uvector&& distances, + rmm::device_uvector&& sigmas, bool with_endpoints, bool do_expensive_check) { @@ -162,26 +184,26 @@ void accumulate_vertex_results( vertex_t diameter = transform_reduce_v( 
handle, graph_view, - distance.begin(), + distances.begin(), [] __device__(auto, auto d) { return (d == invalid_distance) ? vertex_t{0} : d; }, vertex_t{0}, reduce_op::maximum{}, do_expensive_check); - rmm::device_uvector delta(sigma.size(), handle.get_stream()); - detail::scalar_fill(handle, delta.data(), delta.size(), weight_t{0}); + rmm::device_uvector deltas(sigmas.size(), handle.get_stream()); + detail::scalar_fill(handle, deltas.data(), deltas.size(), weight_t{0}); if (with_endpoints) { vertex_t count = count_if_v( handle, graph_view, - distance.begin(), + distances.begin(), [] __device__(auto, auto d) { return (d != invalid_distance); }, do_expensive_check); thrust::transform(handle.get_thrust_policy(), - distance.begin(), - distance.end(), + distances.begin(), + distances.end(), centralities.begin(), centralities.begin(), [count] __device__(auto d, auto centrality) { @@ -205,12 +227,12 @@ void accumulate_vertex_results( update_edge_src_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), src_properties); update_edge_dst_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), dst_properties); // FIXME: To do this efficiently, I need a version of @@ -243,29 +265,167 @@ void accumulate_vertex_results( }, weight_t{0}, reduce_op::plus{}, - delta.begin(), + deltas.begin(), do_expensive_check); update_edge_src_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), src_properties); update_edge_dst_property( handle, graph_view, - thrust::make_zip_iterator(distance.begin(), sigma.begin(), delta.begin()), + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), dst_properties); thrust::transform(handle.get_thrust_policy(), centralities.begin(), centralities.end(), - delta.begin(), + deltas.begin(), centralities.begin(), thrust::plus()); } } +template +void accumulate_edge_results( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + edge_property_view_t centralities_view, + rmm::device_uvector&& distances, + rmm::device_uvector&& sigmas, + bool do_expensive_check) +{ + constexpr vertex_t invalid_distance = std::numeric_limits::max(); + + vertex_t diameter = transform_reduce_v( + handle, + graph_view, + distances.begin(), + [] __device__(auto, auto d) { return (d == invalid_distance) ? vertex_t{0} : d; }, + vertex_t{0}, + reduce_op::maximum{}, + do_expensive_check); + + rmm::device_uvector deltas(sigmas.size(), handle.get_stream()); + detail::scalar_fill(handle, deltas.data(), deltas.size(), weight_t{0}); + + edge_src_property_t, + thrust::tuple> + src_properties(handle, graph_view); + edge_dst_property_t, + thrust::tuple> + dst_properties(handle, graph_view); + + update_edge_src_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + src_properties); + update_edge_dst_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + dst_properties); + + // + // For now this will do an O(E) pass over all edges over the diameter + // of the graph.
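+ // (Concretely, the transform_e pass below applies Brandes' accumulation
+ // rule: an edge (v, w) with distance(w) == distance(v) + 1 accumulates
+ // (sigma_v / sigma_w) * (1 + delta_w) for the current source.)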
+ // + // Based on Brandes algorithm, we want to follow back pointers in non-increasing + // distance from S to compute delta + // + for (vertex_t d = diameter; d > 0; --d) { + // + // Populate edge_list with edges where `thrust::get<0>(dst_props) == d` + // and `thrust::get<0>(src_props) == (d-1)` + // + cugraph::edge_bucket_t edge_list(handle); + + { + auto [src, dst] = extract_transform_e(handle, + graph_view, + src_properties.view(), + dst_properties.view(), + centralities_view, + extract_edge_e_op_t{d}, + do_expensive_check); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end())); + + // Eliminate duplicates in case of a multi-graph + auto new_edgelist_end = thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end())); + + src.resize( + thrust::distance(thrust::make_zip_iterator(src.begin(), dst.begin()), new_edgelist_end), + handle.get_stream()); + dst.resize(src.size(), handle.get_stream()); + + edge_list.insert(src.begin(), src.end(), dst.begin()); + } + + transform_e( + handle, + graph_view, + edge_list, + src_properties.view(), + dst_properties.view(), + centralities_view, + [d] __device__(auto src, auto dst, auto src_props, auto dst_props, auto edge_centrality) { + if ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) { + auto sigma_v = static_cast(thrust::get<1>(src_props)); + auto sigma_w = static_cast(thrust::get<1>(dst_props)); + auto delta_w = thrust::get<2>(dst_props); + + return edge_centrality + (sigma_v / sigma_w) * (1 + delta_w); + } else { + return edge_centrality; + } + }, + centralities_view, + do_expensive_check); + + per_v_transform_reduce_outgoing_e( + handle, + graph_view, + src_properties.view(), + dst_properties.view(), + cugraph::edge_dummy_property_t{}.view(), + [d] __device__(auto, auto, auto src_props, auto dst_props, auto) { + if ((thrust::get<0>(dst_props) == d) && (thrust::get<0>(src_props) == (d - 1))) { + auto sigma_v = static_cast(thrust::get<1>(src_props)); + auto sigma_w = static_cast(thrust::get<1>(dst_props)); + auto delta_w = thrust::get<2>(dst_props); + + return (sigma_v / sigma_w) * (1 + delta_w); + } else { + return weight_t{0}; + } + }, + weight_t{0}, + reduce_op::plus{}, + deltas.begin(), + do_expensive_check); + + update_edge_src_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + src_properties); + update_edge_dst_property( + handle, + graph_view, + thrust::make_zip_iterator(distances.begin(), sigmas.begin(), deltas.begin()), + dst_properties); + } +} + template betweenness_centrality( // // Betweenness Centrality algorithm based on the Brandes Algorithm (2001) // - if (do_expensive_check) {} + if (do_expensive_check) { + auto vertex_partition = + vertex_partition_device_view_t(graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices_begin, + vertices_end, + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + if constexpr (multi_gpu) { + num_invalid_vertices = host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input argument: sources have invalid vertex IDs."); +
} rmm::device_uvector centralities(graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -333,14 +510,14 @@ rmm::device_uvector betweenness_centrality( // FIXME: This has an inefficiency in early iterations, as it doesn't have enough work to // keep the GPUs busy. But we can't run too many at once or we will run out of // memory. Need to investigate options to improve this performance - auto [distance, sigma] = + auto [distances, sigmas] = brandes_bfs(handle, graph_view, edge_weight_view, vertex_frontier, do_expensive_check); accumulate_vertex_results(handle, graph_view, edge_weight_view, raft::device_span{centralities.data(), centralities.size()}, - std::move(distance), - std::move(sigma), + std::move(distances), + std::move(sigmas), include_endpoints, do_expensive_check); } @@ -379,7 +556,8 @@ template -rmm::device_uvector edge_betweenness_centrality( +edge_property_t, weight_t> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -388,10 +566,88 @@ rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check) { - CUGRAPH_FAIL("Not Implemented"); - // Edge betweenness is computed like vertex betweenness, but you accumulate - // centrality on each edge. We need to adapt this to support edge properties - // properly. + // + // Betweenness Centrality algorithm based on the Brandes Algorithm (2001) + // + if (do_expensive_check) { + auto vertex_partition = + vertex_partition_device_view_t(graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices_begin, + vertices_end, + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + if constexpr (multi_gpu) { + num_invalid_vertices = host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input argument: sources have invalid vertex IDs."); + } + + edge_property_t, weight_t> centralities( + handle, graph_view); + + fill_edge_property(handle, graph_view, weight_t{0}, centralities, do_expensive_check); + + size_t num_sources = thrust::distance(vertices_begin, vertices_end); + std::vector source_offsets{{0, num_sources}}; + int my_rank = 0; + + if constexpr (multi_gpu) { + auto source_counts = + host_scalar_allgather(handle.get_comms(), num_sources, handle.get_stream()); + + num_sources = std::accumulate(source_counts.begin(), source_counts.end(), 0); + source_offsets.resize(source_counts.size() + 1); + source_offsets[0] = 0; + std::inclusive_scan(source_counts.begin(), source_counts.end(), source_offsets.begin() + 1); + my_rank = handle.get_comms().get_rank(); + } + + // + // FIXME: This could be more efficient using something akin to the + // technique in WCC. Take the entire set of sources, insert them into + // a tagged frontier (tagging each source with itself). Then we can + // expand from multiple sources concurrently. The challenge is managing + // the memory explosion. 
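+ // (Each pass below is single-source: rank r seeds the frontier only when
+ // source_idx falls in [source_offsets[r], source_offsets[r + 1]), so every
+ // rank participates in every BFS even though only one rank owns the source.)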
+ // + for (size_t source_idx = 0; source_idx < num_sources; ++source_idx) { + // + // BFS + // + constexpr size_t bucket_idx_cur = 0; + constexpr size_t num_buckets = 2; + + vertex_frontier_t vertex_frontier(handle, num_buckets); + + if ((source_idx >= source_offsets[my_rank]) && (source_idx < source_offsets[my_rank + 1])) { + vertex_frontier.bucket(bucket_idx_cur) + .insert(vertices_begin + (source_idx - source_offsets[my_rank]), + vertices_begin + (source_idx - source_offsets[my_rank]) + 1); + } + + // + // Now we need to do modified BFS + // + // FIXME: This has an inefficiency in early iterations, as it doesn't have enough work to + // keep the GPUs busy. But we can't run too many at once or we will run out of + // memory. Need to investigate options to improve this performance + auto [distances, sigmas] = + brandes_bfs(handle, graph_view, edge_weight_view, vertex_frontier, do_expensive_check); + accumulate_edge_results(handle, + graph_view, + edge_weight_view, + centralities.mutable_view(), + std::move(distances), + std::move(sigmas), + do_expensive_check); + } + + return centralities; } } // namespace detail @@ -431,7 +687,8 @@ rmm::device_uvector betweenness_centrality( } template -rmm::device_uvector edge_betweenness_centrality( +edge_property_t, weight_t> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/centrality/betweenness_centrality_mg.cu b/cpp/src/centrality/betweenness_centrality_mg.cu index 7bb1f4db6d1..2df843c95c9 100644 --- a/cpp/src/centrality/betweenness_centrality_mg.cu +++ b/cpp/src/centrality/betweenness_centrality_mg.cu @@ -73,7 +73,8 @@ template rmm::device_uvector betweenness_centrality( bool const include_endpoints, bool do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -81,7 +82,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -89,7 +91,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -97,7 +100,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -105,7 +109,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -113,7 +118,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); 
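+// (The specialization below tells cuco that the pair type in question may be
+// compared bitwise, just as CUCO_DECLARE_BITWISE_COMPARABLE does for float and
+// double above.)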
-template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/centrality/betweenness_centrality_sg.cu b/cpp/src/centrality/betweenness_centrality_sg.cu index 1d10b720d09..191857ff5dd 100644 --- a/cpp/src/centrality/betweenness_centrality_sg.cu +++ b/cpp/src/centrality/betweenness_centrality_sg.cu @@ -73,7 +73,8 @@ template rmm::device_uvector betweenness_centrality( bool const include_endpoints, bool do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -81,7 +82,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -89,7 +91,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, float> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -97,7 +100,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -105,7 +109,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, @@ -113,7 +118,8 @@ template rmm::device_uvector edge_betweenness_centrality( bool const normalized, bool const do_expensive_check); -template rmm::device_uvector edge_betweenness_centrality( +template edge_property_t, double> +edge_betweenness_centrality( const raft::handle_t& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index 62ede6eaafb..b388ba53e81 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -42,6 +43,11 @@ CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) +// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. 
+namespace cuco { +template <> +struct is_bitwise_comparable> : std::true_type {}; +} // namespace cuco namespace cugraph { namespace detail { diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index bbd720131de..e811aafc776 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -48,6 +48,11 @@ CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) +// FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. +namespace cuco { +template <> +struct is_bitwise_comparable> : std::true_type {}; +} // namespace cuco namespace cugraph { namespace detail { diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh index 88e9623e043..67574719b45 100644 --- a/cpp/src/detail/graph_partition_utils.cuh +++ b/cpp/src/detail/graph_partition_utils.cuh @@ -19,7 +19,7 @@ #include -#include +#include #include #include #include diff --git a/cpp/src/link_analysis/pagerank_impl.cuh b/cpp/src/link_analysis/pagerank_impl.cuh index 49d1a3eabb9..3a84cdedfda 100644 --- a/cpp/src/link_analysis/pagerank_impl.cuh +++ b/cpp/src/link_analysis/pagerank_impl.cuh @@ -46,22 +46,19 @@ namespace cugraph { namespace detail { -// FIXME: personalization_vector_size is confusing in OPG (local or aggregate?) template -void pagerank( +centrality_algorithm_metadata_t pagerank( raft::handle_t const& handle, GraphViewType const& pull_graph_view, std::optional> edge_weight_view, - std::optional precomputed_vertex_out_weight_sums, - std::optional personalization_vertices, - std::optional personalization_values, - std::optional personalization_vector_size, - result_t* pageranks, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, + raft::device_span>> personalization, + raft::device_span pageranks, result_t alpha, result_t epsilon, size_t max_iterations, - bool has_initial_guess, bool do_expensive_check) { using vertex_t = typename GraphViewType::vertex_type; @@ -75,27 +72,26 @@ void pagerank( "GraphViewType should support the pull model."); auto const num_vertices = pull_graph_view.number_of_vertices(); - if (num_vertices == 0) { return; } + if (num_vertices == 0) { return centrality_algorithm_metadata_t{0, true}; } auto aggregate_personalization_vector_size = - personalization_vertices ? GraphViewType::is_multi_gpu - ? host_scalar_allreduce(handle.get_comms(), - *personalization_vector_size, - raft::comms::op_t::SUM, - handle.get_stream()) - : *personalization_vector_size - : vertex_t{0}; + personalization ? GraphViewType::is_multi_gpu + ? host_scalar_allreduce(handle.get_comms(), + std::get<0>(*personalization).size(), + raft::comms::op_t::SUM, + handle.get_stream()) + : std::get<0>(*personalization).size() + : vertex_t{0}; // 1. 
check input arguments - CUGRAPH_EXPECTS((personalization_vertices.has_value() == false) || - (personalization_values.has_value() && personalization_vector_size.has_value()), - "Invalid input argument: if personalization_vertices.has_value() is true, " - "personalization_values.has_value() and personalization_vector_size.has_value() " - "should be true as well."); + CUGRAPH_EXPECTS((personalization.has_value() == false) || + (std::get<0>(*personalization).size() == std::get<1>(*personalization).size()), + "Invalid input argument: if personalization.has_value() is true, the size of " + "vertices and values should match"); CUGRAPH_EXPECTS( - (personalization_vertices.has_value() == false) || (aggregate_personalization_vector_size > 0), - "Invalid input argument: if personalization_vertices.has_value() is true, the input " + (personalization.has_value() == false) || (aggregate_personalization_vector_size > 0), + "Invalid input argument: if personalization.has_value() is true, the input " "personalization vector size should not be 0."); CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), "Invalid input argument: alpha should be in [0.0, 1.0]."); @@ -106,7 +102,7 @@ auto num_negative_precomputed_vertex_out_weight_sums = count_if_v(handle, pull_graph_view, - *precomputed_vertex_out_weight_sums, + precomputed_vertex_out_weight_sums->data(), [] __device__(auto, auto val) { return val < result_t{0.0}; }); CUGRAPH_EXPECTS( num_negative_precomputed_vertex_out_weight_sums == 0, @@ -126,17 +122,10 @@ "Invalid input argument: input edge weights should have non-negative values."); } - if (has_initial_guess) { - auto num_negative_values = count_if_v( - handle, pull_graph_view, pageranks, [] __device__(auto, auto val) { return val < 0.0; }); - CUGRAPH_EXPECTS(num_negative_values == 0, - "Invalid input argument: initial guess values should be non-negative."); - } - if constexpr (GraphViewType::is_multi_gpu) { auto num_gpus_with_valid_personalization_vector = host_scalar_allreduce(handle.get_comms(), - personalization_vertices ? int{1} : int{0}, + personalization ?
int{1} : int{0}, raft::comms::op_t::SUM, handle.get_stream()); CUGRAPH_EXPECTS( @@ -151,8 +140,8 @@ pull_graph_view.local_vertex_partition_view()); auto num_invalid_vertices = thrust::count_if(handle.get_thrust_policy(), - *personalization_vertices, - *personalization_vertices + *personalization_vector_size, + std::get<0>(*personalization).begin(), + std::get<0>(*personalization).end(), [vertex_partition] __device__(auto val) { return !(vertex_partition.is_valid_vertex(val) && vertex_partition.in_local_vertex_partition_range_nocheck(val)); @@ -163,17 +152,36 @@ } CUGRAPH_EXPECTS(num_invalid_vertices == 0, "Invalid input argument: personalization vertices have invalid vertex IDs."); - auto num_negative_values = - thrust::count_if(handle.get_thrust_policy(), - *personalization_values, - *personalization_values + *personalization_vector_size, - [] __device__(auto val) { return val < 0.0; }); + auto num_negative_values = thrust::count_if(handle.get_thrust_policy(), + std::get<1>(*personalization).begin(), + std::get<1>(*personalization).end(), + [] __device__(auto val) { return val < 0.0; }); if constexpr (GraphViewType::is_multi_gpu) { num_negative_values = host_scalar_allreduce( handle.get_comms(), num_negative_values, raft::comms::op_t::SUM, handle.get_stream()); } CUGRAPH_EXPECTS(num_negative_values == 0, "Invalid input argument: personalization values should be non-negative."); + + rmm::device_uvector check_for_duplicates(std::get<0>(*personalization).size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + std::get<0>(*personalization).begin(), + std::get<0>(*personalization).end(), + check_for_duplicates.begin()); + + thrust::sort( + handle.get_thrust_policy(), check_for_duplicates.begin(), check_for_duplicates.end()); + + auto num_uniques = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(check_for_duplicates.size()), + detail::is_first_in_run_t{check_for_duplicates.data()}); + + CUGRAPH_EXPECTS( + static_cast(num_uniques) == check_for_duplicates.size(), + "Invalid input argument: personalization vertices must not contain duplicate entries."); } } @@ -196,35 +204,16 @@ } } auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums - ? *precomputed_vertex_out_weight_sums + ? (*precomputed_vertex_out_weight_sums).data() : (*tmp_vertex_out_weight_sums).data(); - // 3. initialize pagerank values - - if (has_initial_guess) { - auto sum = reduce_v(handle, pull_graph_view, pageranks, result_t{0.0}); - CUGRAPH_EXPECTS(sum > 0.0, - "Invalid input argument: sum of the PageRank initial " - "guess values should be positive."); - thrust::transform(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - pageranks, - [sum] __device__(auto val) { return val / sum; }); - } else { - thrust::fill(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - result_t{1.0} / static_cast(num_vertices)); - } - - // 4. sum the personalization values + // 3.
sum the personalization values result_t personalization_sum{0.0}; if (aggregate_personalization_vector_size > 0) { personalization_sum = thrust::reduce(handle.get_thrust_policy(), - *personalization_values, - *personalization_values + *personalization_vector_size, + std::get<1>(*personalization).begin(), + std::get<1>(*personalization).end(), result_t{0.0}); if constexpr (GraphViewType::is_multi_gpu) { personalization_sum = host_scalar_allreduce( @@ -243,18 +232,13 @@ void pagerank( edge_src_property_t edge_src_pageranks(handle, pull_graph_view); size_t iter{0}; while (true) { - thrust::copy(handle.get_thrust_policy(), - pageranks, - pageranks + pull_graph_view.local_vertex_partition_range_size(), - old_pageranks.data()); - - auto vertex_val_first = - thrust::make_zip_iterator(thrust::make_tuple(pageranks, vertex_out_weight_sums)); + thrust::copy( + handle.get_thrust_policy(), pageranks.begin(), pageranks.end(), old_pageranks.data()); auto dangling_sum = transform_reduce_v( handle, pull_graph_view, - vertex_val_first, + thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums), [] __device__(auto, auto val) { auto const pagerank = thrust::get<0>(val); auto const out_weight_sum = thrust::get<1>(val); @@ -262,19 +246,21 @@ void pagerank( }, result_t{0.0}); - thrust::transform(handle.get_thrust_policy(), - vertex_val_first, - vertex_val_first + pull_graph_view.local_vertex_partition_range_size(), - pageranks, - [] __device__(auto val) { - auto const pagerank = thrust::get<0>(val); - auto const out_weight_sum = thrust::get<1>(val); - auto const divisor = - out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum; - return pagerank / divisor; - }); + thrust::transform( + handle.get_thrust_policy(), + thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums), + thrust::make_zip_iterator( + pageranks.end(), + vertex_out_weight_sums + pull_graph_view.local_vertex_partition_range_size()), + pageranks.begin(), + [] __device__(auto val) { + auto const pagerank = thrust::get<0>(val); + auto const out_weight_sum = thrust::get<1>(val); + auto const divisor = out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum; + return pagerank / divisor; + }); - update_edge_src_property(handle, pull_graph_view, pageranks, edge_src_pageranks); + update_edge_src_property(handle, pull_graph_view, pageranks.data(), edge_src_pageranks); auto unvarying_part = aggregate_personalization_vector_size == 0 ? 
@@ -243,18 +232,13 @@ void pagerank(
   edge_src_property_t<GraphViewType, result_t> edge_src_pageranks(handle, pull_graph_view);
   size_t iter{0};
   while (true) {
-    thrust::copy(handle.get_thrust_policy(),
-                 pageranks,
-                 pageranks + pull_graph_view.local_vertex_partition_range_size(),
-                 old_pageranks.data());
-
-    auto vertex_val_first =
-      thrust::make_zip_iterator(thrust::make_tuple(pageranks, vertex_out_weight_sums));
+    thrust::copy(
+      handle.get_thrust_policy(), pageranks.begin(), pageranks.end(), old_pageranks.data());
 
     auto dangling_sum = transform_reduce_v(
       handle,
       pull_graph_view,
-      vertex_val_first,
+      thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums),
       [] __device__(auto, auto val) {
         auto const pagerank       = thrust::get<0>(val);
         auto const out_weight_sum = thrust::get<1>(val);
@@ -262,19 +246,21 @@ void pagerank(
       },
       result_t{0.0});
 
-    thrust::transform(handle.get_thrust_policy(),
-                      vertex_val_first,
-                      vertex_val_first + pull_graph_view.local_vertex_partition_range_size(),
-                      pageranks,
-                      [] __device__(auto val) {
-                        auto const pagerank       = thrust::get<0>(val);
-                        auto const out_weight_sum = thrust::get<1>(val);
-                        auto const divisor =
-                          out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum;
-                        return pagerank / divisor;
-                      });
+    thrust::transform(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(pageranks.begin(), vertex_out_weight_sums),
+      thrust::make_zip_iterator(
+        pageranks.end(),
+        vertex_out_weight_sums + pull_graph_view.local_vertex_partition_range_size()),
+      pageranks.begin(),
+      [] __device__(auto val) {
+        auto const pagerank       = thrust::get<0>(val);
+        auto const out_weight_sum = thrust::get<1>(val);
+        auto const divisor = out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum;
+        return pagerank / divisor;
+      });
 
-    update_edge_src_property(handle, pull_graph_view, pageranks, edge_src_pageranks);
+    update_edge_src_property(handle, pull_graph_view, pageranks.data(), edge_src_pageranks);
 
     auto unvarying_part = aggregate_personalization_vector_size == 0
                             ? (dangling_sum * alpha + static_cast<result_t>(1.0 - alpha)) /
@@ -293,7 +279,7 @@ void pagerank(
         },
         unvarying_part,
         reduce_op::plus<result_t>{},
-        pageranks);
+        pageranks.begin());
     } else {
       per_v_transform_reduce_incoming_e(
         handle,
@@ -306,20 +292,23 @@ void pagerank(
         },
         unvarying_part,
         reduce_op::plus<result_t>{},
-        pageranks);
+        pageranks.begin());
     }
     if (aggregate_personalization_vector_size > 0) {
       auto vertex_partition =
         vertex_partition_device_view_t<vertex_t, GraphViewType::is_multi_gpu>(
           pull_graph_view.local_vertex_partition_view());
-      auto val_first = thrust::make_zip_iterator(
-        thrust::make_tuple(*personalization_vertices, *personalization_values));
       thrust::for_each(
         handle.get_thrust_policy(),
-        val_first,
-        val_first + *personalization_vector_size,
-        [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__(
-          auto val) {
+        thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(*personalization).begin(),
+                                                     std::get<1>(*personalization).begin())),
+        thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(*personalization).end(),
+                                                     std::get<1>(*personalization).end())),
+        [vertex_partition,
+         pageranks = pageranks.data(),
+         dangling_sum,
+         personalization_sum,
+         alpha] __device__(auto val) {
           auto v     = thrust::get<0>(val);
           auto value = thrust::get<1>(val);
           *(pageranks + vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)) +=
@@ -331,7 +320,7 @@ void pagerank(
     auto diff_sum = transform_reduce_v(
       handle,
       pull_graph_view,
-      thrust::make_zip_iterator(thrust::make_tuple(pageranks, old_pageranks.data())),
+      thrust::make_zip_iterator(thrust::make_tuple(pageranks.begin(), old_pageranks.begin())),
       [] __device__(auto, auto val) { return std::abs(thrust::get<0>(val) - thrust::get<1>(val)); },
       result_t{0.0});
@@ -340,9 +329,11 @@ void pagerank(
     if (diff_sum < epsilon) {
       break;
     } else if (iter >= max_iterations) {
-      CUGRAPH_FAIL("PageRank failed to converge.");
+      break;
     }
   }
+
+  return centrality_algorithm_metadata_t{iter, (iter < max_iterations)};
 }
 }  // namespace detail
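The behavioral change in the last hunk above: hitting `max_iterations` no longer throws via `CUGRAPH_FAIL`; the loop breaks and the caller receives a `centrality_algorithm_metadata_t` recording how many iterations ran and whether `epsilon` was reached. A compilable host-side sketch of that control flow — the iteration-count member name is an assumption (the diff only names `converged_`), and `run_one_iteration` is a hypothetical stand-in for one power-iteration step:

```cpp
#include <cstddef>
#include <functional>

// Mirrors the shape used in the diff: an iteration count (member name assumed
// here) plus the converged_ flag the wrapper checks.
struct centrality_algorithm_metadata_t {
  std::size_t iteration_count_{};
  bool converged_{};
};

// run_one_iteration performs one power-iteration step and returns the L1
// difference between the new and old PageRank vectors.
centrality_algorithm_metadata_t iterate_until_converged(
  std::function<double()> const& run_one_iteration, double epsilon, std::size_t max_iterations)
{
  std::size_t iter{0};
  while (true) {
    auto diff_sum = run_one_iteration();
    iter++;
    if (diff_sum < epsilon) {
      break;  // converged
    } else if (iter >= max_iterations) {
      break;  // out of budget: report failure to the caller instead of throwing
    }
  }
  return centrality_algorithm_metadata_t{iter, (iter < max_iterations)};
}
```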
@@ -364,19 +355,102 @@ void pagerank(raft::handle_t const& handle,
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  detail::pagerank(handle,
-                   graph_view,
-                   edge_weight_view,
-                   precomputed_vertex_out_weight_sums,
-                   personalization_vertices,
-                   personalization_values,
-                   personalization_vector_size,
-                   pageranks,
-                   alpha,
-                   epsilon,
-                   max_iterations,
-                   has_initial_guess,
-                   do_expensive_check);
+  CUGRAPH_EXPECTS(
+    (personalization_vertices.has_value() == false) ||
+      (personalization_values.has_value() && personalization_vector_size.has_value()),
+    "Invalid input argument: if personalization_vertices.has_value() is true, "
+    "personalization_values.has_value() and personalization_vector_size.has_value() should be "
+    "true as well.");
+
+  // initialize pagerank values
+  if (has_initial_guess) {
+    if (do_expensive_check) {
+      auto num_negative_values = count_if_v(
+        handle, graph_view, pageranks, [] __device__(auto, auto val) { return val < 0.0; });
+      CUGRAPH_EXPECTS(num_negative_values == 0,
+                      "Invalid input argument: initial guess values should be non-negative.");
+    }
+
+    auto sum = reduce_v(handle, graph_view, pageranks, result_t{0.0});
+    CUGRAPH_EXPECTS(sum > 0.0,
+                    "Invalid input argument: sum of the PageRank initial "
+                    "guess values should be positive.");
+    thrust::transform(handle.get_thrust_policy(),
+                      pageranks,
+                      pageranks + graph_view.local_vertex_partition_range_size(),
+                      pageranks,
+                      [sum] __device__(auto val) { return val / sum; });
+  } else {
+    thrust::fill(handle.get_thrust_policy(),
+                 pageranks,
+                 pageranks + graph_view.local_vertex_partition_range_size(),
+                 result_t{1.0} / static_cast<result_t>(graph_view.number_of_vertices()));
+  }
+
+  auto metadata = detail::pagerank(
+    handle,
+    graph_view,
+    edge_weight_view,
+    precomputed_vertex_out_weight_sums
+      ? std::make_optional(raft::device_span<weight_t const>{
+          *precomputed_vertex_out_weight_sums,
+          static_cast<size_t>(graph_view.local_vertex_partition_range_size())})
+      : std::nullopt,
+    personalization_vertices
+      ? std::make_optional(std::make_tuple(
+          raft::device_span<vertex_t const>{*personalization_vertices,
+                                            static_cast<size_t>(*personalization_vector_size)},
+          raft::device_span<result_t const>{*personalization_values,
+                                            static_cast<size_t>(*personalization_vector_size)}))
+      : std::nullopt,
+    raft::device_span<result_t>{
+      pageranks, static_cast<size_t>(graph_view.local_vertex_partition_range_size())},
+    alpha,
+    epsilon,
+    max_iterations,
+    do_expensive_check);
+
+  CUGRAPH_EXPECTS(metadata.converged_, "PageRank failed to converge.");
+}
+
+template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<result_t>, centrality_algorithm_metadata_t> pagerank(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, true, multi_gpu> const& graph_view,
+  std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+  std::optional<raft::device_span<weight_t const>> precomputed_vertex_out_weight_sums,
+  std::optional<std::tuple<raft::device_span<vertex_t const>, raft::device_span<result_t const>>>
+    personalization,
+  std::optional<raft::device_span<result_t const>> initial_pageranks,
+  result_t alpha,
+  result_t epsilon,
+  size_t max_iterations,
+  bool do_expensive_check)
+{
+  CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
+
+  rmm::device_uvector<result_t> local_pageranks(graph_view.local_vertex_partition_range_size(),
+                                                handle.get_stream());
+
+  if (!initial_pageranks) {
+    thrust::fill(handle.get_thrust_policy(),
+                 local_pageranks.begin(),
+                 local_pageranks.end(),
+                 result_t{1.0} / graph_view.number_of_vertices());
+  } else {
+    thrust::copy(handle.get_thrust_policy(),
+                 initial_pageranks->begin(),
+                 initial_pageranks->end(),
+                 local_pageranks.begin());
+  }
+
+  auto metadata =
+    detail::pagerank(handle,
+                     graph_view,
+                     edge_weight_view,
+                     precomputed_vertex_out_weight_sums,
+                     personalization,
+                     raft::device_span<result_t>{local_pageranks.data(), local_pageranks.size()},
+                     alpha,
+                     epsilon,
+                     max_iterations,
+                     do_expensive_check);
+
+  return std::make_tuple(std::move(local_pageranks), metadata);
+}
 }  // namespace cugraph
diff --git a/cpp/src/link_analysis/pagerank_mg.cu b/cpp/src/link_analysis/pagerank_mg.cu
index d6dd5f60544..dc9892f69a8 100644
--- a/cpp/src/link_analysis/pagerank_mg.cu
+++ b/cpp/src/link_analysis/pagerank_mg.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
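The pagerank_mg.cu and pagerank_sg.cu hunks that follow add explicit instantiations of the new overload for each supported (vertex_t, edge_t) pair crossed with float and double results, so the template is compiled once inside the library rather than in every translation unit that includes the header. A toy illustration of the pattern (`uniform_scores` is a made-up function used only for the example):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for the templated algorithm defined in a header/.cuh file.
template <typename vertex_t, typename result_t>
std::vector<result_t> uniform_scores(vertex_t num_vertices)
{
  return std::vector<result_t>(static_cast<std::size_t>(num_vertices),
                               result_t{1} / static_cast<result_t>(num_vertices));
}

// The .cu translation unit then pins down every shipped combination, mirroring
// the six pagerank instantiations (int32/int64 vertex and edge types, each
// with float and double results):
template std::vector<float> uniform_scores<std::int32_t, float>(std::int32_t);
template std::vector<double> uniform_scores<std::int32_t, double>(std::int32_t);
template std::vector<float> uniform_scores<std::int64_t, float>(std::int64_t);
template std::vector<double> uniform_scores<std::int64_t, double>(std::int64_t);
```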
@@ -102,4 +102,82 @@ template void pagerank(raft::handle_t const& handle, bool has_initial_guess, bool do_expensive_check); +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_analysis/pagerank_sg.cu b/cpp/src/link_analysis/pagerank_sg.cu index 3dc0adc45df..51d123fe337 100644 --- a/cpp/src/link_analysis/pagerank_sg.cu +++ b/cpp/src/link_analysis/pagerank_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
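A design note on the signature being instantiated here: folding the old `personalization_vertices` / `personalization_values` / `personalization_vector_size` triple into one `std::optional` of a tuple of spans makes the "all three present or all three absent" invariant structural, instead of a runtime `CUGRAPH_EXPECTS` check. A sketch of the idea, with C++20 `std::span` standing in for `raft::device_span`:

```cpp
#include <cstddef>
#include <optional>
#include <span>
#include <tuple>

// One optional tuple of parallel spans: either both columns exist (one value
// per vertex) or neither does; no half-specified state is expressible.
using personalization_t =
  std::optional<std::tuple<std::span<int const>, std::span<double const>>>;

double personalization_mass(personalization_t const& personalization)
{
  if (!personalization) { return 0.0; }  // absent: uniform teleport, nothing to sum
  auto const& [vertices, values] = *personalization;
  (void)vertices;  // same length as values by construction
  double sum{0.0};
  for (auto v : values) { sum += v; }
  return sum;
}
```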
@@ -102,4 +102,82 @@ template void pagerank(raft::handle_t const& handle, bool has_initial_guess, bool do_expensive_check); +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool do_expensive_check); + +template std::tuple, centrality_algorithm_metadata_t> pagerank( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> precomputed_vertex_out_weight_sums, + std::optional, raft::device_span>> + personalization, + std::optional> initial_pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 2d0d0a876e6..98453d46c3f 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -974,7 +974,7 @@ nbr_intersection(raft::handle_t const& handle, .get_stream()); // initially store minimum degrees (upper bound for intersection sizes) if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto second_element_to_idx_map = - detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); + detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::transform( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer), @@ -1005,7 +1005,7 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { auto second_element_to_idx_map = - detail::kv_cuco_store_device_view_t((*major_to_idx_map_ptr)->view()); + 
detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( handle.get_thrust_policy(), rx_v_pair_nbr_intersection_sizes.begin(), diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 8395fc55833..f20865c92dc 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -16,11 +16,14 @@ #pragma once #include +#include #include +#include #include #include +#include #include #include #include @@ -29,6 +32,7 @@ #include #include #include +#include #include #include @@ -45,7 +49,7 @@ namespace cugraph { namespace detail { template -struct binary_search_find_op_t { +struct kv_binary_search_find_op_t { using key_type = typename thrust::iterator_traits::value_type; using value_type = typename thrust::iterator_traits::value_type; @@ -67,7 +71,7 @@ struct binary_search_find_op_t { }; template -struct binary_search_contains_op_t { +struct kv_binary_search_contains_op_t { using key_type = typename thrust::iterator_traits::value_type; KeyIterator store_key_first{}; @@ -79,6 +83,105 @@ struct binary_search_contains_op_t { } }; +template +struct kv_cuco_insert_and_increment_t { + using key_type = typename thrust::iterator_traits::value_type; + using cuco_store_type = cuco::experimental::static_map< + key_type, + size_t, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + KeyIterator key_first{}; + size_t* counter{nullptr}; + size_t invalid_idx{}; + + __device__ size_t operator()(size_t i) + { + auto pair = thrust::make_tuple(*(key_first + i), size_t{0} /* dummy */); + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (inserted) { + cuda::atomic_ref atomic_counter(*counter); + auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(idx, cuda::std::memory_order_relaxed); + return idx; + } else { + return invalid_idx; + } + } +}; + +template +struct kv_cuco_insert_if_and_increment_t { + using key_type = typename thrust::iterator_traits::value_type; + using cuco_store_type = cuco::experimental::static_map< + key_type, + size_t, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + KeyIterator key_first{}; + StencilIterator stencil_first{}; + PredOp pred_op{}; + size_t* counter{nullptr}; + size_t invalid_idx{}; + + __device__ size_t operator()(size_t i) + { + if (pred_op(*(stencil_first + i)) == false) { return invalid_idx; } + + auto pair = thrust::make_tuple(*(key_first + i), size_t{0} /* dummy */); + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (inserted) { + cuda::atomic_ref atomic_counter(*counter); + auto idx = atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(idx, cuda::std::memory_order_relaxed); + return idx; + } else { + return invalid_idx; + } + } +}; + +template +struct kv_cuco_insert_and_assign_t { + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_t, size_t>, + cuco::experimental::extent, + 
cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; + + typename cuco_store_type::ref_type device_ref{}; + + __device__ void operator()(thrust::tuple pair) + { + auto [iter, inserted] = device_ref.insert_and_find(pair); + if (!inserted) { + using ref_type = typename cuco_store_type::ref_type; + cuda::atomic_ref ref((*iter).second); + ref.store(thrust::get<1>(pair), cuda::std::memory_order_relaxed); + } + } +}; + template struct kv_binary_search_store_device_view_t { using key_type = typename ViewType::key_type; @@ -112,18 +215,19 @@ struct kv_binary_search_store_device_view_t { }; template -struct kv_cuco_store_device_view_t { - using key_type = typename ViewType::key_type; - using value_type = typename ViewType::value_type; - using cuco_store_device_view_type = typename ViewType::cuco_store_type::device_view; +struct kv_cuco_store_find_device_view_t { + using key_type = typename ViewType::key_type; + using value_type = typename ViewType::value_type; + using cuco_store_device_ref_type = + typename ViewType::cuco_store_type::ref_type; static_assert(!ViewType::binary_search); - __host__ kv_cuco_store_device_view_t(ViewType view) - : cuco_store_device_view(view.cuco_store_device_view()) + __host__ kv_cuco_store_find_device_view_t(ViewType view) + : cuco_store_device_ref(view.cuco_store_find_device_ref()) { if constexpr (std::is_arithmetic_v) { - invalid_value = cuco_store_device_view.get_empty_value_sentinel(); + invalid_value = cuco_store_device_ref.empty_value_sentinel(); } else { store_value_first = view.store_value_first(); invalid_value = view.invalid_value(); @@ -132,11 +236,11 @@ struct kv_cuco_store_device_view_t { __device__ value_type find(key_type key) const { - auto found = cuco_store_device_view.find(key); - if (found == cuco_store_device_view.end()) { + auto found = cuco_store_device_ref.find(key); + if (found == cuco_store_device_ref.end()) { return invalid_value; } else { - auto val = found->second.load(cuda::std::memory_order_relaxed); + auto val = (*found).second; if constexpr (std::is_arithmetic_v) { return val; } else { @@ -145,7 +249,7 @@ struct kv_cuco_store_device_view_t { } } - cuco_store_device_view_type cuco_store_device_view{}; + cuco_store_device_ref_type cuco_store_device_ref{}; std::conditional_t, typename ViewType::value_iterator, std::byte /* dummy */> @@ -185,7 +289,7 @@ class kv_binary_search_store_view_t { key_first, key_last, value_first, - binary_search_find_op_t{ + kv_binary_search_find_op_t{ store_key_first_, store_key_last_, store_value_first_, invalid_value_}); } @@ -195,11 +299,12 @@ class kv_binary_search_store_view_t { ResultValueIterator value_first, rmm::cuda_stream_view stream) const { - thrust::transform(rmm::exec_policy(stream), - key_first, - key_last, - value_first, - binary_search_contains_op_t{store_key_first_, store_key_last_}); + thrust::transform( + rmm::exec_policy(stream), + key_first, + key_last, + value_first, + kv_binary_search_contains_op_t{store_key_first_, store_key_last_}); } KeyIterator store_key_first() const { return store_key_first_; } @@ -227,31 +332,29 @@ class kv_cuco_store_view_t { static constexpr bool binary_search = false; - using cuco_store_type = - cuco::static_map, value_type, size_t>, - cuda::thread_scope_device, - rmm::mr::stream_allocator_adaptor>>; + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_type, size_t>, + cuco::experimental::extent, + 
cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; - // FIXME: const_cast as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) template kv_cuco_store_view_t(cuco_store_type const* store, std::enable_if_t, int32_t> = 0) - : cuco_store_(const_cast(store)) + : cuco_store_(store) { } - // FIXME: const_cast as a temporary workaround for - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) template kv_cuco_store_view_t(cuco_store_type const* store, ValueIterator value_first, type invalid_value, std::enable_if_t, int32_t> = 0) - : cuco_store_(const_cast(store)), - store_value_first_(value_first), - invalid_value_(invalid_value) + : cuco_store_(store), store_value_first_(value_first), invalid_value_(invalid_value) { } @@ -262,34 +365,17 @@ class kv_cuco_store_view_t { rmm::cuda_stream_view stream) const { if constexpr (std::is_arithmetic_v) { - cuco_store_->find(key_first, - key_last, - value_first, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + cuco_store_->find(key_first, key_last, value_first, stream.value()); } else { rmm::device_uvector indices(thrust::distance(key_first, key_last), stream); - cuco_store_->find(key_first, - key_last, - indices.begin(), - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); - auto invalid_idx = cuco_store_->get_empty_value_sentinel(); - thrust::transform(rmm::exec_policy(stream), - indices.begin(), - indices.end(), - value_first, - [store_value_first = store_value_first_, - invalid_value = invalid_value_, - invalid_idx] __device__(auto idx) { - if (idx != invalid_idx) { - return *(store_value_first + idx); - } else { - return invalid_value; - } - }); + auto invalid_idx = cuco_store_->empty_value_sentinel(); + cuco_store_->find(key_first, key_last, indices.begin(), stream.value()); + thrust::transform( + rmm::exec_policy(stream), + indices.begin(), + indices.end(), + value_first, + indirection_if_idx_valid_t{store_value_first_, invalid_idx, invalid_value_}); } } @@ -299,15 +385,10 @@ class kv_cuco_store_view_t { ResultValueIterator value_first, rmm::cuda_stream_view stream) const { - cuco_store_->contains(key_first, - key_last, - value_first, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + cuco_store_->contains(key_first, key_last, value_first, stream.value()); } - auto cuco_store_device_view() const { return cuco_store_->get_device_view(); } + auto cuco_store_find_device_ref() const { return cuco_store_->ref(cuco::experimental::find); } template std::enable_if_t, ValueIterator> store_value_first() const @@ -315,21 +396,19 @@ class kv_cuco_store_view_t { return store_value_first_; } - key_t invalid_key() const { return cuco_store_->get_empty_key_sentinel(); } + key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); } value_type invalid_value() const { if constexpr (std::is_arithmetic_v) { - return cuco_store_->get_empty_value_sentinel(); + return cuco_store_->empty_value_sentinel(); } else { return invalid_value_; } } private: - // FIXME: cuco_store should be a const pointer but we can't due to - // https://github.com/NVIDIA/cuCollections/issues/242 (cuco find() is not a const function) - cuco_store_type* cuco_store_{}; + cuco_store_type const* cuco_store_{}; std::conditional_t, ValueIterator, std::byte /* dummy */> store_value_first_{}; @@ -395,6 +474,29 @@ class 
kv_binary_search_store_t { } } + auto retrieve_all(rmm::cuda_stream_view stream) + { + rmm::device_uvector tmp_store_keys(store_keys_.size(), stream); + auto tmp_store_values = + allocate_dataframe_buffer(size_dataframe_buffer(store_values_), stream); + thrust::copy( + rmm::exec_policy(stream), store_keys_.begin(), store_keys_.end(), tmp_store_keys.begin()); + thrust::copy(rmm::exec_policy(stream), + get_dataframe_buffer_begin(store_values_), + get_dataframe_buffer_end(store_values_), + get_dataframe_buffer_begin(tmp_store_values)); + return std::make_tuple(std::move(tmp_store_keys), std::move(tmp_store_values)); + } + + auto release(rmm::cuda_stream_view stream) + { + auto tmp_store_keys = std::move(store_keys_); + auto tmp_store_values = std::move(store_values_); + store_keys_ = rmm::device_uvector(0, stream); + store_values_ = allocate_dataframe_buffer(0, stream); + return std::make_tuple(std::move(tmp_store_keys), std::move(tmp_store_values)); + } + key_t const* store_key_first() const { return store_keys_.cbegin(); } key_t const* store_key_last() const { return store_keys_.cend(); } @@ -403,6 +505,10 @@ class kv_binary_search_store_t { value_t invalid_value() const { return invalid_value_; } + size_t size() const { return store_keys_.size(); } + + size_t capacity() const { return store_keys_.size(); } + private: rmm::device_uvector store_keys_; decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{})) store_values_; @@ -421,14 +527,28 @@ class kv_cuco_store_t { std::invoke_result_t), value_buffer_type&>; - using cuco_store_type = - cuco::static_map, value_t, size_t>, - cuda::thread_scope_device, - rmm::mr::stream_allocator_adaptor>>; + using cuco_store_type = cuco::experimental::static_map< + key_t, + std::conditional_t, value_t, size_t>, + cuco::experimental::extent, + cuda::thread_scope_device, + thrust::equal_to, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>, + rmm::mr::stream_allocator_adaptor>>; kv_cuco_store_t(rmm::cuda_stream_view stream) {} + kv_cuco_store_t(size_t capacity, + key_t invalid_key, + value_t invalid_value, + rmm::cuda_stream_view stream) + { + allocate(capacity, invalid_key, invalid_value, stream); + capacity_ = capacity; + size_ = 0; + } + template kv_cuco_store_t(KeyIterator key_first, KeyIterator key_last, @@ -437,51 +557,228 @@ class kv_cuco_store_t { value_t invalid_value, rmm::cuda_stream_view stream) { - double constexpr load_factor = 0.7; - auto num_keys = static_cast(thrust::distance(key_first, key_last)); - auto cuco_size = std::max( - static_cast(static_cast(num_keys) / load_factor), - static_cast(num_keys) + 1); // cuco::static_map requires at least one empty slot - auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( - rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + allocate(num_keys, invalid_key, invalid_value, stream); + if constexpr (!std::is_arithmetic_v) { invalid_value_ = invalid_value; } + capacity_ = num_keys; + size_ = 0; + + insert(key_first, key_last, value_first, stream); + } + + template + void insert(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + if constexpr (std::is_arithmetic_v) { - cuco_store_ = - std::make_unique(cuco_size, - cuco::sentinel::empty_key{invalid_key}, - cuco::sentinel::empty_value{invalid_value}, - 
stream_adapter, - stream); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); + size_ += cuco_store_->insert(pair_first, pair_first + num_keys, stream.value()); + } else { + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. + rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate( + rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_and_increment_t{ + mutable_device_ref, key_first, counter.data(), std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + } + } + template + void insert_if(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + StencilIterator stencil_first, + PredOp pred_op, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + + if constexpr (std::is_arithmetic_v) { auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); - cuco_store_->insert(pair_first, - pair_first + num_keys, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); + size_ += cuco_store_->insert_if( + pair_first, pair_first + num_keys, stencil_first, pred_op, stream.value()); } else { - cuco_store_ = std::make_unique( - cuco_size, - cuco::sentinel::empty_key{invalid_key}, - cuco::sentinel::empty_value{std::numeric_limits::max()}, - stream_adapter, - stream); - store_values_ = allocate_dataframe_buffer(num_keys, stream); - invalid_value_ = invalid_value; + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. 
+ rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate(rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_if_and_increment_t{ + mutable_device_ref, + key_first, + stencil_first, + pred_op, + counter.data(), + std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + } + } + + template + void insert_and_assign(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + auto num_keys = static_cast(thrust::distance(key_first, key_last)); + if (num_keys == 0) return; + if constexpr (std::is_arithmetic_v) { + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(key_first, value_first)); + // FIXME: a temporary solution till insert_and_assign is added to + // cuco::experimental::static_map + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + thrust::for_each(rmm::exec_policy(stream), + pair_first, + pair_first + num_keys, + detail::kv_cuco_insert_and_assign_t{mutable_device_ref}); + // FIXME: this is an upper bound of size_, as some inserts may fail due to existing keys + size_ += num_keys; + } else { + auto old_store_value_size = size_dataframe_buffer(store_values_); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this adds additional + // complication. 
+ rmm::device_scalar counter(old_store_value_size, stream); + auto mutable_device_ref = cuco_store_->ref(cuco::experimental::insert_and_find); + rmm::device_uvector store_value_offsets(num_keys, stream); + thrust::tabulate( + rmm::exec_policy(stream), + store_value_offsets.begin(), + store_value_offsets.end(), + kv_cuco_insert_and_increment_t{ + mutable_device_ref, key_first, counter.data(), std::numeric_limits::max()}); + size_ += counter.value(stream); + resize_dataframe_buffer(store_values_, size_, stream); + thrust::scatter_if(rmm::exec_policy(stream), + value_first, + value_first + num_keys, + store_value_offsets.begin() /* map */, + store_value_offsets.begin() /* stencil */, + get_dataframe_buffer_begin(store_values_), + not_equal_t{std::numeric_limits::max()}); + + // now perform assigns (for k,v pairs that failed to insert) + + rmm::device_uvector kv_indices(num_keys, stream); + thrust::sequence(rmm::exec_policy(), kv_indices.begin(), kv_indices.end(), size_t{0}); auto pair_first = thrust::make_zip_iterator( - thrust::make_tuple(key_first, thrust::make_counting_iterator(size_t{0}))); - cuco_store_->insert(pair_first, - pair_first + num_keys, - cuco::detail::MurmurHash3_32{}, - thrust::equal_to{}, - stream); - thrust::copy(rmm::exec_policy(stream), - value_first, - value_first + num_keys, - get_dataframe_buffer_begin(store_values_)); + thrust::make_tuple(store_value_offsets.begin(), kv_indices.begin())); + kv_indices.resize( + thrust::distance( + pair_first, + thrust::remove_if(rmm::exec_policy(stream), + pair_first, + pair_first + num_keys, + [invalid_idx = std::numeric_limits::max()] __device__( + auto pair) { return thrust::get<0>(pair) != invalid_idx; })), + stream); + store_value_offsets.resize(0, stream); + store_value_offsets.shrink_to_fit(stream); + + thrust::sort(rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first] __device__(auto lhs, auto rhs) { + return *(key_first + lhs) < *(key_first + rhs); + }); + kv_indices.resize(thrust::distance(kv_indices.begin(), + thrust::unique(rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first] __device__(auto lhs, auto rhs) { + return *(key_first + lhs) == + *(key_first + rhs); + })), + stream); + + thrust::for_each( + rmm::exec_policy(stream), + kv_indices.begin(), + kv_indices.end(), + [key_first, + value_first, + store_value_first = get_dataframe_buffer_begin(store_values_), + device_ref = cuco_store_->ref(cuco::experimental::find)] __device__(auto kv_idx) { + size_t store_value_offset{}; + auto found = device_ref.find(*(key_first + kv_idx)); + assert(found != device_ref.end()); + store_value_offset = (*found).second; + *(store_value_first + store_value_offset) = *(value_first + kv_idx); + }); } } + auto retrieve_all(rmm::cuda_stream_view stream) + { + rmm::device_uvector keys(size_, stream); + auto values = allocate_dataframe_buffer(0, stream); + if constexpr (std::is_arithmetic_v) { + values.resize(size_, stream); + auto pair_last = cuco_store_->retrieve_all(keys.begin(), values.begin(), stream.value()); + // FIXME: this resize (& shrink_to_fit) shouldn't be necessary if size_ is exact + keys.resize(thrust::distance(keys.begin(), std::get<0>(pair_last)), stream); + values.resize(keys.size(), stream); + } else { + rmm::device_uvector indices(size_, stream); + auto pair_last = cuco_store_->retrieve_all(keys.begin(), indices.begin(), stream.value()); + // FIXME: this resize (& shrink_to_fit) shouldn't be necessary if size_ is exact + keys.resize(thrust::distance(keys.begin(), 
std::get<0>(pair_last)), stream); + indices.resize(keys.size(), stream); + resize_dataframe_buffer(values, keys.size(), stream); + thrust::gather(rmm::exec_policy(stream), + indices.begin(), + indices.end(), + get_dataframe_buffer_begin(store_values_), + get_dataframe_buffer_begin(values)); + } + return std::make_tuple(std::move(keys), std::move(values)); + } + + auto release(rmm::cuda_stream_view stream) + { + auto [retrieved_keys, retrieved_values] = retrieve_all(stream); + allocate(0, invalid_key(), invalid_value(), stream); + capacity_ = 0; + size_ = 0; + return std::make_tuple(std::move(retrieved_keys), std::move(retrieved_values)); + } + cuco_store_type const* cuco_store_ptr() const { return cuco_store_.get(); } template @@ -490,18 +787,60 @@ class kv_cuco_store_t { return get_dataframe_buffer_cbegin(store_values_); } - key_t invalid_key() const { return cuco_store_.get_empty_key_sentinel(); } + key_t invalid_key() const { return cuco_store_->empty_key_sentinel(); } value_t invalid_value() const { if constexpr (std::is_arithmetic_v) { - return cuco_store_.get_empty_value_sentinel(); + return cuco_store_->empty_value_sentinel(); } else { return invalid_value_; } } + // FIXME: currently this returns an upper-bound + size_t size() const { return size_; } + + size_t capacity() const { return capacity_; } + private: + void allocate(size_t num_keys, + key_t invalid_key, + value_t invalid_value, + rmm::cuda_stream_view stream) + { + double constexpr load_factor = 0.7; + auto cuco_size = std::max( + static_cast(static_cast(num_keys) / load_factor), + static_cast(num_keys) + 1); // cuco::static_map requires at least one empty slot + + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor( + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()), stream); + if constexpr (std::is_arithmetic_v) { + cuco_store_ = std::make_unique( + cuco_size, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{invalid_value}, + thrust::equal_to{}, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + stream_adapter, + stream.value()); + } else { + cuco_store_ = std::make_unique( + cuco_size, + cuco::sentinel::empty_key{invalid_key}, + cuco::sentinel::empty_value{std::numeric_limits::max()}, + thrust::equal_to{}, + cuco::experimental::linear_probing<1, // CG size + cuco::murmurhash3_32>{}, + stream_adapter, + stream); + store_values_ = allocate_dataframe_buffer(0, stream); + reserve_dataframe_buffer(store_values_, num_keys, stream); + } + } + std::unique_ptr cuco_store_{nullptr}; std::conditional_t, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{})), @@ -510,6 +849,12 @@ class kv_cuco_store_t { std::conditional_t, value_t, std::byte /* dummy */> invalid_value_{}; + size_t capacity_{0}; + size_t size_{ + 0}; // caching as cuco_store_->size() is expensive (this scans the entire slots to handle + // user inserts through a device reference (and currently this is an upper bound (this + // will become exact once we fully switch to cuco::experimental::static_map and use the + // static_map class's insert_and_assign function; this function will be added soon) }; } // namespace detail @@ -528,6 +873,23 @@ class kv_store_t { kv_store_t(rmm::cuda_stream_view stream) : store_(stream) {} + /* when use_binary_search = false */ + template + kv_store_t( + size_t capacity /* one can expect good performance till the capacity, the actual underlying + capacity can be larger (for performance & correctness reasons) */ + , + key_t invalid_key /* invalid 
key shouldn't appear in any *iter in [key_first, key_last) */, + value_t invalid_value /* invalid_value shouldn't appear in any *iter in [value_first, + value_first + thrust::distance(key_first, key_last)), invalid_value is + returned when match fails for the given key */ + , + rmm::cuda_stream_view stream, + std::enable_if_t = 0) + : store_(capacity, invalid_key, invalid_value, stream) + { + } + /* when use_binary_search = true */ template kv_store_t( @@ -576,6 +938,47 @@ class kv_store_t { { } + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + store_.insert(key_first, key_last, value_first, stream); + } + + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert_if(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + StencilIterator stencil_first, + PredOp pred_op, + rmm::cuda_stream_view stream) + { + store_.insert_if(key_first, key_last, value_first, stencil_first, pred_op, stream); + } + + /* when use binary_search = false, this requires that the capacity is large enough */ + template + std::enable_if_t insert_and_assign(KeyIterator key_first, + KeyIterator key_last, + ValueIterator value_first, + rmm::cuda_stream_view stream) + { + store_.insert_and_assign(key_first, key_last, value_first, stream); + } + + auto retrieve_all(rmm::cuda_stream_view stream) const { return store_.retrieve_all(stream); } + + // kv_store_t becomes empty after release + auto release(rmm::cuda_stream_view stream) { return store_.release(stream); } + auto view() const { if constexpr (use_binary_search) { @@ -593,6 +996,18 @@ class kv_store_t { } } + template + std::enable_if_t invalid_key() const + { + return store_.invalid_key(); + } + + value_t invalid_value() const { return store_.invalid_value(); } + + size_t size() const { return store_.size(); } + + size_t capacity() const { return store_.capacity(); } + private: std::conditional_t, diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index f30de0750e3..d69bb8af25e 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -346,7 +346,7 @@ void per_v_pair_transform_dst_nbr_intersection( // partition? This may provide additional performance improvement opportunities??? 
auto chunk_vertex_pair_first = thrust::make_transform_iterator( chunk_vertex_pair_index_first, - detail::indirection_t{vertex_pair_first}); + detail::indirection_t{vertex_pair_first}); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 69cce08d352..d7c094a2361 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -399,11 +399,12 @@ rmm::device_uvector get_sampling_index_without_replacement( if (retry_segment_indices) { retry_degrees = rmm::device_uvector((*retry_segment_indices).size(), handle.get_stream()); - thrust::transform(handle.get_thrust_policy(), - (*retry_segment_indices).begin(), - (*retry_segment_indices).end(), - (*retry_degrees).begin(), - indirection_t{segment_degree_first}); + thrust::transform( + handle.get_thrust_policy(), + (*retry_segment_indices).begin(), + (*retry_segment_indices).end(), + (*retry_degrees).begin(), + indirection_t{segment_degree_first}); retry_sample_nbr_indices = rmm::device_uvector( (*retry_segment_indices).size() * high_partition_over_sampling_K, handle.get_stream()); retry_sample_indices = rmm::device_uvector( diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index a4d34443413..2e19adc34c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -756,7 +756,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( : thrust::nullopt; std::conditional_t, - detail::kv_cuco_store_device_view_t> + detail::kv_cuco_store_find_device_view_t> dst_key_value_map_device_view( GraphViewType::is_multi_gpu ? 
multi_gpu_minor_key_value_map_ptr->view() : kv_store_view); thrust::transform(handle.get_thrust_policy(), diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index 4823c1febf4..b5cfdf4b16b 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -293,9 +293,11 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( GraphViewType::is_multi_gpu>(handle, edge_partition, std::nullopt, + std::nullopt, majors.data(), minors.data(), std::nullopt, + std::nullopt, segment_offsets); auto vertex_pair_first = diff --git a/cpp/src/sampling/neighborhood.cu b/cpp/src/sampling/neighborhood.cu index 0c0beb8d8b0..2f7b203a319 100644 --- a/cpp/src/sampling/neighborhood.cu +++ b/cpp/src/sampling/neighborhood.cu @@ -22,6 +22,8 @@ #include +#include + namespace cugraph { template @@ -34,14 +36,19 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle, size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); - return ops::graph::uniform_sample_csr(rng_state, + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); + return ops::graph::uniform_sample_csc(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } @@ -55,14 +62,19 @@ std::tuple, rmm::device_uvector> sample_ size_t sampling_size, ops::graph::SamplingAlgoT sampling_algo) { - const auto [ops_graph, max_degree] = detail::get_graph_and_max_degree(graph_view); + using base_vertex_t = std::decay_t; + using base_edge_t = std::decay_t; + static_assert(std::is_same_v, + "cugraph-ops sampling not yet implemented for different node and edge types"); + + const auto ops_graph = detail::get_graph(graph_view); return ops::graph::uniform_sample_coo(rng_state, ops_graph, ptr_d_start, num_start_vertices, sampling_size, sampling_algo, - max_degree, + ops_graph.dst_max_in_degree, handle.get_stream()); } diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index 6dacbee2fb1..b8dc28d563e 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -168,9 +168,12 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( handle, edge_partition, edge_partition_weight_view, + std::optional>{ + std::nullopt}, edgelist_majors.data(), edgelist_minors.data(), edgelist_weights ? 
std::optional{(*edgelist_weights).data()} : std::nullopt, + std::optional{std::nullopt}, segment_offsets); auto pair_first = diff --git a/cpp/src/structure/decompress_to_edgelist_impl.cuh b/cpp/src/structure/decompress_to_edgelist_impl.cuh index fb0ffdb96c1..d653307c620 100644 --- a/cpp/src/structure/decompress_to_edgelist_impl.cuh +++ b/cpp/src/structure/decompress_to_edgelist_impl.cuh @@ -52,11 +52,13 @@ template , rmm::device_uvector, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { @@ -86,6 +88,9 @@ decompress_to_edgelist_impl( rmm::device_uvector edgelist_majors(number_of_local_edges, handle.get_stream()); rmm::device_uvector edgelist_minors(edgelist_majors.size(), handle.get_stream()); + auto edgelist_ids = edge_id_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; @@ -101,10 +106,15 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_weight_view), i) : std::nullopt, + edge_id_view ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_id_view), i) + : std::nullopt, edgelist_majors.data() + cur_size, edgelist_minors.data() + cur_size, edgelist_weights ? std::optional{(*edgelist_weights).data() + cur_size} : std::nullopt, + edgelist_ids ? std::optional{(*edgelist_ids).data() + cur_size} : std::nullopt, graph_view.local_edge_partition_segment_offsets(i)); cur_size += edgelist_edge_counts[i]; } @@ -131,16 +141,34 @@ decompress_to_edgelist_impl( major_ptrs[i] = edgelist_majors.data() + cur_size; minor_ptrs[i] = edgelist_minors.data() + cur_size; if (edgelist_weights) { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - thrust::make_zip_iterator(thrust::make_tuple( - major_ptrs[i], (*edgelist_weights).data() + cur_size))); + if (edgelist_ids) { + thrust::sort_by_key( + handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple(major_ptrs[i], + (*edgelist_ids).data() + cur_size, + (*edgelist_weights).data() + cur_size))); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple( + major_ptrs[i], (*edgelist_weights).data() + cur_size))); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - minor_ptrs[i], - minor_ptrs[i] + edgelist_edge_counts[i], - major_ptrs[i]); + if (edgelist_ids) { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + thrust::make_zip_iterator(thrust::make_tuple( + major_ptrs[i], (*edgelist_ids).data() + cur_size))); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + minor_ptrs[i], + minor_ptrs[i] + edgelist_edge_counts[i], + major_ptrs[i]); + } } rmm::device_uvector d_segment_offsets(d_thresholds.size(), handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), @@ -172,7 +200,8 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? 
std::move(edgelist_majors) : std::move(edgelist_minors), - std::move(edgelist_weights)); + std::move(edgelist_weights), + std::move(edgelist_ids)); } template , rmm::device_uvector, - std::optional>>> + std::optional>, + std::optional>>> decompress_to_edgelist_impl( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { @@ -206,6 +237,9 @@ decompress_to_edgelist_impl( auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; + auto edgelist_ids = edge_id_view ? std::make_optional>( + edgelist_majors.size(), handle.get_stream()) + : std::nullopt; detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( @@ -215,9 +249,14 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_weight_view), 0) : std::nullopt, + edge_id_view ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + (*edge_id_view), 0) + : std::nullopt, edgelist_majors.data(), edgelist_minors.data(), edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, + edgelist_ids ? std::optional{(*edgelist_ids).data()} : std::nullopt, graph_view.local_edge_partition_segment_offsets()); if (renumber_map) { @@ -232,7 +271,8 @@ decompress_to_edgelist_impl( return std::make_tuple(store_transposed ? std::move(edgelist_minors) : std::move(edgelist_majors), store_transposed ? std::move(edgelist_majors) : std::move(edgelist_minors), - std::move(edgelist_weights)); + std::move(edgelist_weights), + std::move(edgelist_ids)); } } // namespace @@ -244,18 +284,20 @@ template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); return decompress_to_edgelist_impl( - handle, graph_view, edge_weight_view, renumber_map, do_expensive_check); + handle, graph_view, edge_weight_view, edge_id_view, renumber_map, do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/structure/decompress_to_edgelist_mg.cu b/cpp/src/structure/decompress_to_edgelist_mg.cu index 9f03570504b..fbe56ca9b04 100644 --- a/cpp/src/structure/decompress_to_edgelist_mg.cu +++ b/cpp/src/structure/decompress_to_edgelist_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
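The decompress_to_edgelist changes above thread an optional edge-ID column through the return tuple, which is why the sort in decompress_to_edgelist_impl now branches into four cases and zips majors, IDs, and weights together: every column that is present must be permuted by the same order as the minor-vertex keys. A host-side sketch of that "sort one key column, gather the rest through the permutation" step (names are illustrative; the `thrust::sort_by_key` + zip-iterator calls above fuse these steps on the GPU):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <optional>
#include <vector>

void sort_edgelist_by_minor(std::vector<int>& minors,
                            std::vector<int>& majors,
                            std::optional<std::vector<std::int64_t>>& edge_ids,
                            std::optional<std::vector<float>>& weights)
{
  // sort a permutation by the key column
  std::vector<std::size_t> perm(minors.size());
  std::iota(perm.begin(), perm.end(), std::size_t{0});
  std::sort(perm.begin(), perm.end(),
            [&](std::size_t a, std::size_t b) { return minors[a] < minors[b]; });

  // apply the permutation to a column in place
  auto gather = [&](auto& column) {
    auto tmp = column;
    for (std::size_t i = 0; i < perm.size(); ++i) { column[i] = tmp[perm[i]]; }
  };
  gather(minors);
  gather(majors);
  if (edge_ids) { gather(*edge_ids); }  // permute only the columns that exist
  if (weights) { gather(*weights); }
}
```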
@@ -21,121 +21,145 @@ namespace cugraph { template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> 
edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/decompress_to_edgelist_sg.cu b/cpp/src/structure/decompress_to_edgelist_sg.cu index 296f39fdfd2..5b8e410e087 100644 --- a/cpp/src/structure/decompress_to_edgelist_sg.cu +++ b/cpp/src/structure/decompress_to_edgelist_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,121 +21,145 @@ namespace cugraph { template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, 
std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); template std::tuple, rmm::device_uvector, - std::optional>> + std::optional>, + std::optional>> decompress_to_edgelist( raft::handle_t const& handle, graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool do_expensive_check); diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index c4c34733a4d..192120e6b4c 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -142,7 +142,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream()); if (skip_missing_labels) { - auto device_view = detail::kv_cuco_store_device_view_t(relabel_map_view); + auto device_view = detail::kv_cuco_store_find_device_view_t(relabel_map_view); thrust::transform( handle.get_thrust_policy(), rx_unique_old_labels.begin(), @@ -187,7 +187,7 @@ void relabel(raft::handle_t const& handle, handle.get_stream()); auto relabel_map_view = relabel_map.view(); if (skip_missing_labels) { - auto device_view = detail::kv_cuco_store_device_view_t(relabel_map_view); + auto device_view = detail::kv_cuco_store_find_device_view_t(relabel_map_view); thrust::transform( handle.get_thrust_policy(), labels, diff --git a/cpp/src/structure/symmetrize_graph_impl.cuh b/cpp/src/structure/symmetrize_graph_impl.cuh index 4afa4122a06..3334e089ba3 100644 --- a/cpp/src/structure/symmetrize_graph_impl.cuh +++ b/cpp/src/structure/symmetrize_graph_impl.cuh @@ -73,12 +73,17 @@ symmetrize_graph_impl( auto is_multigraph = graph.is_multigraph(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -158,12 +163,17 @@ symmetrize_graph_impl( auto is_multigraph = graph.is_multigraph(); bool renumber = renumber_map.has_value(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? 
std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/structure/transpose_graph_impl.cuh b/cpp/src/structure/transpose_graph_impl.cuh index c2609362b0b..5b418a15478 100644 --- a/cpp/src/structure/transpose_graph_impl.cuh +++ b/cpp/src/structure/transpose_graph_impl.cuh @@ -74,12 +74,17 @@ transpose_graph_impl( auto is_multigraph = graph.is_multigraph(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -165,12 +170,17 @@ transpose_graph_impl( auto is_multigraph = graph.is_multigraph(); bool renumber = renumber_map.has_value(); - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/structure/transpose_graph_storage_impl.cuh b/cpp/src/structure/transpose_graph_storage_impl.cuh index b34d2f67dcd..980c9b10c53 100644 --- a/cpp/src/structure/transpose_graph_storage_impl.cuh +++ b/cpp/src/structure/transpose_graph_storage_impl.cuh @@ -74,12 +74,17 @@ transpose_graph_storage_impl( // FIXME: if is_symmetric is true we can do this more efficiently, // since the graph contents should be exactly the same - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*renumber_map).data(), (*renumber_map).size())); graph = graph_t(handle); @@ -170,12 +175,17 @@ transpose_graph_storage_impl( // FIXME: if is_symmetric is true we can do this more efficiently, // since the graph contents should be exactly the same - auto [edgelist_srcs, edgelist_dsts, edgelist_weights] = decompress_to_edgelist( + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_weights{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_weights, std::ignore) = decompress_to_edgelist( handle, graph_view, edge_weights ? std::optional>{(*edge_weights).view()} : std::nullopt, + std::optional>{std::nullopt}, renumber_map ? 
std::make_optional>((*renumber_map).data(), (*renumber_map).size()) : std::nullopt); diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp index 1dbe930e4c9..9aea4183866 100644 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -20,18 +20,20 @@ #include -#include - namespace cugraph { namespace detail { template -ops::graph::fg_csr get_graph( +ops::graph::csc get_graph( graph_view_t const& gview) { - ops::graph::fg_csr graph; - graph.n_nodes = gview.number_of_vertices(); - graph.n_indices = gview.number_of_edges(); + ops::graph::csc graph; + graph.n_src_nodes = gview.number_of_vertices(); + graph.n_dst_nodes = gview.number_of_vertices(); + graph.n_indices = gview.number_of_edges(); + // FIXME this is sufficient for now, but if there is a fast (cached) way + // of getting max degree, use that instead + graph.dst_max_in_degree = std::numeric_limits::max(); // FIXME: this is evil and is just temporary until we have a matching type in cugraph-ops // or we change the type accepted by the functions calling into cugraph-ops graph.offsets = const_cast(gview.local_edge_partition_view().offsets().data()); @@ -39,15 +41,5 @@ ops::graph::fg_csr get_graph( return graph; } -template -std::tuple, NodeTypeT> get_graph_and_max_degree( - graph_view_t const& gview) -{ - // FIXME this is sufficient for now, but if there is a fast (cached) way - // of getting max degree, use that instead - auto max_degree = std::numeric_limits::max(); - return std::make_tuple(get_graph(gview), max_degree); -} - } // namespace detail } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 7d4a2181af1..3bcd5546455 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -606,6 +606,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_KATZ_TEST c_api/mg_katz_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_EIGENVECTOR_CENTRALITY_TEST c_api/mg_eigenvector_centrality_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_BETWEENNESS_CENTRALITY_TEST c_api/mg_betweenness_centrality_test.c c_api/mg_test_utils.cpp) + ConfigureCTestMG(MG_CAPI_EDGE_BETWEENNESS_CENTRALITY_TEST c_api/mg_edge_betweenness_centrality_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_HITS_TEST c_api/mg_hits_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/mg_uniform_neighbor_sample_test.c c_api/mg_test_utils.cpp) ConfigureCTestMG(MG_CAPI_RANDOM_WALKS_TEST c_api/mg_random_walks_test.c c_api/mg_test_utils.cpp) @@ -654,6 +655,7 @@ ConfigureCTest(CAPI_PAGERANK_TEST c_api/pagerank_test.c) ConfigureCTest(CAPI_KATZ_TEST c_api/katz_test.c) ConfigureCTest(CAPI_EIGENVECTOR_CENTRALITY_TEST c_api/eigenvector_centrality_test.c) ConfigureCTest(CAPI_BETWEENNESS_CENTRALITY_TEST c_api/betweenness_centrality_test.c) +ConfigureCTest(CAPI_EDGE_BETWEENNESS_CENTRALITY_TEST c_api/edge_betweenness_centrality_test.c) ConfigureCTest(CAPI_HITS_TEST c_api/hits_test.c) ConfigureCTest(CAPI_BFS_TEST c_api/bfs_test.c) ConfigureCTest(CAPI_SSSP_TEST c_api/sssp_test.c) diff --git a/cpp/tests/c_api/edge_betweenness_centrality.c b/cpp/tests/c_api/edge_betweenness_centrality_test.c similarity index 51% rename from cpp/tests/c_api/edge_betweenness_centrality.c rename to cpp/tests/c_api/edge_betweenness_centrality_test.c index 7a56f90eac7..ab119288fab 100644 --- a/cpp/tests/c_api/edge_betweenness_centrality.c +++ b/cpp/tests/c_api/edge_betweenness_centrality_test.c @@ -29,9 +29,11 @@ typedef float 
weight_t; int generic_edge_betweenness_centrality_test(vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, + vertex_t* h_seeds, weight_t* h_result, size_t num_vertices, size_t num_edges, + size_t num_seeds, bool_t store_transposed, size_t num_vertices_to_sample) { @@ -40,64 +42,102 @@ int generic_edge_betweenness_centrality_test(vertex_t* h_src, cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_resource_handle_t* p_handle = NULL; - cugraph_graph_t* p_graph = NULL; - cugraph_centrality_result_t* p_result = NULL; - cugraph_rng_state_t* rng_state = NULL; + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_edge_centrality_result_t* result = NULL; + cugraph_rng_state_t* rng_state = NULL; + cugraph_type_erased_device_array_t* seeds = NULL; + cugraph_type_erased_device_array_view_t* seeds_view = NULL; - p_handle = cugraph_create_resource_handle(NULL); - TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); - ret_code = cugraph_rng_state_create(p_handle, 0, &rng_state, &ret_error); + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "failed to create rng_state."); - ret_code = create_test_graph(p_handle, + ret_code = create_test_graph(handle, h_src, h_dst, h_wgt, num_edges, - rng_state, store_transposed, FALSE, FALSE, - &p_graph, + &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + if (h_seeds == NULL) { + ret_code = cugraph_select_random_vertices( + handle, graph, rng_state, num_vertices_to_sample, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "select random seeds failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + } else { + ret_code = + cugraph_type_erased_device_array_create(handle, num_seeds, INT32, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, seeds_view, (byte_t*)h_seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds copy_from_host failed."); + } + ret_code = cugraph_edge_betweenness_centrality( - p_handle, p_graph, num_vertices_to_sample, NULL, FALSE, FALSE, &p_result, &ret_error); + handle, graph, seeds_view, FALSE, FALSE, &result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ASSERT( test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_edge_betweenness_centrality failed."); - cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* srcs; + cugraph_type_erased_device_array_view_t* dsts; cugraph_type_erased_device_array_view_t* centralities; - vertices = cugraph_centrality_result_get_vertices(p_result); - centralities = cugraph_centrality_result_get_values(p_result); + srcs = cugraph_edge_centrality_result_get_src_vertices(result); + dsts = cugraph_edge_centrality_result_get_dst_vertices(result); + centralities = cugraph_edge_centrality_result_get_values(result); + + size_t num_local_edges = cugraph_type_erased_device_array_view_size(srcs); - vertex_t 
h_vertices[num_vertices]; - weight_t h_centralities[num_vertices]; + vertex_t h_cugraph_src[num_local_edges]; + vertex_t h_cugraph_dst[num_local_edges]; + weight_t h_centralities[num_local_edges]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_cugraph_src, srcs , &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_vertices, vertices, &ret_error); + handle, (byte_t*)h_cugraph_dst, dsts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( - p_handle, (byte_t*)h_centralities, centralities, &ret_error); + handle, (byte_t*)h_centralities, centralities, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + weight_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M[i][j] = 0.0; + } + + for (int i = 0; i < num_edges; ++i) { + M[h_src[i]][h_dst[i]] = h_result[i]; + } + + for (int i = 0; (i < num_local_edges) && (test_ret_value == 0); ++i) { TEST_ASSERT(test_ret_value, - nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.001), - "centralities results don't match"); + nearlyEqual(M[h_cugraph_src[i]][h_cugraph_dst[i]], h_centralities[i], 0.001), + "betweenness centrality results don't match"); } - cugraph_centrality_result_free(p_result); - cugraph_sg_graph_free(p_graph); - cugraph_free_resource_handle(p_handle); + cugraph_edge_centrality_result_free(result); + cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); cugraph_error_free(ret_error); return test_ret_value; @@ -112,14 +152,14 @@ int test_edge_betweenness_centrality() vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; weight_t h_wgt[] = { 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - weight_t h_result[] = {0.236325, 0.292055, 0.458457, 0.60533, 0.190498, 0.495942}; + weight_t h_result[] = { 0, 2, 3, 1.83333, 2, 2, 3, 2, 3.16667, 2.83333, 4.33333, 0, 2, 2.83333, 3.66667, 2.33333 }; double epsilon = 1e-6; size_t max_iterations = 200; // Eigenvector centrality wants store_transposed = TRUE return generic_edge_betweenness_centrality_test( - h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, 5); + h_src, h_dst, h_wgt, NULL, h_result, num_vertices, num_edges, 0, TRUE, 5); } /******************************************************************************/ diff --git a/cpp/tests/c_api/mg_edge_betweenness_centrality.c b/cpp/tests/c_api/mg_edge_betweenness_centrality_test.c similarity index 54% rename from cpp/tests/c_api/mg_edge_betweenness_centrality.c rename to cpp/tests/c_api/mg_edge_betweenness_centrality_test.c index 17ce717dcfe..13f0085be84 100644 --- a/cpp/tests/c_api/mg_edge_betweenness_centrality.c +++ b/cpp/tests/c_api/mg_edge_betweenness_centrality_test.c @@ -29,9 +29,11 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha vertex_t* h_src, vertex_t* h_dst, weight_t* h_wgt, + vertex_t* h_seeds, weight_t* h_result, size_t num_vertices, size_t num_edges, + size_t num_seeds, bool_t store_transposed, size_t num_vertices_to_sample) { @@ -40,16 +42,43 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha cugraph_error_code_t 
ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* p_graph = NULL; - cugraph_centrality_result_t* p_result = NULL; + cugraph_graph_t* graph = NULL; + cugraph_edge_centrality_result_t* result = NULL; + cugraph_rng_state_t* rng_state = NULL; + cugraph_type_erased_device_array_t* seeds = NULL; + cugraph_type_erased_device_array_view_t* seeds_view = NULL; ret_code = create_mg_test_graph( - handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &graph, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); + int rank = cugraph_resource_handle_get_rank(handle); + + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "failed to create rng_state."); + + if (h_seeds == NULL) { + ret_code = cugraph_select_random_vertices( + handle, graph, rng_state, num_vertices_to_sample, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "select random seeds failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + } else { + if (rank > 0) num_seeds = 0; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_seeds, INT32, &seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + seeds_view = cugraph_type_erased_device_array_view(seeds); + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, seeds_view, (byte_t*)h_seeds, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds copy_from_host failed."); + } + ret_code = cugraph_edge_betweenness_centrality( - handle, p_graph, num_vertices_to_sample, NULL, FALSE, FALSE, &p_result, &ret_error); + handle, graph, seeds_view, FALSE, FALSE, &result, &ret_error); TEST_ASSERT( test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_edge_betweenness_centrality failed."); @@ -57,33 +86,51 @@ int generic_edge_betweenness_centrality_test(const cugraph_resource_handle_t* ha // the returned values with the expected results for the entire // graph. Each GPU will have a subset of the total vertices, so // they will do a subset of the comparisons. 
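Both the SG and MG versions of this test now validate per-edge centralities the same way: the C API returns edges in an arbitrary (src, dst) order, so the expected value is looked up through a dense matrix keyed by endpoint pair rather than by array index. A minimal host-side sketch of that pattern, with hypothetical names and the assumption that vertex ids are dense in [0, num_vertices):

#include <cmath>
#include <cstddef>
#include <vector>

// Build a dense expected-value matrix M[src][dst] from the reference edge
// list, then check each (src, dst, value) triple the library returned.
bool validate_edge_centralities(std::vector<int> const& ref_src,
                                std::vector<int> const& ref_dst,
                                std::vector<float> const& ref_val,  // per input edge
                                std::vector<int> const& out_src,
                                std::vector<int> const& out_dst,
                                std::vector<float> const& out_val,  // per returned edge
                                std::size_t num_vertices,
                                float tol = 0.001f)
{
  std::vector<std::vector<float>> M(num_vertices, std::vector<float>(num_vertices, 0.0f));
  for (std::size_t i = 0; i < ref_src.size(); ++i) { M[ref_src[i]][ref_dst[i]] = ref_val[i]; }

  for (std::size_t i = 0; i < out_src.size(); ++i) {
    if (std::fabs(M[out_src[i]][out_dst[i]] - out_val[i]) > tol) { return false; }
  }
  return true;
}

A dense matrix is fine for these small fixtures; for larger graphs a hash map keyed on the (src, dst) pair would be the natural replacement.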
- cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* srcs; + cugraph_type_erased_device_array_view_t* dsts; cugraph_type_erased_device_array_view_t* centralities; - vertices = cugraph_centrality_result_get_vertices(p_result); - centralities = cugraph_centrality_result_get_values(p_result); + srcs = cugraph_edge_centrality_result_get_src_vertices(result); + dsts = cugraph_edge_centrality_result_get_dst_vertices(result); + centralities = cugraph_edge_centrality_result_get_values(result); + + size_t num_local_edges = cugraph_type_erased_device_array_view_size(srcs); - vertex_t h_vertices[num_vertices]; - weight_t h_centralities[num_vertices]; + vertex_t h_cugraph_src[num_local_edges]; + vertex_t h_cugraph_dst[num_local_edges]; + weight_t h_centralities[num_local_edges]; ret_code = cugraph_type_erased_device_array_view_copy_to_host( - handle, (byte_t*)h_vertices, vertices, &ret_error); + handle, (byte_t*)h_cugraph_src, srcs , &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_cugraph_dst, dsts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( handle, (byte_t*)h_centralities, centralities, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + weight_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M[i][j] = 0.0; + } - for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + for (int i = 0; i < num_edges; ++i) { + M[h_src[i]][h_dst[i]] = h_result[i]; + } + + for (int i = 0; (i < num_local_edges) && (test_ret_value == 0); ++i) { TEST_ASSERT(test_ret_value, - nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.001), + nearlyEqual(M[h_cugraph_src[i]][h_cugraph_dst[i]], h_centralities[i], 0.001), "betweenness centrality results don't match"); } - cugraph_centrality_result_free(p_result); - cugraph_mg_graph_free(p_graph); + cugraph_edge_centrality_result_free(result); + cugraph_mg_graph_free(graph); cugraph_error_free(ret_error); return test_ret_value; @@ -98,14 +145,16 @@ int test_edge_betweenness_centrality(const cugraph_resource_handle_t* handle) vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; weight_t h_wgt[] = { 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - weight_t h_result[] = {0.236374, 0.292046, 0.458369, 0.605472, 0.190544, 0.495814}; + weight_t h_result[] = { 3.16667, 2.83333, 4.33333, 1.83333, 2, 2.83333, 3.66667, 2.33333, + 3.16667, 2.83333, 4.33333, 1.83333, 2, 2.83333, 3.66667, 2.33333 }; + double epsilon = 1e-6; size_t max_iterations = 200; // Eigenvector centrality wants store_transposed = TRUE return generic_edge_betweenness_centrality_test( - handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, 5); + handle, h_src, h_dst, h_wgt, NULL, h_result, num_vertices, num_edges, 0, TRUE, 6); } /******************************************************************************/ diff --git a/cpp/tests/c_api/mg_pagerank_test.c b/cpp/tests/c_api/mg_pagerank_test.c index 09925b9ac4e..9c142236808 100644 --- a/cpp/tests/c_api/mg_pagerank_test.c +++ b/cpp/tests/c_api/mg_pagerank_test.c @@ -100,6 +100,81 @@ int 
generic_pagerank_test(const cugraph_resource_handle_t* handle, return test_ret_value; } +int generic_pagerank_nonconverging_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_mg_test_graph failed."); + + ret_code = cugraph_pagerank_allow_nonconvergence(handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + + // NOTE: Because we get back vertex ids and pageranks, we can simply compare + // the returned values with the expected results for the entire + // graph. Each GPU will have a subset of the total vertices, so + // they will do a subset of the comparisons. + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + + vertex_t h_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_mg_graph_free(p_graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int generic_personalized_pagerank_test(const cugraph_resource_handle_t* handle, vertex_t* h_src, vertex_t* h_dst, @@ -209,6 +284,115 @@ int generic_personalized_pagerank_test(const cugraph_resource_handle_t* handle, return test_ret_value; } +int generic_personalized_pagerank_nonconverging_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* 
personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + if (cugraph_resource_handle_get_rank(handle) != 0) { num_personalization_vertices = 0; } + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank_allow_nonconvergence(handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + + vertex_t h_vertices[num_local_vertices]; + weight_t h_pageranks[num_local_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_local_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_mg_graph_free(p_graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_pagerank(const cugraph_resource_handle_t* handle) { size_t num_edges = 8; @@ -323,6 
+507,34 @@ int test_pagerank_4_with_transpose(const cugraph_resource_handle_t* handle) max_iterations); } +int test_pagerank_non_convergence(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = {0.0776471, 0.167637, 0.0639699, 0.220202, 0.140046, 0.330498}; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 2; + + // Pagerank wants store_transposed = TRUE + return generic_pagerank_nonconverging_test(handle, + h_src, + h_dst, + h_wgt, + h_result, + num_vertices, + num_edges, + TRUE, + alpha, + epsilon, + max_iterations); +} + int test_personalized_pagerank(const cugraph_resource_handle_t* handle) { size_t num_edges = 3; @@ -356,6 +568,40 @@ int test_personalized_pagerank(const cugraph_resource_handle_t* handle) max_iterations); } +int test_personalized_pagerank_non_convergence(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = { 0.03625, 0.285, 0.32125, 0.3575 }; + + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 1; + + return generic_personalized_pagerank_nonconverging_test(handle, + h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -368,7 +614,9 @@ int main(int argc, char** argv) result |= RUN_MG_TEST(test_pagerank_with_transpose, handle); result |= RUN_MG_TEST(test_pagerank_4, handle); result |= RUN_MG_TEST(test_pagerank_4_with_transpose, handle); + result |= RUN_MG_TEST(test_pagerank_non_convergence, handle); result |= RUN_MG_TEST(test_personalized_pagerank, handle); + result |= RUN_MG_TEST(test_personalized_pagerank_non_convergence, handle); cugraph_free_resource_handle(handle); free_mg_raft_handle(raft_handle); diff --git a/cpp/tests/c_api/pagerank_test.c b/cpp/tests/c_api/pagerank_test.c index 048750da06c..e12021cd16d 100644 --- a/cpp/tests/c_api/pagerank_test.c +++ b/cpp/tests/c_api/pagerank_test.c @@ -67,6 +67,82 @@ int generic_pagerank_test(vertex_t* h_src, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + vertex_t h_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + 
for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_sg_graph_free(p_graph); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int generic_pagerank_nonconverging_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_pagerank_allow_nonconvergence(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); cugraph_type_erased_device_array_view_t* vertices; cugraph_type_erased_device_array_view_t* pageranks; @@ -208,6 +284,115 @@ int generic_personalized_pagerank_test(vertex_t* h_src, return test_ret_value; } +int generic_personalized_pagerank_nonconverging_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + weight_t* h_result, + vertex_t* h_personalization_vertices, + weight_t* h_personalization_values, + size_t num_vertices, + size_t num_edges, + size_t num_personalization_vertices, + bool_t store_transposed, + double alpha, + double epsilon, + size_t max_iterations) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* p_handle = NULL; + cugraph_graph_t* p_graph = NULL; + cugraph_centrality_result_t* p_result = NULL; + cugraph_type_erased_device_array_t* personalization_vertices = NULL; + cugraph_type_erased_device_array_t* personalization_values = NULL; + cugraph_type_erased_device_array_view_t* personalization_vertices_view = NULL; + cugraph_type_erased_device_array_view_t* personalization_values_view = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + p_handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, p_handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + p_handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, FALSE, &p_graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, vertex_tid, &personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, 
"personalization_vertices create failed."); + + ret_code = cugraph_type_erased_device_array_create( + p_handle, num_personalization_vertices, weight_tid, &personalization_values, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values create failed."); + + personalization_vertices_view = cugraph_type_erased_device_array_view(personalization_vertices); + personalization_values_view = cugraph_type_erased_device_array_view(personalization_values); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_vertices_view, (byte_t*)h_personalization_vertices, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_vertices copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + p_handle, personalization_values_view, (byte_t*)h_personalization_values, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "personalization_values copy_from_host failed."); + + ret_code = cugraph_personalized_pagerank_allow_nonconvergence(p_handle, + p_graph, + NULL, + NULL, + NULL, + NULL, + personalization_vertices_view, + personalization_values_view, + alpha, + epsilon, + max_iterations, + FALSE, + &p_result, + &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_personalized_pagerank failed."); + + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* pageranks; + + vertices = cugraph_centrality_result_get_vertices(p_result); + pageranks = cugraph_centrality_result_get_values(p_result); + + vertex_t h_vertices[num_vertices]; + weight_t h_pageranks[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + p_handle, (byte_t*)h_pageranks, pageranks, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(h_result[h_vertices[i]], h_pageranks[i], 0.001), + "pagerank results don't match"); + } + + cugraph_centrality_result_free(p_result); + cugraph_sg_graph_free(p_graph); + cugraph_free_resource_handle(p_handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_pagerank() { size_t num_edges = 8; @@ -286,6 +471,25 @@ int test_pagerank_4_with_transpose() h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, alpha, epsilon, max_iterations); } +int test_pagerank_non_convergence() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + weight_t h_result[] = {0.0776471, 0.167637, 0.0639699, 0.220202, 0.140046, 0.330498}; + + double alpha = 0.95; + double epsilon = 0.0001; + size_t max_iterations = 2; + + // Pagerank wants store_transposed = TRUE + return generic_pagerank_nonconverging_test( + h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, TRUE, alpha, epsilon, max_iterations); +} + int test_personalized_pagerank() { size_t num_edges = 3; @@ -318,6 +522,39 @@ int test_personalized_pagerank() max_iterations); 
} +int test_personalized_pagerank_non_convergence() +{ + size_t num_edges = 3; + size_t num_vertices = 4; + + vertex_t h_src[] = {0, 1, 2}; + vertex_t h_dst[] = {1, 2, 3}; + weight_t h_wgt[] = {1.f, 1.f, 1.f}; + weight_t h_result[] = { 0.03625, 0.285, 0.32125, 0.3575 }; + + + vertex_t h_personalized_vertices[] = {0, 1, 2, 3}; + weight_t h_personalized_values[] = {0.1, 0.2, 0.3, 0.4}; + + double alpha = 0.85; + double epsilon = 1.0e-6; + size_t max_iterations = 1; + + return generic_personalized_pagerank_nonconverging_test(h_src, + h_dst, + h_wgt, + h_result, + h_personalized_vertices, + h_personalized_values, + num_vertices, + num_edges, + num_vertices, + FALSE, + alpha, + epsilon, + max_iterations); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -327,6 +564,8 @@ int main(int argc, char** argv) result |= RUN_TEST(test_pagerank_with_transpose); result |= RUN_TEST(test_pagerank_4); result |= RUN_TEST(test_pagerank_4_with_transpose); + result |= RUN_TEST(test_pagerank_non_convergence); result |= RUN_TEST(test_personalized_pagerank); + result |= RUN_TEST(test_personalized_pagerank_non_convergence); return result; } diff --git a/cpp/tests/centrality/betweenness_centrality_reference.hpp b/cpp/tests/centrality/betweenness_centrality_reference.hpp index 9a86de934c3..3c60020265a 100644 --- a/cpp/tests/centrality/betweenness_centrality_reference.hpp +++ b/cpp/tests/centrality/betweenness_centrality_reference.hpp @@ -33,7 +33,7 @@ void ref_bfs(std::vector const& offsets, std::queue& Q, std::stack& S, std::vector& dist, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, vertex_t source) { @@ -61,7 +61,7 @@ void ref_bfs(std::vector const& offsets, // Edge(v, w) on a shortest path? 
if (dist[nbr] == dist[v] + 1) { sigmas[nbr] += sigmas[v]; - pred[nbr].push_back(v); + pred[nbr].push_back(std::make_pair(v, nbr_idx)); } } } @@ -70,7 +70,7 @@ void ref_bfs(std::vector const& offsets, template void ref_accumulation(std::vector& result, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -80,8 +80,8 @@ void ref_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + deltas[v.first] += (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); } if (w != source) { result[w] += deltas[w]; } } @@ -90,7 +90,7 @@ void ref_accumulation(std::vector& result, template void ref_endpoints_accumulation(std::vector& result, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -101,17 +101,19 @@ void ref_endpoints_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + deltas[v.first] += (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); } if (w != source) { result[w] += deltas[w] + 1; } } } -template +template void ref_edge_accumulation(std::vector& result, + std::vector const& offsets, + std::vector const& indices, std::stack& S, - std::vector>& pred, + std::vector>>& pred, std::vector& sigmas, std::vector& deltas, vertex_t source) @@ -120,10 +122,12 @@ void ref_edge_accumulation(std::vector& result, while (!S.empty()) { vertex_t w = S.top(); S.pop(); - for (vertex_t v : pred[w]) { - deltas[v] += (sigmas[v] / sigmas[w]) * (1.0 + deltas[w]); + for (auto v : pred[w]) { + double coefficient = (sigmas[v.first] / sigmas[w]) * (1.0 + deltas[w]); + + deltas[v.first] += coefficient; + result[v.second] += coefficient; } - if (w != source) { result[w] += deltas[w]; } } } @@ -181,7 +185,7 @@ std::vector betweenness_centrality_reference( std::stack S; std::vector dist(result.size()); - std::vector> pred(result.size()); + std::vector>> pred(result.size()); std::vector sigmas(result.size()); std::vector deltas(result.size()); @@ -220,14 +224,14 @@ std::vector edge_betweenness_centrality_reference( std::stack S; std::vector dist(offsets.size() - 1); - std::vector> pred(offsets.size() - 1); + std::vector>> pred(result.size()); std::vector sigmas(offsets.size() - 1); std::vector deltas(offsets.size() - 1); for (vertex_t s : seeds) { ref_bfs(offsets, indices, Q, S, dist, pred, sigmas, s); - ref_edge_accumulation(result, S, pred, sigmas, deltas, s); + ref_edge_accumulation(result, offsets, indices, S, pred, sigmas, deltas, s); } } return result; diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp index bb223067e1c..e4d22ff069c 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cpp @@ -99,7 +99,6 @@ class Tests_EdgeBetweennessCentrality hr_timer.start("Edge betweenness centrality"); } -#if 0 auto d_centralities = cugraph::edge_betweenness_centrality( handle, graph_view, @@ -108,17 +107,6 @@ class Tests_EdgeBetweennessCentrality raft::device_span{d_seeds.data(), d_seeds.size()}), betweenness_usecase.normalized, do_expensive_check); -#else - EXPECT_THROW(cugraph::edge_betweenness_centrality( - handle, - graph_view, - 
edge_weight_view, - std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), - betweenness_usecase.normalized, - do_expensive_check), - cugraph::logic_error); -#endif if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -127,24 +115,34 @@ class Tests_EdgeBetweennessCentrality } if (betweenness_usecase.check_correctness) { -#if 0 - auto [h_offsets, h_indices, h_wgt] = cugraph::test::graph_to_host_csr(handle, graph_view, edge_weight_view); + // Compute reference edge betweenness result + auto [h_offsets, h_indices, h_wgt] = + cugraph::test::graph_to_host_csr(handle, graph_view, edge_weight_view); - auto h_seeds = cugraph::test::to_host(handle, d_seeds); + auto h_seeds = cugraph::test::to_host(handle, d_seeds); auto h_reference_centralities = - betweenness_centrality_reference(h_offsets, h_indices, h_wgt, h_seeds, betweenness_usecase.include_endpoints); + edge_betweenness_centrality_reference(h_offsets, h_indices, h_wgt, h_seeds); + + rmm::device_uvector d_reference_src_vertex_ids(0, handle.get_stream()); + rmm::device_uvector d_reference_dst_vertex_ids(0, handle.get_stream()); + + std::tie(d_reference_src_vertex_ids, d_reference_dst_vertex_ids, std::ignore) = + cugraph::test::graph_to_device_coo(handle, graph_view, edge_weight_view); auto d_reference_centralities = cugraph::test::to_device(handle, h_reference_centralities); - // Need to get edges in order... + auto [d_cugraph_src_vertex_ids, d_cugraph_dst_vertex_ids, d_cugraph_results] = + cugraph::test::graph_to_device_coo( + handle, graph_view, std::make_optional(d_centralities.view())); cugraph::test::edge_betweenness_centrality_validate(handle, - d_renumber_map_labels, - d_centralities, - std::nullopt, + d_cugraph_src_vertex_ids, + d_cugraph_dst_vertex_ids, + *d_cugraph_results, + d_reference_src_vertex_ids, + d_reference_dst_vertex_ids, d_reference_centralities); -#endif } } }; @@ -188,7 +186,6 @@ INSTANTIATE_TEST_SUITE_P( EdgeBetweennessCentrality_Usecase{20, false, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), - cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); INSTANTIATE_TEST_SUITE_P( diff --git a/cpp/tests/centrality/eigenvector_centrality_test.cpp b/cpp/tests/centrality/eigenvector_centrality_test.cpp index f3408d9b131..7cafcfbde85 100644 --- a/cpp/tests/centrality/eigenvector_centrality_test.cpp +++ b/cpp/tests/centrality/eigenvector_centrality_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
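The next several hunks repeat a single call-site migration: decompress_to_edgelist grew an optional edge-id view parameter and now returns a four-element tuple, and because rmm::device_uvector is move-only the callers switch from structured bindings to pre-declared vectors plus std::tie with std::ignore. A condensed sketch of the new pattern wrapped as a helper; header paths and the exact template parameters of the edge-id view are assumptions based on the call sites in this PR:

#include <cugraph/edge_property.hpp>
#include <cugraph/graph_functions.hpp>
#include <cugraph/graph_view.hpp>
#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <optional>
#include <tuple>

// Decompress a graph view to COO, ignoring the new edge-id output. Callers
// that do not carry edge ids pass std::nullopt for edge_id_view and drop the
// fourth tuple element with std::ignore, exactly as the hunks above do.
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
           rmm::device_uvector<vertex_t>,
           std::optional<rmm::device_uvector<weight_t>>>
edgelist_without_ids(
  raft::handle_t const& handle,
  cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
  std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view)
{
  rmm::device_uvector<vertex_t> srcs(0, handle.get_stream());
  rmm::device_uvector<vertex_t> dsts(0, handle.get_stream());
  std::optional<rmm::device_uvector<weight_t>> wgts{std::nullopt};

  std::tie(srcs, dsts, wgts, std::ignore) = cugraph::decompress_to_edgelist(
    handle,
    graph_view,
    edge_weight_view,
    std::optional<cugraph::edge_property_view_t<edge_t, edge_t const*>>{std::nullopt},  // edge ids
    std::optional<raft::device_span<vertex_t const>>{std::nullopt});                    // renumber map

  return std::make_tuple(std::move(srcs), std::move(dsts), std::move(wgts));
}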
@@ -167,10 +167,15 @@ class Tests_EigenvectorCentrality } if (eigenvector_usecase.check_correctness) { - auto [dst_v, src_v, opt_wgt_v] = cugraph::decompress_to_edgelist( + rmm::device_uvector dst_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + std::optional> opt_wgt_v{std::nullopt}; + + std::tie(dst_v, src_v, opt_wgt_v, std::ignore) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto h_src = cugraph::test::to_host(handle, src_v); diff --git a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp index a1e73b6147b..ebc49e4a3e4 100644 --- a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp @@ -57,6 +57,8 @@ class Tests_MGEdgeBetweennessCentrality template void run_current_test(std::tuple const& param) { + constexpr bool do_expensive_check = false; + auto [betweenness_usecase, input_usecase] = param; HighResTimer hr_timer{}; @@ -83,7 +85,7 @@ class Tests_MGEdgeBetweennessCentrality mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; raft::random::RngState rng_state(handle_->get_comms().get_rank()); - auto d_seeds = cugraph::select_random_vertices( + auto d_mg_seeds = cugraph::select_random_vertices( *handle_, mg_graph_view, std::optional>{std::nullopt}, @@ -98,24 +100,13 @@ class Tests_MGEdgeBetweennessCentrality hr_timer.start("MG edge betweenness centrality"); } -#if 0 auto d_centralities = cugraph::edge_betweenness_centrality( *handle_, mg_graph_view, mg_edge_weight_view, std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), + raft::device_span{d_mg_seeds.data(), d_mg_seeds.size()}), betweenness_usecase.normalized); -#else - EXPECT_THROW(cugraph::edge_betweenness_centrality( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::make_optional>( - raft::device_span{d_seeds.data(), d_seeds.size()}), - betweenness_usecase.normalized), - cugraph::logic_error); -#endif if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -125,22 +116,52 @@ class Tests_MGEdgeBetweennessCentrality } if (betweenness_usecase.check_correctness) { -#if 0 - d_centralities = cugraph::test::device_gatherv( - *handle_, raft::device_span(d_centralities.data(), d_centralities.size())); - d_seeds = cugraph::test::device_gatherv( - *handle_, raft::device_span(d_seeds.data(), d_seeds.size())); - - auto [h_src, h_dst, h_wgt] = cugraph::test::graph_to_host_coo(*handle_, graph_view); - - if (h_src.size() > 0) { - auto h_centralities = cugraph::test::to_host(*handle_, d_centralities); - auto h_seeds = cugraph::test::to_host(*handle_, d_seeds); - - cugraph::test::edge_betweenness_centrality_validate( - h_src, h_dst, h_wgt, h_centralities, h_seeds); + // Extract MG results + auto [d_cugraph_src_vertex_ids, d_cugraph_dst_vertex_ids, d_cugraph_results] = + cugraph::test::graph_to_device_coo( + *handle_, mg_graph_view, std::make_optional(d_centralities.view())); + + // Create SG graph so we can generate SG results + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + false); + + auto d_mg_aggregate_seeds = 
cugraph::test::device_gatherv( + *handle_, raft::device_span{d_mg_seeds.data(), d_mg_seeds.size()}); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_edge_weights_view = + sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt; + + // Generate SG results and compare + auto d_sg_centralities = cugraph::edge_betweenness_centrality( + *handle_, + sg_graph.view(), + sg_edge_weights_view, + std::make_optional>(raft::device_span{ + d_mg_aggregate_seeds.data(), d_mg_aggregate_seeds.size()}), + betweenness_usecase.normalized, + do_expensive_check); + + auto [d_sg_src_vertex_ids, d_sg_dst_vertex_ids, d_sg_reference_centralities] = + cugraph::test::graph_to_device_coo( + *handle_, sg_graph.view(), std::make_optional(d_sg_centralities.view())); + + cugraph::test::edge_betweenness_centrality_validate(*handle_, + d_cugraph_src_vertex_ids, + d_cugraph_dst_vertex_ids, + *d_cugraph_results, + d_sg_src_vertex_ids, + d_sg_dst_vertex_ids, + *d_sg_reference_centralities); } -#endif } } diff --git a/cpp/tests/community/egonet_validate.cu b/cpp/tests/community/egonet_validate.cu index 44b74090ec4..5fc94c5c07d 100644 --- a/cpp/tests/community/egonet_validate.cu +++ b/cpp/tests/community/egonet_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,11 +44,16 @@ egonet_reference( int radius) { #if 1 - auto [d_coo_src, d_coo_dst, d_coo_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_coo_src(0, handle.get_stream()); + rmm::device_uvector d_coo_dst(0, handle.get_stream()); + std::optional> d_coo_wgt{std::nullopt}; + + std::tie(d_coo_src, d_coo_dst, d_coo_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); #else // FIXME: This should be faster (smaller list of edges to operate on), but uniform_nbr_sample // doesn't preserve multi-edges (which is probably a bug) diff --git a/cpp/tests/cores/k_core_validate.cu b/cpp/tests/cores/k_core_validate.cu index 687349dbbd7..b264ed53540 100644 --- a/cpp/tests/cores/k_core_validate.cu +++ b/cpp/tests/cores/k_core_validate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
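The block above is the template for re-enabling the other disabled MG comparisons as well: pull the MG result down to COO form, rebuild the distributed graph as a single-GPU graph, gather the per-rank seed lists, and let rank 0 rerun the algorithm and validate edge by edge. A condensed sketch of that flow, using the test utilities named in the hunk (graph_to_device_coo, mg_graph_to_sg_graph, device_gatherv); handle_, mg_graph_view, mg_edge_weight_view, d_mg_seeds, d_centralities, and normalized come from the surrounding fixture, and the graph template parameters are illustrative:

// 1. MG results as (src, dst, value) COO, still distributed.
auto [d_mg_srcs, d_mg_dsts, d_mg_vals] = cugraph::test::graph_to_device_coo(
  *handle_, mg_graph_view, std::make_optional(d_centralities.view()));

// 2. Rebuild the graph single-GPU and gather the seeds used by every rank.
cugraph::graph_t<vertex_t, edge_t, false, false> sg_graph(*handle_);
std::optional<cugraph::edge_property_t<cugraph::graph_view_t<vertex_t, edge_t, false, false>,
                                       weight_t>>
  sg_edge_weights{std::nullopt};
std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph(
  *handle_,
  mg_graph_view,
  mg_edge_weight_view,
  std::optional<raft::device_span<vertex_t const>>{std::nullopt},
  false);

auto d_sg_seeds = cugraph::test::device_gatherv(
  *handle_, raft::device_span<vertex_t const>{d_mg_seeds.data(), d_mg_seeds.size()});

// 3. Rank 0 reruns the algorithm on the SG graph; results are then compared
//    per edge (see edge_betweenness_centrality_validate in the hunk above).
if (handle_->get_comms().get_rank() == 0) {
  auto d_sg_centralities = cugraph::edge_betweenness_centrality(
    *handle_,
    sg_graph.view(),
    sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt,
    std::make_optional(raft::device_span<vertex_t const>{d_sg_seeds.data(), d_sg_seeds.size()}),
    normalized,
    false /* do_expensive_check */);
}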
@@ -61,12 +61,17 @@ void check_correctness( EXPECT_EQ(error_count, 0) << "destination error count is non-zero"; - auto [graph_src, graph_dst, graph_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}, - false); + rmm::device_uvector graph_src(0, handle.get_stream()); + rmm::device_uvector graph_dst(0, handle.get_stream()); + std::optional> graph_wgt{std::nullopt}; + + std::tie(graph_src, graph_dst, graph_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // Now we'll count how many edges should be in the subgraph auto expected_edge_count = diff --git a/cpp/tests/link_analysis/mg_pagerank_test.cpp b/cpp/tests/link_analysis/mg_pagerank_test.cpp index b3d9e0271d0..922a6ff2781 100644 --- a/cpp/tests/link_analysis/mg_pagerank_test.cpp +++ b/cpp/tests/link_analysis/mg_pagerank_test.cpp @@ -120,30 +120,25 @@ class Tests_MGPageRank result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - rmm::device_uvector d_mg_pageranks(mg_graph_view.local_vertex_partition_range_size(), - handle_->get_stream()); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); hr_timer.start("MG PageRank"); } - cugraph::pagerank( + auto [d_mg_pageranks, metadata] = cugraph::pagerank( *handle_, mg_graph_view, mg_edge_weight_view, std::nullopt, d_mg_personalization_vertices - ? std::optional{(*d_mg_personalization_vertices).data()} - : std::nullopt, - d_mg_personalization_values - ? std::optional{(*d_mg_personalization_values).data()} - : std::nullopt, - d_mg_personalization_vertices - ? std::optional{static_cast((*d_mg_personalization_vertices).size())} + ? std::make_optional(std::make_tuple( + raft::device_span{d_mg_personalization_vertices->data(), + d_mg_personalization_vertices->size()}, + raft::device_span{d_mg_personalization_values->data(), + d_mg_personalization_values->size()})) : std::nullopt, - d_mg_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), @@ -211,25 +206,19 @@ class Tests_MGPageRank ASSERT_EQ(mg_graph_view.number_of_vertices(), sg_graph_view.number_of_vertices()); - rmm::device_uvector d_sg_pageranks(sg_graph_view.number_of_vertices(), - handle_->get_stream()); - - cugraph::pagerank( + auto [d_sg_pageranks, sg_metadata] = cugraph::pagerank( *handle_, sg_graph_view, sg_edge_weight_view, std::nullopt, d_mg_aggregate_personalization_vertices - ? std::optional{(*d_mg_aggregate_personalization_vertices).data()} - : std::nullopt, - d_mg_aggregate_personalization_values - ? std::optional{(*d_mg_aggregate_personalization_values).data()} - : std::nullopt, - d_mg_aggregate_personalization_vertices - ? std::optional{static_cast( - (*d_mg_aggregate_personalization_vertices).size())} + ? 
std::make_optional(std::make_tuple( + raft::device_span{d_mg_aggregate_personalization_vertices->data(), + d_mg_aggregate_personalization_vertices->size()}, + raft::device_span{d_mg_aggregate_personalization_values->data(), + d_mg_aggregate_personalization_values->size()})) : std::nullopt, - d_sg_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), // max_iterations diff --git a/cpp/tests/link_analysis/pagerank_test.cpp b/cpp/tests/link_analysis/pagerank_test.cpp index adb4ea2fa54..0354b69b8a8 100644 --- a/cpp/tests/link_analysis/pagerank_test.cpp +++ b/cpp/tests/link_analysis/pagerank_test.cpp @@ -206,30 +206,27 @@ class Tests_PageRank result_t constexpr alpha{0.85}; result_t constexpr epsilon{1e-6}; - rmm::device_uvector d_pageranks(graph_view.number_of_vertices(), handle.get_stream()); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.start("PageRank"); } - cugraph::pagerank( + auto [d_pageranks, metadata] = cugraph::pagerank( handle, graph_view, edge_weight_view, std::nullopt, d_personalization_vertices - ? std::optional{(*d_personalization_vertices).data()} + ? std::make_optional( + std::make_tuple(raft::device_span{d_personalization_vertices->data(), + d_personalization_vertices->size()}, + raft::device_span{d_personalization_values->data(), + d_personalization_values->size()})) : std::nullopt, - d_personalization_values ? std::optional{(*d_personalization_values).data()} - : std::nullopt, - d_personalization_vertices ? std::optional{(*d_personalization_vertices).size()} - : std::nullopt, - d_pageranks.data(), + std::optional>{std::nullopt}, alpha, epsilon, std::numeric_limits::max(), - false, false); if (cugraph::test::g_perf) { diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index bebb21bd720..449aa728d87 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu index f90f788cfae..3d745708401 100644 --- a/cpp/tests/prims/mg_count_if_v.cu +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 1c85b55e4be..b71fe5ddb5e 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 3cd6bd243e1..4d9435dd344 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index 97d52c04114..677d6ce5022 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index 7080eb12da6..b6f8da48ef4 100644 --- 
a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_e.cu b/cpp/tests/prims/mg_transform_e.cu index 47def15fffc..127eddd43c7 100644 --- a/cpp/tests/prims/mg_transform_e.cu +++ b/cpp/tests/prims/mg_transform_e.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include @@ -117,10 +117,11 @@ class Tests_MGTransformE { rmm::device_uvector srcs(0, handle_->get_stream()); rmm::device_uvector dsts(0, handle_->get_stream()); - std::tie(srcs, dsts, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(srcs, dsts, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(store_transposed ? dsts.begin() : srcs.begin(), diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index 8dba488f23d..79aa3da54df 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index 3ea7636a718..c9fc138ae1b 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 2eb270973f2..d0b97065da7 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/prims/property_generator.cuh b/cpp/tests/prims/property_generator.cuh index 24a21c1cb01..e7264cd276f 100644 --- a/cpp/tests/prims/property_generator.cuh +++ b/cpp/tests/prims/property_generator.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh index 00c14009e86..8221073f556 100644 --- a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh +++ b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh @@ -31,7 +31,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/sampling/random_walks_check.cuh b/cpp/tests/sampling/random_walks_check.cuh index 4cd74f01bcb..f73891a1537 100644 --- a/cpp/tests/sampling/random_walks_check.cuh +++ b/cpp/tests/sampling/random_walks_check.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,11 +37,16 @@ void random_walks_validate( std::optional>&& d_weights, size_t max_length) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (multi_gpu) { d_src = cugraph::test::device_gatherv( diff --git a/cpp/tests/structure/mg_symmetrize_test.cpp b/cpp/tests/structure/mg_symmetrize_test.cpp index cdacff91403..f2d37170f76 100644 --- a/cpp/tests/structure/mg_symmetrize_test.cpp +++ b/cpp/tests/structure/mg_symmetrize_test.cpp @@ -123,11 +123,15 @@ class Tests_MGSymmetrize if (symmetrize_usecase.check_correctness) { // 4-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -157,11 +161,15 @@ class Tests_MGSymmetrize ASSERT_FALSE(d_sg_renumber_map_labels.has_value()); // 4-4. decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/mg_transpose_storage_test.cpp b/cpp/tests/structure/mg_transpose_storage_test.cpp index b6033a7ab53..1adce8d102e 100644 --- a/cpp/tests/structure/mg_transpose_storage_test.cpp +++ b/cpp/tests/structure/mg_transpose_storage_test.cpp @@ -131,13 +131,17 @@ class Tests_MGTransposeStorage if (transpose_storage_usecase.check_correctness) { // 3-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_storage_transposed_graph.view(), mg_storage_transposed_edge_weights ? std::make_optional((*mg_storage_transposed_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -156,11 +160,15 @@ class Tests_MGTransposeStorage if (handle_->get_comms().get_rank() == int{0}) { // 3-3. 
decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 3-4. compare diff --git a/cpp/tests/structure/mg_transpose_test.cpp b/cpp/tests/structure/mg_transpose_test.cpp index 3558e0f7d97..03a31e14ca9 100644 --- a/cpp/tests/structure/mg_transpose_test.cpp +++ b/cpp/tests/structure/mg_transpose_test.cpp @@ -121,11 +121,15 @@ class Tests_MGTranspose if (transpose_usecase.check_correctness) { // 4-1. decompress MG results + rmm::device_uvector d_mg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_mg_dsts(0, handle_->get_stream()); + std::optional> d_mg_weights{std::nullopt}; - auto [d_mg_srcs, d_mg_dsts, d_mg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_mg_srcs, d_mg_dsts, d_mg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, mg_renumber_map ? std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()) : std::nullopt); @@ -152,11 +156,15 @@ class Tests_MGTranspose std::optional>{std::nullopt}); // 4-4. decompress SG results + rmm::device_uvector d_sg_srcs(0, handle_->get_stream()); + rmm::device_uvector d_sg_dsts(0, handle_->get_stream()); + std::optional> d_sg_weights{std::nullopt}; - auto [d_sg_srcs, d_sg_dsts, d_sg_weights] = cugraph::decompress_to_edgelist( + std::tie(d_sg_srcs, d_sg_dsts, d_sg_weights, std::ignore) = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // 4-5. compare diff --git a/cpp/tests/structure/symmetrize_test.cpp b/cpp/tests/structure/symmetrize_test.cpp index 9673b29e389..89ff9ed139a 100644 --- a/cpp/tests/structure/symmetrize_test.cpp +++ b/cpp/tests/structure/symmetrize_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -211,14 +211,18 @@ class Tests_Symmetrize rmm::device_uvector d_org_srcs(0, handle.get_stream()); rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; + if (symmetrize_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? 
std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -240,13 +244,20 @@ class Tests_Symmetrize } if (symmetrize_usecase.check_correctness) { - auto [d_symm_srcs, d_symm_dsts, d_symm_weights] = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + rmm::device_uvector d_symm_srcs(0, handle.get_stream()); + rmm::device_uvector d_symm_dsts(0, handle.get_stream()); + std::optional> d_symm_weights{std::nullopt}; + + std::tie(d_symm_srcs, d_symm_dsts, d_symm_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); auto h_org_srcs = cugraph::test::to_host(handle, d_org_srcs); auto h_org_dsts = cugraph::test::to_host(handle, d_org_dsts); diff --git a/cpp/tests/structure/transpose_storage_test.cpp b/cpp/tests/structure/transpose_storage_test.cpp index 8c94e62d68b..a713abf7dae 100644 --- a/cpp/tests/structure/transpose_storage_test.cpp +++ b/cpp/tests/structure/transpose_storage_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,13 +77,16 @@ class Tests_TransposeStorage rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_storage_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -107,13 +110,21 @@ class Tests_TransposeStorage } if (transpose_storage_usecase.check_correctness) { - auto [d_storage_transposed_srcs, d_storage_transposed_dsts, d_storage_transposed_weights] = + rmm::device_uvector d_storage_transposed_srcs(0, handle.get_stream()); + rmm::device_uvector d_storage_transposed_dsts(0, handle.get_stream()); + std::optional> d_storage_transposed_weights{std::nullopt}; + + std::tie(d_storage_transposed_srcs, + d_storage_transposed_dsts, + d_storage_transposed_weights, + std::ignore) = cugraph::decompress_to_edgelist( handle, storage_transposed_graph.view(), storage_transposed_edge_weights ? std::make_optional((*storage_transposed_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, d_renumber_map_labels ? 
std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/structure/transpose_test.cpp b/cpp/tests/structure/transpose_test.cpp index 39ae7d7635e..1cbefa21fcc 100644 --- a/cpp/tests/structure/transpose_test.cpp +++ b/cpp/tests/structure/transpose_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,13 +77,16 @@ class Tests_Transpose rmm::device_uvector d_org_dsts(0, handle.get_stream()); std::optional> d_org_weights{std::nullopt}; if (transpose_usecase.check_correctness) { - std::tie(d_org_srcs, d_org_dsts, d_org_weights) = cugraph::decompress_to_edgelist( - handle, - graph.view(), - edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, - d_renumber_map_labels ? std::make_optional>( - (*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) - : std::nullopt); + std::tie(d_org_srcs, d_org_dsts, d_org_weights, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph.view(), + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + d_renumber_map_labels + ? std::make_optional>((*d_renumber_map_labels).data(), + (*d_renumber_map_labels).size()) + : std::nullopt); } if (cugraph::test::g_perf) { @@ -101,11 +104,16 @@ class Tests_Transpose } if (transpose_usecase.check_correctness) { - auto [d_transposed_srcs, d_transposed_dsts, d_transposed_weights] = + rmm::device_uvector d_transposed_srcs(0, handle.get_stream()); + rmm::device_uvector d_transposed_dsts(0, handle.get_stream()); + std::optional> d_transposed_weights{std::nullopt}; + + std::tie(d_transposed_srcs, d_transposed_dsts, d_transposed_weights, std::ignore) = cugraph::decompress_to_edgelist( handle, graph.view(), edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, d_renumber_map_labels ? 
std::make_optional>((*d_renumber_map_labels).data(), (*d_renumber_map_labels).size()) diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 615522a863b..1fa869ac2df 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -480,6 +480,20 @@ graph_to_host_coo( cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view); +// If multi-GPU, only the rank 0 GPU holds the valid data +template +std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + // If multi-GPU, only the rank 0 GPU holds the valid data template const& graph_view, std::optional> edge_weight_view) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (is_multi_gpu) { d_src = cugraph::test::device_gatherv( @@ -89,6 +94,53 @@ graph_to_host_coo( return std::make_tuple(std::move(h_src), std::move(h_dst), std::move(h_wgt)); } +template +std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view) +{ + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); + + if constexpr (is_multi_gpu) { + d_src = cugraph::test::device_gatherv( + handle, raft::device_span{d_src.data(), d_src.size()}); + d_dst = cugraph::test::device_gatherv( + handle, raft::device_span{d_dst.data(), d_dst.size()}); + if (d_wgt) + *d_wgt = cugraph::test::device_gatherv( + handle, raft::device_span{d_wgt->data(), d_wgt->size()}); + if (handle.get_comms().get_rank() != 0) { + d_src.resize(0, handle.get_stream()); + d_src.shrink_to_fit(handle.get_stream()); + d_dst.resize(0, handle.get_stream()); + d_dst.shrink_to_fit(handle.get_stream()); + if (d_wgt) { + (*d_wgt).resize(0, handle.get_stream()); + (*d_wgt).shrink_to_fit(handle.get_stream()); + } + } + } + + return std::make_tuple(std::move(d_src), std::move(d_dst), std::move(d_wgt)); +} + template const& graph_view, std::optional> edge_weight_view) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, - graph_view, - edge_weight_view, - std::optional>{std::nullopt}); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); if constexpr (is_multi_gpu) { d_src = cugraph::test::device_gatherv( @@ -184,8 +241,16 @@ mg_graph_to_sg_graph( std::optional> number_map, bool renumber) { - auto [d_src, d_dst, d_wgt] = - cugraph::decompress_to_edgelist(handle, graph_view, 
edge_weight_view, number_map); + rmm::device_uvector d_src(0, handle.get_stream()); + rmm::device_uvector d_dst(0, handle.get_stream()); + std::optional> d_wgt{std::nullopt}; + + std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight_view, + std::optional>{std::nullopt}, + number_map); d_src = cugraph::test::device_gatherv( handle, raft::device_span{d_src.data(), d_src.size()}); diff --git a/cpp/tests/utilities/test_utilities_mg.cu b/cpp/tests/utilities/test_utilities_mg.cu index b572f7df23a..7366a8376a4 100644 --- a/cpp/tests/utilities/test_utilities_mg.cu +++ b/cpp/tests/utilities/test_utilities_mg.cu @@ -90,6 +90,102 @@ graph_to_host_coo( cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view); +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + template std::tuple, std::vector, std::optional>> graph_to_host_csr( raft::handle_t const& handle, diff --git a/cpp/tests/utilities/test_utilities_sg.cu b/cpp/tests/utilities/test_utilities_sg.cu index a5a4fecb4e5..aceff526f21 100644 --- a/cpp/tests/utilities/test_utilities_sg.cu +++ b/cpp/tests/utilities/test_utilities_sg.cu @@ -90,6 +90,102 @@ graph_to_host_coo( cugraph::graph_view_t 
const& graph_view, std::optional> edge_weight_view); +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + +template std::tuple, + rmm::device_uvector, + std::optional>> +graph_to_device_coo( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view); + template std::tuple, std::vector, std::optional>> graph_to_host_csr( raft::handle_t const& handle, diff --git a/docs/cugraph/source/conf.py b/docs/cugraph/source/conf.py index 394acf0e950..b64901772dc 100644 --- a/docs/cugraph/source/conf.py +++ b/docs/cugraph/source/conf.py @@ -204,6 +204,5 @@ def setup(app): # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( - 'cugraph', 'https://github.com/rapidsai/' - 'cugraph/blob/{revision}/python/' - '{package}/{path}#L{lineno}') + "https://github.com/rapidsai/cugraph/blob/{revision}/python/{path}#L{lineno}" +) \ No newline at end of file diff --git a/docs/cugraph/source/sphinxext/github_link.py b/docs/cugraph/source/sphinxext/github_link.py index fa8fe3f5fe3..cc28dc6e897 100644 --- a/docs/cugraph/source/sphinxext/github_link.py +++ b/docs/cugraph/source/sphinxext/github_link.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,10 +16,8 @@ # license in /thirdparty/LICENSES/LICENSE.scikit_learn import inspect -import os import re import subprocess -import sys from functools import partial from operator import attrgetter @@ -56,7 +54,7 @@ def _get_git_revision(): return revision.decode('utf-8') -def _linkcode_resolve(domain, info, package, url_fmt, revision): +def _linkcode_resolve(domain, info, url_fmt, revision): """Determine a link to online source for a class/method/function This is called by sphinx.ext.linkcode @@ -73,7 +71,7 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): if revision is None: return - if domain not in ('py', 'pyx'): + if domain != 'py': return if not info.get('module') or not info.get('fullname'): return @@ -89,41 +87,29 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): fn: str = None lineno: str = None - try: - fn = inspect.getsourcefile(obj) - except Exception: - fn = None - if not fn: - try: - fn = inspect.getsourcefile(sys.modules[obj.__module__]) - except Exception: - fn = None - - if not fn: - # Possibly Cython code. Search docstring for source - m = source_regex.search(obj.__doc__) - - if (m is not None): - source_file = m.group(1) - lineno = m.group(2) - - # fn is expected to be the absolute path. - fn = os.path.relpath(source_file, start=package) - print("{}:{}".format( - os.path.abspath(os.path.join("..", "python", "cuml", fn)), - lineno)) - else: - return - else: - # Test if we are absolute or not (pyx are relative) - if (not os.path.isabs(fn)): - # Should be relative to docs right now - fn = os.path.abspath(os.path.join("..", "python", fn)) - - # Convert to relative from module root - fn = os.path.relpath(fn, - start=os.path.dirname( - __import__(package).__file__)) + obj_module = inspect.getmodule(obj) + if not obj_module: + print(f"could not infer source code link for: {info}") + return + module_name = obj_module.__name__.split('.')[0] + + module_dir_dict = { + "cugraph_dgl": "cugraph-dgl", + "cugraph_pyg": "cugraph-pyg", + "cugraph_service_client": "cugraph-service/client", + "cugraph_service_server": "cugraph-service/server", + "cugraph": "cugraph", + "pylibcugraph": "pylibcugraph", + } + module_dir = module_dir_dict.get(module_name) + if not module_dir: + print(f"no source path directory set for {module_name}") + return + + obj_path = "/".join(obj_module.__name__.split(".")[1:]) + obj_file_ext = obj_module.__file__.split('.')[-1] + source_ext = "pyx" if obj_file_ext == "so" else "py" + fn = f"{module_dir}/{module_name}/{obj_path}.{source_ext}" # Get the line number if we need it. 
(Can work without it) if (lineno is None): @@ -137,18 +123,15 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): else: lineno = '' return url_fmt.format(revision=revision, - package=package, path=fn, lineno=lineno) -def make_linkcode_resolve(package, url_fmt): +def make_linkcode_resolve(url_fmt): """Returns a linkcode_resolve function for the given URL format revision is a git commit reference (hash or name) - package is the name of the root module of the package - url_fmt is along the lines of ('https://github.com/USER/PROJECT/' 'blob/{revision}/{package}/' '{path}#L{lineno}') @@ -156,5 +139,4 @@ def make_linkcode_resolve(package, url_fmt): revision = _get_git_revision() return partial(_linkcode_resolve, revision=revision, - package=package, url_fmt=url_fmt) diff --git a/mg_utils/run-dask-process.sh b/mg_utils/run-dask-process.sh index e5fa8fab332..b88abb685ec 100755 --- a/mg_utils/run-dask-process.sh +++ b/mg_utils/run-dask-process.sh @@ -102,6 +102,7 @@ function buildTcpArgs { " WORKER_ARGS="--rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-async --local-directory=/tmp/$LOGNAME --scheduler-file=$SCHEDULER_FILE --memory-limit=$DASK_HOST_MEMORY_LIMIT diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py index e70f2d0c6d1..7825febc24b 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py @@ -19,8 +19,8 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, SampledCSC, StaticCSC -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch import CSC +from pylibcugraphops.pytorch.operators import mha_gat_n2n dgl = import_optional("dgl") torch = import_optional("torch") @@ -173,9 +173,20 @@ def forward( :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ + if max_in_degree is None: + max_in_degree = -1 + bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + dst_max_in_degree=max_in_degree, + is_bipartite=bipartite, + ) + if efeat is not None: if self.fc_edge is None: raise RuntimeError( @@ -191,23 +202,8 @@ def forward( f"integers to allow bipartite node features, but got " f"{self.in_feats}." ) - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) nfeat_src = self.fc_src(nfeat[0]) nfeat_dst = self.fc_dst(nfeat[1]) - - out = mha_gat_n2n_bipartite( - src_feat=nfeat_src, - dst_feat=nfeat_dst, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - ) else: if not hasattr(self, "fc"): raise RuntimeError( @@ -215,36 +211,17 @@ def forward( f"integer, but got {self.in_feats}." 
) nfeat = self.fc(nfeat) - # Sampled primitive does not support edge features - if g.is_block and efeat is None: - if max_in_degree is None: - max_in_degree = g.in_degrees().max().item() - - if max_in_degree < self.MAX_IN_DEGREE_MFG: - _graph = SampledCSC( - offsets=offsets, - indices=indices, - max_num_neighbors=max_in_degree, - num_src_nodes=g.num_src_nodes(), - ) - else: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - else: - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - out = mha_gat_n2n( - feat=nfeat, - attn_weights=self.attn_weights, - graph=_graph, - num_heads=self.num_heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=efeat, - )[: g.num_dst_nodes()] + + out = mha_gat_n2n( + (nfeat_src, nfeat_dst) if bipartite else nfeat, + self.attn_weights, + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=efeat, + )[: g.num_dst_nodes()] if self.concat: out = out.view(-1, self.num_heads, self.out_feats) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py index 1898f5159b1..141adc86069 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py @@ -15,7 +15,7 @@ from cugraph_dgl.nn.conv.base import BaseConv from cugraph.utilities.utils import import_optional -from pylibcugraphops.pytorch import BipartiteCSC, StaticCSC +from pylibcugraphops.pytorch import CSC from pylibcugraphops.pytorch.operators import mha_simple_n2n dgl = import_optional("dgl") @@ -132,31 +132,34 @@ def forward( efeat: torch.Tensor, optional Edge feature tensor. Default: ``None``. """ - bipartite = not isinstance(nfeat, torch.Tensor) offsets, indices, _ = g.adj_tensors("csc") - - if bipartite: - src_feats, dst_feats = nfeat - _graph = BipartiteCSC( - offsets=offsets, indices=indices, num_src_nodes=g.num_src_nodes() - ) - else: - src_feats = dst_feats = nfeat - if g.is_block: - offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = StaticCSC(offsets=offsets, indices=indices) - - query = self.lin_query(dst_feats) - key = self.lin_key(src_feats) - value = self.lin_value(src_feats) - if self.lin_edge is not None: + graph = CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + is_bipartite=True, + ) + + if isinstance(nfeat, torch.Tensor): + nfeat = (nfeat, nfeat) + + query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) + key = self.lin_key(nfeat[0]) + value = self.lin_value(nfeat[0]) + + if efeat is not None: + if self.lin_edge is None: + raise RuntimeError( + f"{self.__class__.__name__}.edge_feats must be set to allow " + f"edge features." 
+ ) efeat = self.lin_edge(efeat) out = mha_simple_n2n( key_emb=key, query_emb=query, value_emb=value, - graph=_graph, + graph=graph, num_heads=self.num_heads, concat_heads=self.concat, edge_emb=efeat, @@ -165,7 +168,7 @@ def forward( )[: g.num_dst_nodes()] if self.root_weight: - res = self.lin_skip(dst_feats[: g.num_dst_nodes()]) + res = self.lin_skip(nfeat[1][: g.num_dst_nodes()]) if self.lin_beta is not None: beta = self.lin_beta(torch.cat([out, res, out - res], dim=-1)) beta = beta.sigmoid() diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index 64af795231c..00476b9f0bb 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -26,14 +26,14 @@ @pytest.mark.parametrize("beta", [False, True]) -@pytest.mark.parametrize("bipartite", [False, True]) +@pytest.mark.parametrize("bipartite_node_feats", [False, True]) @pytest.mark.parametrize("concat", [False, True]) @pytest.mark.parametrize("idtype_int", [False, True]) @pytest.mark.parametrize("num_heads", [1, 2, 3, 4]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) def test_TransformerConv( - beta, bipartite, concat, idtype_int, num_heads, to_block, use_edge_feats + beta, bipartite_node_feats, concat, idtype_int, num_heads, to_block, use_edge_feats ): device = "cuda" g = create_graph1().to(device) @@ -44,7 +44,7 @@ def test_TransformerConv( if to_block: g = dgl.to_block(g) - if bipartite: + if bipartite_node_feats: in_node_feats = (5, 3) nfeat = ( torch.rand(g.num_src_nodes(), in_node_feats[0], device=device), diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index bec50792131..207efcdace4 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -12,7 +12,7 @@ # limitations under the License. import warnings -from typing import Any, Optional, Tuple, Union +from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional @@ -20,13 +20,7 @@ torch_geometric = import_optional("torch_geometric") try: # pragma: no cover - from pylibcugraphops.pytorch import ( - BipartiteCSC, - SampledCSC, - SampledHeteroCSC, - StaticCSC, - StaticHeteroCSC, - ) + from pylibcugraphops.pytorch import CSC, HeteroCSC HAS_PYLIBCUGRAPHOPS = True except ImportError: @@ -94,7 +88,7 @@ def get_cugraph( csc: Tuple[torch.Tensor, torch.Tensor, int], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> CSC: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. 
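The hunks in this file, like those in the cugraph-dgl and cugraph-pyg convolution layers above, replace the separate BipartiteCSC, SampledCSC, and StaticCSC graph types with the single CSC class. A minimal sketch of the consolidated constructor, using only the keyword arguments that appear in this diff; the tensor contents, sizes, and device placement are illustrative assumptions, not values from the PR:

import torch
from pylibcugraphops.pytorch import CSC

# 3 destination nodes (offsets has length 4), 3 source nodes, 5 edges
offsets = torch.tensor([0, 2, 3, 5], dtype=torch.int64, device="cuda")
indices = torch.tensor([0, 1, 2, 0, 2], dtype=torch.int64, device="cuda")

graph = CSC(
    offsets=offsets,
    indices=indices,
    num_src_nodes=3,
    dst_max_in_degree=-1,  # -1 mirrors the "no max in-degree given" default used above
    is_bipartite=False,
)

The HeteroCSC construction in get_typed_cugraph below follows the same pattern, adding edge_types and num_edge_types arguments.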
@@ -119,16 +113,16 @@ def get_cugraph( f"based processing (got CPU tensor)" ) - if bipartite: - return BipartiteCSC(colptr, row, num_src_nodes) + if max_num_neighbors is None: + max_num_neighbors = -1 - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledCSC(colptr, row, max_num_neighbors, num_src_nodes) - - return StaticCSC(colptr, row) + return CSC( + offsets=colptr, + indices=row, + num_src_nodes=num_src_nodes, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def get_typed_cugraph( self, @@ -137,7 +131,7 @@ def get_typed_cugraph( num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> Any: + ) -> HeteroCSC: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. @@ -162,21 +156,21 @@ def get_typed_cugraph( if num_edge_types is None: num_edge_types = int(edge_type.max()) + 1 + if max_num_neighbors is None: + max_num_neighbors = -1 + row, colptr, num_src_nodes = csc edge_type = edge_type.int() - if bipartite: - raise NotImplementedError - - if num_src_nodes != colptr.numel() - 1: - if max_num_neighbors is None: - max_num_neighbors = int((colptr[1:] - colptr[:-1]).max()) - - return SampledHeteroCSC( - colptr, row, edge_type, max_num_neighbors, num_src_nodes, num_edge_types - ) - - return StaticHeteroCSC(colptr, row, edge_type, num_edge_types) + return HeteroCSC( + offsets=colptr, + indices=row, + edge_types=edge_type, + num_src_nodes=num_src_nodes, + num_edge_types=num_edge_types, + dst_max_in_degree=max_num_neighbors, + is_bipartite=bipartite, + ) def forward( self, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index 4bf37cf3e72..23b7d50ba96 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_n2n, mha_gat_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_n2n from cugraph.utilities.utils import import_optional @@ -203,19 +203,6 @@ def forward( ) x_src = self.lin_src(x[0]) x_dst = self.lin_dst(x[1]) - - out = mha_gat_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: if not hasattr(self, "lin"): raise RuntimeError( @@ -224,16 +211,16 @@ def forward( ) x = self.lin(x) - out = mha_gat_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index 66d962b3f86..d4c947b952a 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -12,7 +12,7 @@ # limitations under the License. 
from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n, mha_gat_v2_n2n_bipartite +from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n from cugraph.utilities.utils import import_optional @@ -187,8 +187,8 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) - graph = self.get_cugraph(csc, bipartite=bipartite or not self.share_weights) + bipartite = not isinstance(x, torch.Tensor) or not self.share_weights + graph = self.get_cugraph(csc, bipartite=bipartite) if edge_attr is not None: if self.lin_edge is None: @@ -200,38 +200,24 @@ def forward( edge_attr = edge_attr.view(-1, 1) edge_attr = self.lin_edge(edge_attr) - if not bipartite and self.share_weights: + if bipartite: + if isinstance(x, torch.Tensor): + x = (x, x) + x_src = self.lin_src(x[0]) + x_dst = self.lin_dst(x[1]) + else: x = self.lin_src(x) - out = mha_gat_v2_n2n( - x, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) - else: - if bipartite: - x_src = self.lin_src(x[0]) - x_dst = self.lin_dst(x[1]) - else: - x_src = self.lin_src(x) - x_dst = self.lin_dst(x) - - out = mha_gat_v2_n2n_bipartite( - x_src, - x_dst, - self.att, - graph, - num_heads=self.heads, - activation="LeakyReLU", - negative_slope=self.negative_slope, - concat_heads=self.concat, - edge_feat=edge_attr, - ) + out = mha_gat_v2_n2n( + (x_src, x_dst) if bipartite else x, + self.att, + graph, + num_heads=self.heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=edge_attr, + ) if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index aeb51c028ae..f67756eb3fe 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -12,7 +12,7 @@ # limitations under the License. from typing import Optional, Tuple, Union -from pylibcugraphops.pytorch.operators import mha_simple_n2n as TransformerConvAgg +from pylibcugraphops.pytorch.operators import mha_simple_n2n from cugraph.utilities.utils import import_optional @@ -168,10 +168,10 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. """ - bipartite = not isinstance(x, torch.Tensor) + bipartite = True graph = self.get_cugraph(csc, bipartite=bipartite) - if not bipartite: + if isinstance(x, torch.Tensor): x = (x, x) query = self.lin_query(x[1]) @@ -186,7 +186,7 @@ def forward( ) edge_attr = self.lin_edge(edge_attr) - out = TransformerConvAgg( + out = mha_simple_n2n( key, query, value, diff --git a/python/cugraph/cugraph/__init__.py b/python/cugraph/cugraph/__init__.py index 8ed49ccdd1b..3b9c4e007e2 100644 --- a/python/cugraph/cugraph/__init__.py +++ b/python/cugraph/cugraph/__init__.py @@ -118,5 +118,6 @@ from cugraph import gnn +from cugraph import exceptions __version__ = "23.08.00" diff --git a/python/cugraph/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/cugraph/dask/link_analysis/pagerank.py index 4aba5725c1b..2dfd25fa522 100644 --- a/python/cugraph/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/dask/link_analysis/pagerank.py @@ -13,31 +13,41 @@ # limitations under the License. 
# +import warnings + +import dask from dask.distributed import wait, default_client -import cugraph.dask.comms.comms as Comms import dask_cudf import cudf import numpy as np -import warnings -from cugraph.dask.common.input_utils import get_distributed_data - from pylibcugraph import ( + pagerank as plc_pagerank, + personalized_pagerank as plc_p_pagerank, + exceptions as plc_exceptions, ResourceHandle, - pagerank as pylibcugraph_pagerank, - personalized_pagerank as pylibcugraph_p_pagerank, ) +import cugraph.dask.comms.comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.exceptions import FailedToConvergeError + -def convert_to_cudf(cp_arrays): +def convert_to_return_tuple(plc_pr_retval): """ - Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + Using the PLC pagerank return tuple, creates a cudf DataFrame from the cupy + arrays and extracts the (optional) bool. """ - cupy_vertices, cupy_pagerank = cp_arrays + if len(plc_pr_retval) == 3: + cupy_vertices, cupy_pagerank, converged = plc_pr_retval + else: + cupy_vertices, cupy_pagerank = plc_pr_retval + converged = True + df = cudf.DataFrame() df["vertex"] = cupy_vertices df["pagerank"] = cupy_pagerank - return df + return (df, converged) # FIXME: Move this function to the utility module so that it can be @@ -99,20 +109,26 @@ def _call_plc_pagerank( epsilon, max_iterations, do_expensive_check, + fail_on_nonconvergence, ): - - return pylibcugraph_pagerank( - resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), - graph=mg_graph_x, - precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, - precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=epsilon, - max_iterations=max_iterations, - do_expensive_check=do_expensive_check, - ) + try: + return plc_pagerank( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. 
+ except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc def _call_plc_personalized_pagerank( @@ -127,23 +143,30 @@ epsilon, max_iterations, do_expensive_check, + fail_on_nonconvergence, ): personalization_vertices = data_personalization["vertex"] personalization_values = data_personalization["values"] - return pylibcugraph_p_pagerank( - resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), - graph=mg_graph_x, - precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, - precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - personalization_vertices=personalization_vertices, - personalization_values=personalization_values, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=epsilon, - max_iterations=max_iterations, - do_expensive_check=do_expensive_check, - ) + try: + return plc_p_pagerank( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + personalization_vertices=personalization_vertices, + personalization_values=personalization_values, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=epsilon, + max_iterations=max_iterations, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. + except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc def pagerank( @@ -154,6 +177,7 @@ max_iter=100, tol=1.0e-5, nstart=None, + fail_on_nonconvergence=True, ): """ Find the PageRank values for each vertex in a graph using multiple GPUs. @@ -222,8 +246,18 @@ nstart['values'] : cudf.Series Pagerank values for vertices + fail_on_nonconvergence : bool (default=True) + If the solver does not reach convergence, raise an exception if + fail_on_nonconvergence is True. If fail_on_nonconvergence is False, + the return value is a tuple of (pagerank, converged) where pagerank is + a dask_cudf.DataFrame as described below, and converged is a boolean + indicating if the solver converged (True) or not (False). + Returns ------- + The return value varies based on the value of the fail_on_nonconvergence + parameter. If fail_on_nonconvergence is True: + PageRank : dask_cudf.DataFrame GPU data frame containing two dask_cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. @@ -244,6 +278,12 @@ ddf['pagerank'] : dask_cudf.Series Contains the PageRank score + If fail_on_nonconvergence is False: + + (PageRank, converged) : tuple of (dask_cudf.DataFrame, bool) + PageRank is the GPU dataframe described above, and converged is a bool + indicating if the solver converged (True) or not (False). 
+ Examples -------- >>> import cugraph.dask as dcg @@ -328,6 +368,7 @@ tol, max_iter, do_expensive_check, + fail_on_nonconvergence, workers=[w], allow_other_workers=False, ) @@ -347,6 +388,7 @@ tol, max_iter, do_expensive_check, + fail_on_nonconvergence, workers=[w], allow_other_workers=False, ) @@ -355,17 +397,35 @@ wait(result) - cudf_result = [client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result] + vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0] + + # Have each worker convert its tuple of arrays and bool from PLC to a cudf + # DataFrame and bool. This will be a list of futures. + result_tuples = [ + client.submit(convert_to_return_tuple, cp_arrays) for cp_arrays in result + ] - wait(cudf_result) + # Convert the futures to dask delayed objects so the tuples can be + # split. nout=2 is passed since each tuple/iterable has a fixed length of 2. + result_tuples = [dask.delayed(r, nout=2) for r in result_tuples] + + # Create the ddf and get the converged bool from the delayed objs. Use a + # meta DataFrame to pass the expected dtypes for the DataFrame to prevent + # another compute from determining them automatically. + meta = cudf.DataFrame(columns=["vertex", "pagerank"]) + meta = meta.astype({"pagerank": "float64", "vertex": vertex_dtype}) + ddf = dask_cudf.from_delayed([t[0] for t in result_tuples], meta=meta).persist() + converged = all(dask.compute(*[t[1] for t in result_tuples])) - ddf = dask_cudf.from_delayed(cudf_result).persist() wait(ddf) # Wait until the inactive futures are released wait([(r.release(), c_r.release()) for r, c_r in zip(result, result_tuples)]) if input_graph.renumbered: ddf = input_graph.unrenumber(ddf, "vertex") - return ddf + if fail_on_nonconvergence: + return ddf + else: + return (ddf, converged) diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index 7d8972a7385..d74a8df14eb 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -14,10 +14,11 @@ from __future__ import annotations +import warnings + import numpy from dask import delayed -from dask.distributed import wait, Lock, get_client -from cugraph.dask.common.input_utils import get_distributed_data +from dask.distributed import Lock, get_client, wait import dask_cudf import cudf @@ -26,12 +27,20 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper from cugraph.dask.comms import comms as Comms +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask import get_n_workers from typing import Sequence, List, Union, Tuple from typing import TYPE_CHECKING +from cugraph.dask.common.part_utils import ( + get_persisted_df_worker_map, + persist_dask_df_equal_parts_per_worker, +) + if TYPE_CHECKING: from cugraph import Graph @@ -150,7 +159,63 @@ def convert_to_cudf(cp_arrays, weight_t, with_edge_properties, return_offsets=Fa return df +def __get_label_to_output_comm_rank(min_batch_id, max_batch_id, n_workers): + num_batches = max_batch_id - min_batch_id + 1 + num_batches = int(num_batches) + z = cp.zeros(num_batches, dtype="int32") + s = cp.array_split(cp.arange(num_batches), n_workers) + for i, t in enumerate(s): + z[t] = 
i + + return z + + def _call_plc_uniform_neighbor_sample( + sID, + mg_graph_x, + st_x, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + with_edge_properties, + random_state=None, + return_offsets=False, +): + st_x = st_x[0] + start_list_x = st_x[start_col_name] + batch_id_list_x = st_x[batch_col_name] if batch_col_name in st_x else None + + label_list = None + label_to_output_comm_rank = None + if keep_batches_together: + label_list = cp.arange(min_batch_id, max_batch_id + 1, dtype="int32") + label_to_output_comm_rank = __get_label_to_output_comm_rank( + min_batch_id, max_batch_id, n_workers + ) + + cp_arrays = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + input_graph=mg_graph_x, + start_list=start_list_x, + label_list=label_list, + label_to_output_comm_rank=label_to_output_comm_rank, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list_x, + random_state=random_state, + ) + return convert_to_cudf( + cp_arrays, weight_t, with_edge_properties, return_offsets=return_offsets + ) + + +def _call_plc_uniform_neighbor_sample_legacy( sID, mg_graph_x, st_x, @@ -183,7 +248,7 @@ def _call_plc_uniform_neighbor_sample( ) -def _mg_call_plc_uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample_legacy( client, session_id, input_graph, @@ -200,7 +265,7 @@ def _mg_call_plc_uniform_neighbor_sample( ): result = [ client.submit( - _call_plc_uniform_neighbor_sample, + _call_plc_uniform_neighbor_sample_legacy, session_id, input_graph._plc_graph[w], ddf[w][0], @@ -247,7 +312,92 @@ def _mg_call_plc_uniform_neighbor_sample( return ddf -def uniform_neighbor_sample( +def _mg_call_plc_uniform_neighbor_sample( + client, + session_id, + input_graph, + ddf, + keep_batches_together, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t, + indices_t, + with_edge_properties, + random_state, + return_offsets=False, +): + n_workers = None + if keep_batches_together: + n_workers = get_n_workers() + + if hasattr(min_batch_id, "compute"): + min_batch_id = min_batch_id.compute() + if hasattr(max_batch_id, "compute"): + max_batch_id = max_batch_id.compute() + + result = [ + client.submit( + _call_plc_uniform_neighbor_sample, + session_id, + input_graph._plc_graph[w], + starts, + keep_batches_together, + n_workers, + min_batch_id, + max_batch_id, + fanout_vals, + with_replacement, + weight_t=weight_t, + with_edge_properties=with_edge_properties, + # FIXME accept and properly transmute a numpy/cupy random state. 
+ random_state=hash((random_state, w)), + return_offsets=return_offsets, + allow_other_workers=False, + pure=False, + ) + for w, starts in ddf.items() + ] + del ddf + + empty_df = ( + create_empty_df_with_edge_props( + indices_t, weight_t, return_offsets=return_offsets + ) + if with_edge_properties + else create_empty_df(indices_t, weight_t) + ) + + wait(result) + + if return_offsets: + result_split = [delayed(lambda x: x, nout=2)(r) for r in result] + ddf = dask_cudf.from_delayed( + [r[0] for r in result_split], meta=empty_df[0], verify_meta=False + ).persist() + ddf_offsets = dask_cudf.from_delayed( + [r[1] for r in result_split], meta=empty_df[1], verify_meta=False + ).persist() + + wait([ddf, ddf_offsets]) + wait([r.release() for r in result_split]) + wait([r.release() for r in result]) + + del result + + return ddf, ddf_offsets + else: + ddf = dask_cudf.from_delayed(result, meta=empty_df, verify_meta=False).persist() + + wait(ddf) + wait([r.release() for r in result]) + del result + + return ddf + + +def _uniform_neighbor_sample_legacy( input_graph: Graph, start_list: Sequence, fanout_vals: List[int], @@ -259,6 +409,162 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, _multiple_clients: bool = False, +) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]: + warnings.warn( + "The batch_id_list, label_list, and label_to_output_comm_rank " + "parameters are deprecated. Consider using with_batch_ids, " + "keep_batches_together, min_batch_id, and max_batch_id instead." + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, + dtype=input_graph.edgelist.edgelist_df[ + input_graph.renumber_map.renumbered_src_col_name + ].dtype, + ) + + elif with_edge_properties and batch_id_list is None: + batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32")) + + # fanout_vals must be a host array! + # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
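# The return_offsets branch above and the MG pagerank change earlier in this
# diff both split a per-worker future of a fixed-length tuple with
# dask.delayed(..., nout=2). A minimal, self-contained sketch of that pattern;
# the `work` function and its data are hypothetical, not part of this diff:

import dask
from dask import delayed

def work():
    # Stand-in for a worker task that returns a (data, flag) 2-tuple.
    return ([1, 2, 3], True)

pair = delayed(work, nout=2)()   # nout=2 makes the Delayed unpackable
data, flag = pair                # each element is itself a Delayed
assert dask.compute(data, flag) == ([1, 2, 3], True)

# This is why the diff wraps each future before handing the first tuple
# element to dask_cudf.from_delayed and computing the second one separately.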
+    if isinstance(fanout_vals, list):
+        fanout_vals = numpy.asarray(fanout_vals, dtype="int32")
+    else:
+        raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}")
+
+    if "value" in input_graph.edgelist.edgelist_df:
+        weight_t = input_graph.edgelist.edgelist_df["value"].dtype
+    else:
+        weight_t = "float32"
+
+    if "_SRC_" in input_graph.edgelist.edgelist_df:
+        indices_t = input_graph.edgelist.edgelist_df["_SRC_"].dtype
+    elif src_n in input_graph.edgelist.edgelist_df:
+        indices_t = input_graph.edgelist.edgelist_df[src_n].dtype
+    else:
+        indices_t = numpy.int32
+
+    start_list = start_list.rename(start_col_name)
+    if batch_id_list is not None:
+        batch_id_list = batch_id_list.rename(batch_col_name)
+        if hasattr(start_list, "compute"):
+            # mg input
+            start_list = start_list.to_frame()
+            batch_id_list = batch_id_list.to_frame()
+            ddf = start_list.merge(
+                batch_id_list,
+                how="left",
+                left_index=True,
+                right_index=True,
+            )
+        else:
+            # sg input
+            ddf = cudf.concat(
+                [
+                    start_list,
+                    batch_id_list,
+                ],
+                axis=1,
+            )
+    else:
+        ddf = start_list.to_frame()
+
+    if input_graph.renumbered:
+        ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name)
+
+    if hasattr(ddf, "compute"):
+        ddf = get_distributed_data(ddf)
+        wait(ddf)
+        ddf = ddf.worker_to_parts
+    else:
+        splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers()))
+        ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())}
+
+    client = get_client()
+    session_id = Comms.get_session_id()
+    if _multiple_clients:
+        # Distributed centralized lock to allow
+        # two disconnected processes (clients) to coordinate a lock
+        # https://docs.dask.org/en/stable/futures.html?highlight=lock#distributed.Lock
+        lock = Lock("plc_graph_access")
+        if lock.acquire(timeout=100):
+            try:
+                ddf = _mg_call_plc_uniform_neighbor_sample_legacy(
+                    client=client,
+                    session_id=session_id,
+                    input_graph=input_graph,
+                    ddf=ddf,
+                    label_list=label_list,
+                    label_to_output_comm_rank=label_to_output_comm_rank,
+                    fanout_vals=fanout_vals,
+                    with_replacement=with_replacement,
+                    weight_t=weight_t,
+                    indices_t=indices_t,
+                    with_edge_properties=with_edge_properties,
+                    random_state=random_state,
+                    return_offsets=return_offsets,
+                )
+            finally:
+                lock.release()
+        else:
+            raise RuntimeError(
+                "Failed to acquire lock(plc_graph_access) while trying to sample"
+            )
+    else:
+        ddf = _mg_call_plc_uniform_neighbor_sample_legacy(
+            client=client,
+            session_id=session_id,
+            input_graph=input_graph,
+            ddf=ddf,
+            label_list=label_list,
+            label_to_output_comm_rank=label_to_output_comm_rank,
+            fanout_vals=fanout_vals,
+            with_replacement=with_replacement,
+            weight_t=weight_t,
+            indices_t=indices_t,
+            with_edge_properties=with_edge_properties,
+            random_state=random_state,
+            return_offsets=return_offsets,
+        )
+
+    if return_offsets:
+        ddf, offsets_ddf = ddf
+    if input_graph.renumbered:
+        ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True)
+        ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True)
+
+    if return_offsets:
+        return ddf, offsets_ddf
+
+    return ddf
+
+
+uniform_neighbor_sample_legacy = deprecated_warning_wrapper(
+    _uniform_neighbor_sample_legacy
+)
+
+
+def uniform_neighbor_sample(
+    input_graph: Graph,
+    start_list: Sequence,
+    fanout_vals: List[int],
+    with_replacement: bool = True,
+    with_edge_properties: bool = False,
+    batch_id_list: Sequence = None,  # deprecated
+    label_list: Sequence = None,  # deprecated
+    label_to_output_comm_rank: bool = None,  # deprecated
+    with_batch_ids: bool = False,
+    keep_batches_together=False,
+    min_batch_id=None,
+    max_batch_id=None,
+    random_state: int = None,
+    return_offsets: bool = False,
+    _multiple_clients: bool = False,
 ) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]:
     """
     Does neighborhood sampling, which samples nodes from a graph based on the
@@ -285,20 +591,36 @@
         edge type, batch id, hop id) with the sampled edges.

     batch_id_list: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of batch ids that will be returned with the sampled edges if
         with_edge_properties is set to True.

     label_list: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of unique batch id labels.  Used along with
         label_to_output_comm_rank to assign batch ids to GPUs.

     label_to_output_comm_rank: cudf.Series or dask_cudf.Series (int32), optional (default=None)
+        Deprecated.
         List of output GPUs (by rank) corresponding to batch id labels in the
         label list.  Used to assign each batch id to a GPU.  Must be in
         ascending order (i.e. [0, 0, 1, 2]).

+    with_batch_ids: bool, optional (default=False)
+        Flag to specify whether batch ids are present in the start_list.
+
+    keep_batches_together: bool, optional (default=False)
+        If True, will ensure that the returned samples for each batch are on
+        the same partition.
+
+    min_batch_id: int, optional (default=None)
+        Required for the keep_batches_together option.  The minimum batch id.
+
+    max_batch_id: int, optional (default=None)
+        Required for the keep_batches_together option.  The maximum batch id.
+
     random_state: int, optional
         Random seed to use when making sampling calls.

@@ -363,6 +685,25 @@
         Contains the offsets of each batch in the sampling result
     """
+    if (
+        batch_id_list is not None
+        or label_list is not None
+        or label_to_output_comm_rank is not None
+    ):
+        return uniform_neighbor_sample_legacy(
+            input_graph,
+            start_list,
+            fanout_vals,
+            with_replacement=with_replacement,
+            with_edge_properties=with_edge_properties,
+            batch_id_list=batch_id_list,
+            label_list=label_list,
+            label_to_output_comm_rank=label_to_output_comm_rank,
+            random_state=random_state,
+            return_offsets=return_offsets,
+            _multiple_clients=_multiple_clients,
+        )
+
     if isinstance(start_list, int):
         start_list = [start_list]

@@ -373,9 +714,21 @@
                 input_graph.renumber_map.renumbered_src_col_name
             ].dtype,
         )
+    elif with_edge_properties and not with_batch_ids:
+        if isinstance(start_list, (cudf.DataFrame, dask_cudf.DataFrame)):
+            raise ValueError("expected 1d input for start list without batch ids")

-    elif with_edge_properties and batch_id_list is None:
-        batch_id_list = cudf.Series(cp.zeros(len(start_list), dtype="int32"))
+        start_list = start_list.to_frame()
+        start_list[batch_id_n] = cudf.Series(cp.zeros(len(start_list), dtype="int32"))
+
+    if keep_batches_together and min_batch_id is None:
+        raise ValueError(
+            "must provide min_batch_id if using keep_batches_together option"
+        )
+    if keep_batches_together and max_batch_id is None:
+        raise ValueError(
+            "must provide max_batch_id if using keep_batches_together option"
+        )

     # fanout_vals must be a host array!
     # FIXME: ensure other sequence types (eg. cudf Series) can be handled.
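# A standalone illustration of how the keep_batches_together option documented
# above maps batch ids onto workers. It mirrors the __get_label_to_output_comm_rank
# helper added earlier in this diff, rewritten with numpy for clarity; the
# function name and inputs here are illustrative only:

import numpy as np

def label_to_output_rank(min_batch_id, max_batch_id, n_workers):
    # Batch i (offset from min_batch_id) goes to the worker whose contiguous
    # slice of the batch-id range contains i.
    num_batches = int(max_batch_id - min_batch_id + 1)
    ranks = np.zeros(num_batches, dtype="int32")
    for rank, idx in enumerate(np.array_split(np.arange(num_batches), n_workers)):
        ranks[idx] = rank
    return ranks

# Five batches spread over two workers:
assert label_to_output_rank(0, 4, 2).tolist() == [0, 0, 0, 1, 1]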
@@ -396,44 +749,30 @@ def uniform_neighbor_sample( else: indices_t = numpy.int32 - start_list = start_list.rename(start_col_name) - if batch_id_list is not None: - batch_id_list = batch_id_list.rename(batch_col_name) - if hasattr(start_list, "compute"): - # mg input - start_list = start_list.to_frame() - batch_id_list = batch_id_list.to_frame() - ddf = start_list.merge( - batch_id_list, - how="left", - left_index=True, - right_index=True, - ) - else: - # sg input - ddf = cudf.concat( - [ - start_list, - batch_id_list, - ], - axis=1, - ) - else: + if isinstance(start_list, (cudf.Series, dask_cudf.Series)): + start_list = start_list.rename(start_col_name) ddf = start_list.to_frame() + else: + ddf = start_list + columns = ddf.columns + ddf = ddf.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) if input_graph.renumbered: ddf = input_graph.lookup_internal_vertex_id(ddf, column_name=start_col_name) - if hasattr(ddf, "compute"): - ddf = get_distributed_data(ddf) - wait(ddf) - ddf = ddf.worker_to_parts - else: - splits = cp.array_split(cp.arange(len(ddf)), len(Comms.get_workers())) - ddf = {w: [ddf.iloc[splits[i]]] for i, w in enumerate(Comms.get_workers())} - client = get_client() session_id = Comms.get_session_id() + n_workers = get_n_workers() + + if isinstance(ddf, cudf.DataFrame): + ddf = dask_cudf.from_cudf(ddf, npartitions=n_workers) + + ddf = ddf.repartition(npartitions=n_workers) + ddf = persist_dask_df_equal_parts_per_worker(ddf, client) + ddf = get_persisted_df_worker_map(ddf, client) + if _multiple_clients: # Distributed centralized lock to allow # two disconnected processes (clients) to coordinate a lock @@ -446,8 +785,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, @@ -468,8 +808,9 @@ def uniform_neighbor_sample( session_id=session_id, input_graph=input_graph, ddf=ddf, - label_list=label_list, - label_to_output_comm_rank=label_to_output_comm_rank, + keep_batches_together=keep_batches_together, + min_batch_id=min_batch_id, + max_batch_id=max_batch_id, fanout_vals=fanout_vals, with_replacement=with_replacement, weight_t=weight_t, diff --git a/python/cugraph/cugraph/exceptions.py b/python/cugraph/cugraph/exceptions.py new file mode 100644 index 00000000000..64280603112 --- /dev/null +++ b/python/cugraph/cugraph/exceptions.py @@ -0,0 +1,26 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Exception classes for cugraph. +""" + + +class FailedToConvergeError(Exception): + """ + Raised when an algorithm fails to converge within a predetermined set of + constraints which vary based on the algorithm, and may or may not be + user-configurable. 
+ """ + + pass diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py index 0257a56ba08..a2b0a367d1d 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py @@ -15,16 +15,21 @@ from typing import Union -import cupy import cudf import dask_cudf -import cugraph.dask as dask_cugraph + +from dask.distributed import wait +from dask.distributed import futures_of import cugraph import pylibcugraph from cugraph.gnn.data_loading.bulk_sampler_io import write_samples +import warnings +import logging +import time + class EXPERIMENTAL__BulkSampler: start_col_name = "_START_" @@ -36,7 +41,8 @@ def __init__( output_path: str, graph, seeds_per_call: int = 200_000, - batches_per_partition=100, + batches_per_partition: int = 100, + log_level: int = None, **kwargs, ): """ @@ -55,13 +61,19 @@ def __init__( a single sampling call. batches_per_partition: int (optional, default=100) The number of batches outputted to a single parquet partition. + log_level: int (optional, default=None) + Whether to enable logging for this sampler. Supports 3 levels + of logging if enabled (INFO, WARNING, ERROR). If not provided, + defaults to WARNING. kwargs: kwargs Keyword arguments to be passed to the sampler (i.e. fanout). """ + self.__logger = logging.getLogger(__name__) + self.__logger.setLevel(log_level or logging.WARNING) + max_batches_per_partition = seeds_per_call // batch_size if batches_per_partition > max_batches_per_partition: - import warnings warnings.warn( f"batches_per_partition ({batches_per_partition}) is >" @@ -140,7 +152,7 @@ def add_batches( ... start_col_name="start_vid", ... batch_col_name="start_batch") """ - df = df.rename( + df = df[[start_col_name, batch_col_name]].rename( columns={ start_col_name: self.start_col_name, batch_col_name: self.batch_col_name, @@ -163,6 +175,11 @@ def add_batches( ) if self.size >= self.seeds_per_call: + self.__logger.info( + f"Number of input seeds ({self.size})" + f" is >= seeds per call ({self.seeds_per_call})." + " Calling flush() to compute and write minibatches." 
+ ) self.flush() def flush(self) -> None: @@ -171,12 +188,16 @@ def flush(self) -> None: """ if self.size == 0: return - self.__batches.reset_index(drop=True) + + start_time_calc_batches = time.perf_counter() + if isinstance(self.__batches, dask_cudf.DataFrame): + self.__batches = self.__batches.persist() min_batch_id = self.__batches[self.batch_col_name].min() if isinstance(self.__batches, dask_cudf.DataFrame): - min_batch_id = min_batch_id.compute() - min_batch_id = int(min_batch_id) + min_batch_id = min_batch_id.persist() + else: + min_batch_id = int(min_batch_id) partition_size = self.batches_per_partition * self.batch_size partitions_per_call = ( @@ -185,7 +206,19 @@ def flush(self) -> None: npartitions = partitions_per_call max_batch_id = min_batch_id + npartitions * self.batches_per_partition - 1 + if isinstance(self.__batches, dask_cudf.DataFrame): + max_batch_id = max_batch_id.persist() + batch_id_filter = self.__batches[self.batch_col_name] <= max_batch_id + if isinstance(batch_id_filter, dask_cudf.Series): + batch_id_filter = batch_id_filter.persist() + + end_time_calc_batches = time.perf_counter() + self.__logger.info( + f"Calculated batches to sample; min = {min_batch_id}" + f" and max = {max_batch_id};" + f" took {end_time_calc_batches - start_time_calc_batches:.4f} s" + ) if isinstance(self.__graph._plc_graph, pylibcugraph.graphs.SGGraph): sample_fn = cugraph.uniform_neighbor_sample @@ -194,28 +227,62 @@ def flush(self) -> None: self.__sample_call_args.update( { "_multiple_clients": True, - "label_to_output_comm_rank": self.__get_label_to_output_comm_rank( - min_batch_id, max_batch_id - ), - "label_list": cupy.arange( - min_batch_id, max_batch_id + 1, dtype="int32" - ), + "keep_batches_together": True, + "min_batch_id": min_batch_id, + "max_batch_id": max_batch_id, } ) + start_time_sample_call = time.perf_counter() + + # Call uniform neighbor sample samples, offsets = sample_fn( self.__graph, **self.__sample_call_args, - start_list=self.__batches[self.start_col_name][batch_id_filter], - batch_id_list=self.__batches[self.batch_col_name][batch_id_filter], + start_list=self.__batches[[self.start_col_name, self.batch_col_name]][ + batch_id_filter + ], + with_batch_ids=True, with_edge_properties=True, return_offsets=True, ) + end_time_sample_call = time.perf_counter() + sample_runtime = end_time_sample_call - start_time_sample_call + + self.__logger.info( + f"Called uniform neighbor sample, took {sample_runtime:.4f} s" + ) + + # Filter batches to remove those already processed self.__batches = self.__batches[~batch_id_filter] + del batch_id_filter + if isinstance(self.__batches, dask_cudf.DataFrame): + self.__batches = self.__batches.persist() + + start_time_write = time.perf_counter() + + # Write batches to parquet self.__write(samples, offsets) + if isinstance(self.__batches, dask_cudf.DataFrame): + wait( + [f.release() for f in futures_of(samples)] + + [f.release() for f in futures_of(offsets)] + ) + + del samples + del offsets + + end_time_write = time.perf_counter() + write_runtime = end_time_write - start_time_write + self.__logger.info(f"Wrote samples to parquet, took {write_runtime} seconds") - if self.size > 0: + current_size = self.size + if current_size > 0: + self.__logger.info( + f"There are still {current_size} samples remaining, " + "calling flush() again..." 
+ ) self.flush() def __write( @@ -227,13 +294,3 @@ def __write( write_samples( samples, offsets, self.__batches_per_partition, self.__output_path ) - - def __get_label_to_output_comm_rank(self, min_batch_id, max_batch_id): - num_workers = dask_cugraph.get_n_workers() - num_batches = max_batch_id - min_batch_id + 1 - z = cupy.zeros(num_batches, dtype="int32") - s = cupy.array_split(cupy.arange(num_batches), num_workers) - for i, t in enumerate(s): - z[t] = i - - return cudf.Series(z) diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py index d7f1c136484..44c1185bbf1 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py @@ -24,7 +24,7 @@ def _write_samples_to_parquet( batches_per_partition: int, output_path: str, partition_info: Optional[Union[dict, str]] = None, -) -> None: +) -> cudf.Series: """ Writes the samples to parquet. results: cudf.DataFrame @@ -40,11 +40,13 @@ def _write_samples_to_parquet( Either a dictionary containing partition data from dask, the string 'sg' indicating that this is a single GPU write, or None indicating that this function should perform a no-op (required by dask). + + Returns an empty cudf series. """ # Required by dask; need to skip dummy partitions. if partition_info is None or len(results) == 0: - return + return cudf.Series(dtype="int64") if partition_info != "sg" and (not isinstance(partition_info, dict)): raise ValueError("Invalid value of partition_info") @@ -71,6 +73,8 @@ def _write_samples_to_parquet( ).values results_p.to_parquet(full_output_path, compression=None, index=False) + return cudf.Series(dtype="int64") + def write_samples( results: cudf.DataFrame, @@ -97,7 +101,9 @@ def write_samples( batches_per_partition, output_path, align_dataframes=False, + meta=cudf.Series(dtype="int64"), ).compute() + else: _write_samples_to_parquet( results, offsets, batches_per_partition, output_path, partition_info="sg" diff --git a/python/cugraph/cugraph/link_analysis/pagerank.py b/python/cugraph/cugraph/link_analysis/pagerank.py index 6696512dcf0..d2b827fa7c8 100644 --- a/python/cugraph/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/link_analysis/pagerank.py @@ -11,20 +11,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.utilities import ( - ensure_cugraph_obj_for_nx, - df_score_to_dictionary, -) +import warnings + import cudf import numpy as np -import warnings from pylibcugraph import ( - pagerank as pylibcugraph_pagerank, - personalized_pagerank as pylibcugraph_p_pagerank, + pagerank as plc_pagerank, + personalized_pagerank as plc_p_pagerank, + exceptions as plc_exceptions, ResourceHandle, ) +from cugraph.utilities import ( + ensure_cugraph_obj_for_nx, + df_score_to_dictionary, +) +from cugraph.exceptions import FailedToConvergeError + def renumber_vertices(input_graph, input_df): if len(input_graph.renumber_map.implementation.col_names) > 1: @@ -86,9 +90,9 @@ def pagerank( nstart=None, weight=None, dangling=None, + fail_on_nonconvergence=True, ): - """ - Find the PageRank score for every vertex in a graph. cuGraph computes an + """Find the PageRank score for every vertex in a graph. cuGraph computes an approximation of the Pagerank eigenvector using the power method. 
The number of iterations depends on the properties of the network itself; it
    increases when the tolerance decreases and/or alpha increases toward the
@@ -163,8 +167,18 @@
     dangling : dict, optional (default=None)
         This parameter is here for NetworkX compatibility and ignored.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (pagerank, converged) where pagerank is
+        a cudf.DataFrame as described below, and converged is a boolean
+        indicating if the solver converged (True) or not (False).
+
     Returns
     -------
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
     PageRank : cudf.DataFrame
         GPU data frame containing two cudf.Series of size V: the vertex
         identifiers and the corresponding PageRank values.
@@ -185,6 +199,12 @@
         df['pagerank'] : cudf.Series
             Contains the PageRank score

+    If fail_on_nonconvergence is False:
+
+    (PageRank, converged) : tuple of (cudf.DataFrame, bool)
+        PageRank is the GPU dataframe described above; converged is a bool
+        indicating if the solver converged (True) or not (False).
+
     Examples
     --------
     >>> from cugraph.experimental.datasets import karate
@@ -226,47 +246,55 @@
         pre_vtx_o_wgt_vertices = precomputed_vertex_out_weight["vertex"]
         pre_vtx_o_wgt_sums = precomputed_vertex_out_weight["sums"]

-    if personalization is not None:
-        if not isinstance(personalization, cudf.DataFrame):
-            raise NotImplementedError(
-                "personalization other than a cudf dataframe " "currently not supported"
+    try:
+        if personalization is not None:
+            if not isinstance(personalization, cudf.DataFrame):
+                raise NotImplementedError(
+                    "personalization other than a cudf dataframe currently not "
+                    "supported"
+                )
+            if G.renumbered is True:
+                personalization = renumber_vertices(G, personalization)
+
+            personalization = ensure_valid_dtype(G, personalization, "personalization")
+
+            result_tuple = plc_p_pagerank(
+                resource_handle=ResourceHandle(),
+                graph=G._plc_graph,
+                precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
+                precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums,
+                personalization_vertices=personalization["vertex"],
+                personalization_values=personalization["values"],
+                initial_guess_vertices=initial_guess_vertices,
+                initial_guess_values=initial_guess_values,
+                alpha=alpha,
+                epsilon=tol,
+                max_iterations=max_iter,
+                do_expensive_check=do_expensive_check,
+                fail_on_nonconvergence=fail_on_nonconvergence,
             )
-        if G.renumbered is True:
-            personalization = renumber_vertices(G, personalization)
-
-        personalization = ensure_valid_dtype(G, personalization, "personalization")
-
-        vertex, pagerank_values = pylibcugraph_p_pagerank(
-            resource_handle=ResourceHandle(),
-            graph=G._plc_graph,
-            precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
-            precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums,
-            personalization_vertices=personalization["vertex"],
-            personalization_values=personalization["values"],
-            initial_guess_vertices=initial_guess_vertices,
-            initial_guess_values=initial_guess_values,
-            alpha=alpha,
-            epsilon=tol,
-            max_iterations=max_iter,
-            do_expensive_check=do_expensive_check,
-        )
-    else:
-        vertex, pagerank_values = pylibcugraph_pagerank(
-            resource_handle=ResourceHandle(),
-            graph=G._plc_graph,
-            precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices,
-
precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, - initial_guess_vertices=initial_guess_vertices, - initial_guess_values=initial_guess_values, - alpha=alpha, - epsilon=tol, - max_iterations=max_iter, - do_expensive_check=do_expensive_check, - ) + else: + result_tuple = plc_pagerank( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + precomputed_vertex_out_weight_vertices=pre_vtx_o_wgt_vertices, + precomputed_vertex_out_weight_sums=pre_vtx_o_wgt_sums, + initial_guess_vertices=initial_guess_vertices, + initial_guess_values=initial_guess_values, + alpha=alpha, + epsilon=tol, + max_iterations=max_iter, + do_expensive_check=do_expensive_check, + fail_on_nonconvergence=fail_on_nonconvergence, + ) + # Re-raise this as a cugraph exception so users trying to catch this do not + # have to know to import another package. + except plc_exceptions.FailedToConvergeError as exc: + raise FailedToConvergeError from exc df = cudf.DataFrame() - df["vertex"] = vertex - df["pagerank"] = pagerank_values + df["vertex"] = result_tuple[0] + df["pagerank"] = result_tuple[1] if G.renumbered: df = G.unrenumber(df, "vertex") @@ -274,4 +302,7 @@ def pagerank( if isNx is True: df = df_score_to_dictionary(df, "pagerank") - return df + if fail_on_nonconvergence: + return df + else: + return (df, result_tuple[2]) diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py index d6acaa550eb..d239f92d485 100644 --- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py @@ -15,6 +15,7 @@ from pylibcugraph import ResourceHandle from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample +from pylibcugraph.utilities.api_tools import deprecated_warning_wrapper import numpy @@ -29,6 +30,10 @@ from cugraph import Graph +start_col_name = "_START_" +batch_col_name = "_BATCH_" + + # FIXME: Move this function to the utility module so that it can be # shared by other algos def ensure_valid_dtype(input_graph, start_list): @@ -50,7 +55,7 @@ def ensure_valid_dtype(input_graph, start_list): return start_list -def uniform_neighbor_sample( +def _uniform_neighbor_sample_legacy( G: Graph, start_list: Sequence, fanout_vals: List[int], @@ -60,6 +65,135 @@ def uniform_neighbor_sample( random_state: int = None, return_offsets: bool = False, ) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: + + warnings.warn( + "The batch_id_list parameter is deprecated. " + "Consider passing a DataFrame where the last column " + "is the batch ids and setting with_batch_ids=True" + ) + + if isinstance(start_list, int): + start_list = [start_list] + + if isinstance(start_list, list): + start_list = cudf.Series( + start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype + ) + + if with_edge_properties and batch_id_list is None: + batch_id_list = cp.zeros(len(start_list), dtype="int32") + + # fanout_vals must be a host array! + # FIXME: ensure other sequence types (eg. cudf Series) can be handled. 
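# A hedged sketch of the conversion the FIXME above asks for, extended to
# accept cudf Series in addition to lists. The helper name is illustrative
# and not part of this diff:

import numpy
import cudf

def as_host_fanout(fanout_vals):
    # h_fan_out must be a host (CPU) array before it reaches pylibcugraph.
    if isinstance(fanout_vals, list):
        return numpy.asarray(fanout_vals, dtype="int32")
    if isinstance(fanout_vals, cudf.Series):
        # Device-to-host copy, then cast to the expected int32 dtype.
        return fanout_vals.to_numpy().astype("int32")
    raise TypeError(f"fanout_vals must be a list or cudf.Series, got: {type(fanout_vals)}")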
+ if isinstance(fanout_vals, list): + fanout_vals = numpy.asarray(fanout_vals, dtype="int32") + else: + raise TypeError("fanout_vals must be a list, " f"got: {type(fanout_vals)}") + + if "weights" in G.edgelist.edgelist_df: + weight_t = G.edgelist.edgelist_df["weights"].dtype + else: + weight_t = "float32" + + start_list = ensure_valid_dtype(G, start_list) + + if G.renumbered is True: + if isinstance(start_list, cudf.DataFrame): + start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + else: + start_list = G.lookup_internal_vertex_id(start_list) + + sampling_result = pylibcugraph_uniform_neighbor_sample( + resource_handle=ResourceHandle(), + input_graph=G._plc_graph, + start_list=start_list, + h_fan_out=fanout_vals, + with_replacement=with_replacement, + do_expensive_check=False, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + ) + + df = cudf.DataFrame() + + if with_edge_properties: + ( + sources, + destinations, + weights, + edge_ids, + edge_types, + batch_ids, + offsets, + hop_ids, + ) = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + df["weight"] = weights + df["edge_id"] = edge_ids + df["edge_type"] = edge_types + df["hop_id"] = hop_ids + + if return_offsets: + offsets_df = cudf.DataFrame( + { + "batch_id": batch_ids, + "offsets": offsets[:-1], + } + ) + + else: + if len(batch_ids) > 0: + batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets)) + batch_ids.reset_index(drop=True, inplace=True) + + df["batch_id"] = batch_ids + + else: + sources, destinations, indices = sampling_result + + df["sources"] = sources + df["destinations"] = destinations + + if indices is None: + df["indices"] = None + else: + df["indices"] = indices + if weight_t == "int32": + df["indices"] = indices.astype("int32") + elif weight_t == "int64": + df["indices"] = indices.astype("int64") + else: + df["indices"] = indices + + if G.renumbered: + df = G.unrenumber(df, "sources", preserve_order=True) + df = G.unrenumber(df, "destinations", preserve_order=True) + + if return_offsets: + return df, offsets_df + + return df + + +uniform_neighbor_sample_legacy = deprecated_warning_wrapper( + _uniform_neighbor_sample_legacy +) + + +def uniform_neighbor_sample( + G: Graph, + start_list: Sequence, + fanout_vals: List[int], + with_replacement: bool = True, + with_edge_properties: bool = False, + batch_id_list: Sequence = None, # deprecated + with_batch_ids: bool = False, + random_state: int = None, + return_offsets: bool = False, +) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]: """ Does neighborhood sampling, which samples nodes from a graph based on the current node's neighbors, with a corresponding fanout value at each hop. @@ -85,9 +219,14 @@ def uniform_neighbor_sample( edge type, batch id, hop id) with the sampled edges. batch_id_list: list (int32) + Deprecated. List of batch ids that will be returned with the sampled edges if with_edge_properties is set to True. + with_batch_ids: bool, optional (default=False) + Flag to specify whether batch ids are present in the start_list + Assumes they are the last column in the start_list dataframe + random_state: int, optional Random seed to use when making sampling calls. 
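# A hedged usage sketch of the new SG call signature documented above, next to
# the deprecated form it replaces. `G` is assumed to be an already-constructed
# cugraph.Graph; the vertex and batch values are illustrative only.

import cudf
import cugraph  # uniform_neighbor_sample is exposed at the package level

starts = cudf.DataFrame(
    {
        "start": cudf.Series([0, 1, 2], dtype="int64"),
        "batch": cudf.Series([0, 0, 1], dtype="int32"),  # last column = batch ids
    }
)

# New style: batch ids travel inside start_list itself.
result = cugraph.uniform_neighbor_sample(
    G,
    start_list=starts,
    fanout_vals=[2, 2],
    with_edge_properties=True,
    with_batch_ids=True,
)

# Deprecated style, still dispatched to the legacy path for now:
# cugraph.uniform_neighbor_sample(G, starts["start"], [2, 2],
#                                 with_edge_properties=True,
#                                 batch_id_list=starts["batch"])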
@@ -148,6 +287,18 @@ def uniform_neighbor_sample( Contains the offsets of each batch in the sampling result """ + if batch_id_list is not None: + return uniform_neighbor_sample_legacy( + G, + start_list, + fanout_vals, + with_replacement=with_replacement, + with_edge_properties=with_edge_properties, + batch_id_list=batch_id_list, + random_state=random_state, + return_offsets=return_offsets, + ) + if isinstance(start_list, int): start_list = [start_list] @@ -156,8 +307,13 @@ def uniform_neighbor_sample( start_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - if with_edge_properties and batch_id_list is None: - batch_id_list = cp.zeros(len(start_list), dtype="int32") + if with_edge_properties and not with_batch_ids: + if isinstance(start_list, cudf.Series): + start_list = start_list.to_frame() + + start_list[batch_col_name] = cudf.Series( + cp.zeros(len(start_list), dtype="int32") + ) # fanout_vals must be a host array! # FIXME: ensure other sequence types (eg. cudf Series) can be handled. @@ -173,21 +329,37 @@ def uniform_neighbor_sample( start_list = ensure_valid_dtype(G, start_list) - if G.renumbered is True: - if isinstance(start_list, cudf.DataFrame): - start_list = G.lookup_internal_vertex_id(start_list, start_list.columns) + if isinstance(start_list, cudf.Series): + start_list = start_list.rename(start_col_name) + start_list = start_list.to_frame() + + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, start_col_name) + else: + columns = start_list.columns + + if with_batch_ids: + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns[:-1]) + start_list = start_list.rename( + columns={columns[0]: start_col_name, columns[-1]: batch_col_name} + ) else: - start_list = G.lookup_internal_vertex_id(start_list) + if G.renumbered: + start_list = G.lookup_internal_vertex_id(start_list, columns) + start_list = start_list.rename(columns={columns[0]: start_col_name}) sampling_result = pylibcugraph_uniform_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, - start_list=start_list, + start_list=start_list[start_col_name], + batch_id_list=start_list[batch_col_name] + if batch_col_name in start_list + else None, h_fan_out=fanout_vals, with_replacement=with_replacement, do_expensive_check=False, with_edge_properties=with_edge_properties, - batch_id_list=batch_id_list, random_state=random_state, ) diff --git a/python/cugraph/cugraph/testing/mg_utils.py b/python/cugraph/cugraph/testing/mg_utils.py index 1e1a481e4d6..bd165ba3db5 100644 --- a/python/cugraph/cugraph/testing/mg_utils.py +++ b/python/cugraph/cugraph/testing/mg_utils.py @@ -29,6 +29,7 @@ def start_dask_client( protocol=None, + rmm_async=False, rmm_pool_size=None, dask_worker_devices=None, jit_unspill=False, @@ -137,6 +138,7 @@ def start_dask_client( local_directory=local_directory, protocol=protocol, rmm_pool_size=rmm_pool_size, + rmm_async=rmm_async, CUDA_VISIBLE_DEVICES=dask_worker_devices, jit_unspill=jit_unspill, device_memory_limit=device_memory_limit, @@ -287,6 +289,15 @@ def persist_dask_object(arg): # Function to convert bytes into human readable format def sizeof_fmt(num, suffix="B"): + if isinstance(num, str): + if num[-2:] == "GB": + return num[:-2] + "G" + elif num[-2:] == "MB": + return num[:-2] + "M" + elif num[-2:] == "KB": + return num[:-2] + "K" + else: + raise ValueError("unknown unit") for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(num) < 1024.0: return "%3.1f%s%s" % (num, unit, suffix) diff --git 
a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index ba136963b60..b7487ae329c 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -432,3 +432,49 @@ def test_pagerank_transposed_false(): with pytest.warns(UserWarning, match=warning_msg): cugraph.pagerank(G) + + +@pytest.mark.sg +def test_pagerank_non_convergence(): + G = karate.get_graph(create_using=cugraph.Graph(directed=True)) + + # Not enough allowed iterations, should not converge + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = cugraph.pagerank(G, max_iter=1, fail_on_nonconvergence=True) + + # Not enough allowed iterations, should not converge but do not consider + # that an error + (df, converged) = cugraph.pagerank(G, max_iter=1, fail_on_nonconvergence=False) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is False + + # The default max_iter value should allow convergence for this graph + (df, converged) = cugraph.pagerank(G, fail_on_nonconvergence=False) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is True + + # Test personalized pagerank the same way + personalization = cudf.DataFrame() + personalization["vertex"] = [17, 26] + personalization["values"] = [0.5, 0.75] + + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = cugraph.pagerank( + G, max_iter=1, personalization=personalization, fail_on_nonconvergence=True + ) + + (df, converged) = cugraph.pagerank( + G, max_iter=1, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is False + + (df, converged) = cugraph.pagerank( + G, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is cudf.DataFrame + assert type(converged) is bool + assert converged is True diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py index 941974eea4f..14a512c59e5 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py @@ -48,6 +48,25 @@ def personalize(vertices, personalization_perc): return cu_personalization, personalization +def create_distributed_karate_graph(store_transposed=True): + input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() + + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=store_transposed) + + return dg + + # ============================================================================= # Parameters # ============================================================================= @@ -197,20 +216,7 @@ def test_pagerank_invalid_personalization_dtype(dask_client): @pytest.mark.mg def test_dask_pagerank_transposed_false(dask_client): - input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() - - chunksize = dcg.get_chunksize(input_data_path) - - ddf = dask_cudf.read_csv( - input_data_path, - chunksize=chunksize, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) 
- - dg = cugraph.Graph(directed=True) - dg.from_dask_cudf_edgelist(ddf, "src", "dst", store_transposed=False) + dg = create_distributed_karate_graph(store_transposed=False) warning_msg = ( "Pagerank expects the 'store_transposed' " @@ -220,3 +226,49 @@ def test_dask_pagerank_transposed_false(dask_client): with pytest.warns(UserWarning, match=warning_msg): dcg.pagerank(dg) + + +@pytest.mark.mg +def test_pagerank_non_convergence(dask_client): + dg = create_distributed_karate_graph() + + # Not enough allowed iterations, should not converge + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + ddf = dcg.pagerank(dg, max_iter=1, fail_on_nonconvergence=True) + + # Not enough allowed iterations, should not converge but do not consider + # that an error + (ddf, converged) = dcg.pagerank(dg, max_iter=1, fail_on_nonconvergence=False) + assert type(ddf) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is False + + # The default max_iter value should allow convergence for this graph + (ddf, converged) = dcg.pagerank(dg, fail_on_nonconvergence=False) + assert type(ddf) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is True + + # Test personalized pagerank the same way + personalization = cudf.DataFrame() + personalization["vertex"] = [17, 26] + personalization["values"] = [0.5, 0.75] + + with pytest.raises(cugraph.exceptions.FailedToConvergeError): + df = dcg.pagerank( + dg, max_iter=1, personalization=personalization, fail_on_nonconvergence=True + ) + + (df, converged) = dcg.pagerank( + dg, max_iter=1, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is False + + (df, converged) = dcg.pagerank( + dg, personalization=personalization, fail_on_nonconvergence=False + ) + assert type(df) is dask_cudf.DataFrame + assert type(converged) is bool + assert converged is True diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 5d2f050bce9..39d2fbea7dd 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -285,7 +285,7 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out sampling_results = uniform_neighbor_sample( test_data["Graph"], - test_data["start_list"], + test_data["start_list"].astype("int64"), test_data["fanout_vals"], test_data["with_replacement"], ) @@ -330,11 +330,11 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets): sampling_results = uniform_neighbor_sample( G, - start_list=start_df["seed"], + start_list=start_df, fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, - batch_id_list=start_df["batch"], + with_batch_ids=True, return_offsets=return_offsets, ) if return_offsets: @@ -389,11 +389,16 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(): sampling_results = cugraph.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 1, 2]), - batch_id_list=cudf.Series([1, 1, 1], dtype="int32"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1, 2]), + "batch": cudf.Series([1, 1, 1], dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=80, ) @@ -460,11 +465,16 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch(): sampling_results = 
cugraph.uniform_neighbor_sample( G, - cudf.Series([0, 1], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 1], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[2, 2, 2], - batch_id_list=cudf.Series([0, 1], dtype="int32"), with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for b in range(2): @@ -502,11 +512,16 @@ def test_uniform_neighbor_sample_empty_start_list(): sampling_results = cugraph.uniform_neighbor_sample( G, - start_list=cudf.Series([], dtype="int64"), - batch_id_list=cudf.Series([], dtype="int32"), + start_list=cudf.DataFrame( + { + "start_list": cudf.Series(dtype="int64"), + "batch_id_list": cudf.Series(dtype="int32"), + } + ), fanout_vals=[2, 2], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, random_state=32, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 033b96487c4..4da3f3cf950 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -327,7 +327,8 @@ def test_mg_uniform_neighbor_sample_ensure_no_duplicates(dask_client): @pytest.mark.cugraph_ops @pytest.mark.parametrize("return_offsets", [True, False]) def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): - if len(dask_client.scheduler_info()["workers"]) <= 1: + n_workers = len(dask_client.scheduler_info()["workers"]) + if n_workers <= 1: pytest.skip("Test only valid for MG environments") edgelist_df = dask_cudf.from_cudf( cudf.DataFrame( @@ -352,43 +353,58 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets): edge_attr=["w", "eid", "etp"], ) - dest_rank = [0, 1] sampling_results = cugraph.dask.uniform_neighbor_sample( G, - start_list=cudf.Series([0, 4], dtype="int64"), + start_list=cudf.DataFrame( + { + "start": cudf.Series([0, 4], dtype="int64"), + "batch": cudf.Series([0, 1], dtype="int32"), + } + ), fanout_vals=[-1, -1], with_replacement=False, with_edge_properties=True, - batch_id_list=cudf.Series([0, 1], dtype="int32"), - label_list=cudf.Series([0, 1], dtype="int32") if return_offsets else None, - label_to_output_comm_rank=cudf.Series(dest_rank, dtype="int32") - if return_offsets - else None, + with_batch_ids=True, + keep_batches_together=True, + min_batch_id=0, + max_batch_id=1, return_offsets=return_offsets, ) if return_offsets: sampling_results, sampling_offsets = sampling_results - df_p0 = sampling_results.get_partition(0).compute() - assert sorted(df_p0.sources.values_host.tolist()) == ( - [0, 0, 0, 1, 1, 2, 2, 2, 4, 4] - ) - assert sorted(df_p0.destinations.values_host.tolist()) == ( - [1, 1, 1, 2, 2, 3, 3, 4, 4, 4] - ) - - df_p1 = sampling_results.get_partition(1).compute() - assert sorted(df_p1.sources.values_host.tolist()) == ([1, 1, 3, 3, 4, 4]) - assert sorted(df_p1.destinations.values_host.tolist()) == ([1, 2, 2, 3, 3, 4]) - - offsets_p0 = sampling_offsets.get_partition(0).compute() - assert offsets_p0.batch_id.values_host.tolist() == [0] - assert offsets_p0.offsets.values_host.tolist() == [0] - - offsets_p1 = sampling_offsets.get_partition(1).compute() - assert offsets_p1.batch_id.values_host.tolist() == [1] - assert offsets_p1.offsets.values_host.tolist() == [0] + batches_found = {0: 0, 1: 0} + for i in range(n_workers): + dfp = sampling_results.get_partition(i).compute() + if len(dfp) > 0: + offsets_p = 
sampling_offsets.get_partition(i).compute()
+                assert len(offsets_p) > 0
+
+                if offsets_p.batch_id.iloc[0] == 1:
+                    batches_found[1] += 1
+
+                    assert offsets_p.batch_id.values_host.tolist() == [1]
+                    assert offsets_p.offsets.values_host.tolist() == [0]
+
+                    assert sorted(dfp.sources.values_host.tolist()) == (
+                        [1, 1, 3, 3, 4, 4]
+                    )
+                    assert sorted(dfp.destinations.values_host.tolist()) == (
+                        [1, 2, 2, 3, 3, 4]
+                    )
+                elif offsets_p.batch_id.iloc[0] == 0:
+                    batches_found[0] += 1
+
+                    assert offsets_p.batch_id.values_host.tolist() == [0]
+                    assert offsets_p.offsets.values_host.tolist() == [0]
+
+                    assert sorted(dfp.sources.values_host.tolist()) == (
+                        [0, 0, 0, 1, 1, 2, 2, 2, 4, 4]
+                    )
+                    assert sorted(dfp.destinations.values_host.tolist()) == (
+                        [1, 1, 1, 2, 2, 3, 3, 4, 4, 4]
+                    )

     mdf = cudf.merge(
         sampling_results.compute(),
@@ -446,13 +462,19 @@ def test_uniform_neighbor_sample_edge_properties_self_loops(dask_client):
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=dask_cudf.from_cudf(cudf.Series([0, 1, 2]), npartitions=2),
-        batch_id_list=dask_cudf.from_cudf(
-            cudf.Series([1, 1, 1], dtype="int32"), npartitions=2
+        start_list=dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0, 1, 2], dtype="int64"),
+                    "batch": cudf.Series([1, 1, 1], dtype="int32"),
+                }
+            ),
+            npartitions=2,
         ),
         fanout_vals=[2, 2],
         with_replacement=False,
         with_edge_properties=True,
+        with_batch_ids=True,
     ).compute()

     assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2]
@@ -526,23 +548,32 @@ def test_uniform_neighbor_sample_hop_id_order_multi_batch():
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        cudf.Series([0, 1], dtype="int64"),
+        dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0, 1], dtype="int64"),
+                    "batch": cudf.Series([0, 1], dtype="int32"),
+                }
+            ),
+            npartitions=2,
+        ),
         fanout_vals=[2, 2, 2],
-        batch_id_list=cudf.Series([0, 1], dtype="int32"),
         with_replacement=False,
         with_edge_properties=True,
+        with_batch_ids=True,
     )

     for p in range(sampling_results.npartitions):
         sampling_results_p = sampling_results.get_partition(p)
-        for b in range(2):
-            sampling_results_pb = sampling_results_p[
-                sampling_results_p.batch_id == b
-            ].compute()
-            assert (
-                sorted(sampling_results_pb.hop_id.values_host.tolist())
-                == sampling_results_pb.hop_id.values_host.tolist()
-            )
+        if len(sampling_results_p) > 0:
+            for b in range(2):
+                sampling_results_pb = sampling_results_p[
+                    sampling_results_p.batch_id == b
+                ].compute()
+                assert (
+                    sorted(sampling_results_pb.hop_id.values_host.tolist())
+                    == sampling_results_pb.hop_id.values_host.tolist()
+                )


 @pytest.mark.mg
@@ -577,11 +608,19 @@ def test_uniform_neighbor_edge_properties_sample_small_start_list(
     cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=cudf.Series([0]),
+        start_list=dask_cudf.from_cudf(
+            cudf.DataFrame(
+                {
+                    "start": cudf.Series([0]),
+                    "batch": cudf.Series([10], dtype="int32"),
+                }
+            ),
+            npartitions=1,
+        ),
         fanout_vals=[10, 25],
         with_replacement=with_replacement,
         with_edge_properties=True,
-        batch_id_list=cudf.Series([10], dtype="int32"),
+        with_batch_ids=True,
     )


@@ -610,11 +649,16 @@ def test_uniform_neighbor_sample_without_dask_inputs(dask_client):
     sampling_results = cugraph.dask.uniform_neighbor_sample(
         G,
-        start_list=cudf.Series([0, 1, 2]),
-        batch_id_list=cudf.Series([1, 1, 1], dtype="int32"),
+        start_list=cudf.DataFrame(
+            {
+                "start": cudf.Series([0, 1, 2]),
+                "batch": cudf.Series([1, 1, 1], dtype="int32"),
+            }
+        ),
         fanout_vals=[2, 2],
         with_replacement=False,
with_edge_properties=True, + with_batch_ids=True, ).compute() assert sorted(sampling_results.sources.values_host.tolist()) == [0, 0, 1, 1, 2, 2] @@ -664,24 +708,24 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat input_vertices = dask_cudf.concat([df.src, df.dst]).unique().compute() assert isinstance(input_vertices, cudf.Series) + input_vertices.name = "start" input_vertices.index = cupy.random.permutation(len(input_vertices)) + input_vertices = input_vertices.to_frame().reset_index(drop=True) - input_batch = cudf.Series( + input_vertices["batch"] = cudf.Series( cupy.random.randint(0, max_batches, len(input_vertices)), dtype="int32" ) - input_batch.index = cupy.random.permutation(len(input_vertices)) if input_df == dask_cudf.DataFrame: - input_batch = dask_cudf.from_cudf(input_batch, npartitions=num_workers) input_vertices = dask_cudf.from_cudf(input_vertices, npartitions=num_workers) sampling_results = cugraph.dask.uniform_neighbor_sample( G, start_list=input_vertices, - batch_id_list=input_batch, fanout_vals=[5, 5], with_replacement=False, with_edge_properties=True, + with_batch_ids=True, ) for batch_id in range(max_batches): @@ -693,7 +737,7 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat .compute() ) - input_starts_per_batch = len(input_batch[input_batch == batch_id]) + input_starts_per_batch = len(input_vertices[input_vertices.batch == batch_id]) # Should be <= to account for starts without outgoing edges assert output_starts_per_batch <= input_starts_per_batch diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index e0d7b6797d4..5c03d8f98cc 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -81,4 +81,6 @@ from pylibcugraph.select_random_vertices import select_random_vertices +from pylibcugraph import exceptions + __version__ = "23.08.00" diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd index 06838256f30..6cd02ed6f17 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd @@ -47,6 +47,16 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_centrality_result_t* result ) + cdef size_t \ + cugraph_centrality_result_get_num_iterations( + cugraph_centrality_result_t* result + ) + + cdef bool_t \ + cugraph_centrality_result_converged( + cugraph_centrality_result_t* result + ) + cdef void \ cugraph_centrality_result_free( cugraph_centrality_result_t* result @@ -68,6 +78,22 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error + ) + cdef cugraph_error_code_t \ cugraph_personalized_pagerank( const cugraph_resource_handle_t* handle, @@ -86,6 +112,24 @@ cdef extern 
from "cugraph_c/centrality_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_personalized_pagerank_allow_nonconvergence( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_vertices, + const cugraph_type_erased_device_array_view_t* precomputed_vertex_out_weight_sums, + const cugraph_type_erased_device_array_view_t* initial_guess_vertices, + const cugraph_type_erased_device_array_view_t* initial_guess_values, + const cugraph_type_erased_device_array_view_t* personalization_vertices, + const cugraph_type_erased_device_array_view_t* personalization_values, + double alpha, + double epsilon, + size_t max_iterations, + bool_t do_expensive_check, + cugraph_centrality_result_t** result, + cugraph_error_t** error + ) + ########################################################################### # eigenvector centrality cdef cugraph_error_code_t \ @@ -167,4 +211,4 @@ cdef extern from "cugraph_c/centrality_algorithms.h": bool_t do_expensive_check, cugraph_centrality_result_t** result, cugraph_error_t** error - ) \ No newline at end of file + ) diff --git a/python/pylibcugraph/pylibcugraph/exceptions.py b/python/pylibcugraph/pylibcugraph/exceptions.py new file mode 100644 index 00000000000..54b58d840b3 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/exceptions.py @@ -0,0 +1,26 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Exception classes for pylibcugraph. +""" + + +class FailedToConvergeError(Exception): + """ + Raised when an algorithm fails to converge within a predetermined set of + constraints which vary based on the algorithm, and may or may not be + user-configurable. + """ + + pass diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx index 7d8f7807ead..a5022072b4c 100644 --- a/python/pylibcugraph/pylibcugraph/pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx
index 7d8f7807ead..a5022072b4c 100644
--- a/python/pylibcugraph/pylibcugraph/pagerank.pyx
+++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,8 @@ from pylibcugraph._cugraph_c.graph cimport (
 )
 from pylibcugraph._cugraph_c.centrality_algorithms cimport (
     cugraph_centrality_result_t,
-    cugraph_pagerank,
+    cugraph_pagerank_allow_nonconvergence,
+    cugraph_centrality_result_converged,
     cugraph_centrality_result_get_vertices,
     cugraph_centrality_result_get_values,
     cugraph_centrality_result_free,
@@ -53,6 +54,7 @@ from pylibcugraph.utils cimport (
     get_c_type_from_numpy_type,
     create_cugraph_type_erased_device_array_view_from_py_obj,
 )
+from pylibcugraph.exceptions import FailedToConvergeError


 def pagerank(ResourceHandle resource_handle,
@@ -64,7 +66,8 @@ def pagerank(ResourceHandle resource_handle,
              double alpha,
              double epsilon,
              size_t max_iterations,
-             bool_t do_expensive_check):
+             bool_t do_expensive_check,
+             fail_on_nonconvergence=True):
     """
     Find the PageRank score for every vertex in a graph by computing an
     approximation of the Pagerank eigenvector using the power method. The
@@ -123,13 +126,29 @@ def pagerank(ResourceHandle resource_handle,
         If True, performs more extensive tests on the inputs to ensure
         validitity, at the expense of increased run time.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (vertices, pageranks, converged),
+        where vertices and pageranks are the device arrays described below
+        and converged is a boolean indicating if the solver converged (True)
+        or not (False).
+
     Returns
     -------
-    A tuple of device arrays, where the first item in the tuple is a device
-    array containing the vertex identifiers, and the second item is a device
-    array containing the pagerank values for the corresponding vertices. For
-    example, the vertex identifier at the ith element of the vertex array has
-    the pagerank value of the ith element in the pagerank array.
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
+    A tuple of device arrays, where the first item in the tuple is a device
+    array containing the vertex identifiers, and the second item is a device
+    array containing the pagerank values for the corresponding vertices. For
+    example, the vertex identifier at the ith element of the vertex array
+    has the pagerank value of the ith element in the pagerank array.
+
+    If fail_on_nonconvergence is False:
+
+    A three-tuple where the first two items are the device arrays described
+    above, and the third is a bool indicating if the solver converged (True)
+    or not (False).
     Examples
     --------
@@ -195,30 +214,35 @@ def pagerank(ResourceHandle resource_handle,
     cdef cugraph_centrality_result_t* result_ptr
     cdef cugraph_error_code_t error_code
     cdef cugraph_error_t* error_ptr
+    cdef bool_t converged
+    cdef cugraph_type_erased_device_array_view_t* vertices_ptr
+    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr
+
+    error_code = cugraph_pagerank_allow_nonconvergence(
+        c_resource_handle_ptr,
+        c_graph_ptr,
+        precomputed_vertex_out_weight_vertices_view_ptr,
+        precomputed_vertex_out_weight_sums_view_ptr,
+        initial_guess_vertices_view_ptr,
+        initial_guess_values_view_ptr,
+        alpha,
+        epsilon,
+        max_iterations,
+        do_expensive_check,
+        &result_ptr,
+        &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_pagerank_allow_nonconvergence")
+
+    converged = cugraph_centrality_result_converged(result_ptr)

-    error_code = cugraph_pagerank(c_resource_handle_ptr,
-                                  c_graph_ptr,
-                                  precomputed_vertex_out_weight_vertices_view_ptr,
-                                  precomputed_vertex_out_weight_sums_view_ptr,
-                                  initial_guess_vertices_view_ptr,
-                                  initial_guess_values_view_ptr,
-                                  alpha,
-                                  epsilon,
-                                  max_iterations,
-                                  do_expensive_check,
-                                  &result_ptr,
-                                  &error_ptr)
-    assert_success(error_code, error_ptr, "cugraph_pagerank")
-
-    # Extract individual device array pointers from result and copy to cupy
-    # arrays for returning.
-    cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \
-        cugraph_centrality_result_get_vertices(result_ptr)
-    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr = \
-        cugraph_centrality_result_get_values(result_ptr)
-
-    cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
-    cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)
+    # Only extract results if necessary
+    if (fail_on_nonconvergence is False) or (converged is True):
+        # Extract individual device array pointers from result and copy to cupy
+        # arrays for returning.
+        vertices_ptr = cugraph_centrality_result_get_vertices(result_ptr)
+        pageranks_ptr = cugraph_centrality_result_get_values(result_ptr)
+        cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
+        cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)

     # Free all pointers
     cugraph_centrality_result_free(result_ptr)
@@ -231,4 +255,10 @@ def pagerank(ResourceHandle resource_handle,
     if precomputed_vertex_out_weight_sums is not None:
         cugraph_type_erased_device_array_view_free(precomputed_vertex_out_weight_sums_view_ptr)

-    return (cupy_vertices, cupy_pageranks)
+    if fail_on_nonconvergence is False:
+        return (cupy_vertices, cupy_pageranks, bool(converged))
+    else:
+        if converged is True:
+            return (cupy_vertices, cupy_pageranks)
+        else:
+            raise FailedToConvergeError
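To make the new dual return shape concrete, a hedged usage sketch of the updated pagerank
entry point with fail_on_nonconvergence=False; resource_handle and graph are assumed to have
been created elsewhere (e.g. a pylibcugraph.ResourceHandle and an SGGraph), and the None
placeholders cover the optional precomputed/initial-guess device arrays:

import pylibcugraph

vertices, pageranks, converged = pylibcugraph.pagerank(
    resource_handle,
    graph,
    None,  # precomputed_vertex_out_weight_vertices
    None,  # precomputed_vertex_out_weight_sums
    None,  # initial_guess_vertices
    None,  # initial_guess_values
    alpha=0.85,
    epsilon=1.0e-6,
    max_iterations=500,
    do_expensive_check=False,
    fail_on_nonconvergence=False,
)
if not converged:
    # No exception is raised on this path; the caller inspects the flag and
    # still receives the best-effort scores computed after max_iterations.
    print("pagerank stopped before reaching epsilon")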
diff --git a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
index 89b57f139a1..e60e7fa2c3e 100644
--- a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
+++ b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,8 @@ from pylibcugraph._cugraph_c.graph cimport (
 )
 from pylibcugraph._cugraph_c.centrality_algorithms cimport (
     cugraph_centrality_result_t,
-    cugraph_personalized_pagerank,
+    cugraph_personalized_pagerank_allow_nonconvergence,
+    cugraph_centrality_result_converged,
     cugraph_centrality_result_get_vertices,
     cugraph_centrality_result_get_values,
     cugraph_centrality_result_free,
@@ -53,6 +54,7 @@ from pylibcugraph.utils cimport (
     get_c_type_from_numpy_type,
     create_cugraph_type_erased_device_array_view_from_py_obj,
 )
+from pylibcugraph.exceptions import FailedToConvergeError


 def personalized_pagerank(ResourceHandle resource_handle,
@@ -66,7 +68,8 @@ def personalized_pagerank(ResourceHandle resource_handle,
                           double alpha,
                           double epsilon,
                           size_t max_iterations,
-                          bool_t do_expensive_check):
+                          bool_t do_expensive_check,
+                          fail_on_nonconvergence=True):
     """
     Find the PageRank score for every vertex in a graph by computing an
     approximation of the Pagerank eigenvector using the power method. The
@@ -85,27 +88,21 @@ def personalized_pagerank(ResourceHandle resource_handle,

     precomputed_vertex_out_weight_vertices: device array type
         Subset of vertices of graph for precomputed_vertex_out_weight
-        (a performance optimization)

     precomputed_vertex_out_weight_sums : device array type
         Corresponding precomputed sum of outgoing vertices weight
-        (a performance optimization)
-
+
     initial_guess_vertices : device array type
         Subset of vertices of graph for initial guess for pagerank values
-        (a performance optimization)
-
+
     initial_guess_values : device array type
         Pagerank values for vertices
-        (a performance optimization)
-
+
     personalization_vertices : device array type
         Subset of vertices of graph for personalization
-        (a performance optimization)
-
+
     personalization_values : device array type
         Personalization values for vertices
-        (a performance optimization)

     alpha : double
         The damping factor alpha represents the probability to follow an
@@ -133,13 +130,29 @@ def personalized_pagerank(ResourceHandle resource_handle,
         If True, performs more extensive tests on the inputs to ensure
         validitity, at the expense of increased run time.

+    fail_on_nonconvergence : bool (default=True)
+        If the solver does not reach convergence, raise an exception if
+        fail_on_nonconvergence is True. If fail_on_nonconvergence is False,
+        the return value is a tuple of (vertices, pageranks, converged),
+        where vertices and pageranks are the device arrays described below
+        and converged is a boolean indicating if the solver converged (True)
+        or not (False).
+
     Returns
     -------
-    A tuple of device arrays, where the first item in the tuple is a device
-    array containing the vertex identifiers, and the second item is a device
-    array containing the pagerank values for the corresponding vertices. For
-    example, the vertex identifier at the ith element of the vertex array has
-    the pagerank value of the ith element in the pagerank array.
+    The return value varies based on the value of the fail_on_nonconvergence
+    parameter. If fail_on_nonconvergence is True:
+
+    A tuple of device arrays, where the first item in the tuple is a device
+    array containing the vertex identifiers, and the second item is a device
+    array containing the pagerank values for the corresponding vertices. For
+    example, the vertex identifier at the ith element of the vertex array has
+    the pagerank value of the ith element in the pagerank array.
+
+    If fail_on_nonconvergence is False:
+
+    A three-tuple where the first two items are the device arrays described
+    above, and the third is a bool indicating if the solver converged (True)
+    or not (False).

     Examples
     --------
@@ -207,12 +220,12 @@ def personalized_pagerank(ResourceHandle resource_handle,
         precomputed_vertex_out_weight_sums_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 precomputed_vertex_out_weight_sums)
-
+
     cdef cugraph_type_erased_device_array_view_t* \
         personalization_vertices_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
                 personalization_vertices)
-
+
     cdef cugraph_type_erased_device_array_view_t* \
         personalization_values_view_ptr = \
             create_cugraph_type_erased_device_array_view_from_py_obj(
@@ -221,32 +234,38 @@ def personalized_pagerank(ResourceHandle resource_handle,
     cdef cugraph_centrality_result_t* result_ptr
     cdef cugraph_error_code_t error_code
     cdef cugraph_error_t* error_ptr
+    cdef bool_t converged
+    cdef cugraph_type_erased_device_array_view_t* vertices_ptr
+    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr
+
+    error_code = cugraph_personalized_pagerank_allow_nonconvergence(
+        c_resource_handle_ptr,
+        c_graph_ptr,
+        precomputed_vertex_out_weight_vertices_view_ptr,
+        precomputed_vertex_out_weight_sums_view_ptr,
+        initial_guess_vertices_view_ptr,
+        initial_guess_values_view_ptr,
+        personalization_vertices_view_ptr,
+        personalization_values_view_ptr,
+        alpha,
+        epsilon,
+        max_iterations,
+        do_expensive_check,
+        &result_ptr,
+        &error_ptr)
+    assert_success(
+        error_code, error_ptr, "cugraph_personalized_pagerank_allow_nonconvergence")
+
+    converged = cugraph_centrality_result_converged(result_ptr)

-    error_code = cugraph_personalized_pagerank(c_resource_handle_ptr,
-                                               c_graph_ptr,
-                                               precomputed_vertex_out_weight_vertices_view_ptr,
-                                               precomputed_vertex_out_weight_sums_view_ptr,
-                                               initial_guess_vertices_view_ptr,
-                                               initial_guess_values_view_ptr,
-                                               personalization_vertices_view_ptr,
-                                               personalization_values_view_ptr,
-                                               alpha,
-                                               epsilon,
-                                               max_iterations,
-                                               do_expensive_check,
-                                               &result_ptr,
-                                               &error_ptr)
-    assert_success(error_code, error_ptr, "cugraph_personalized_pagerank")
-
-    # Extract individual device array pointers from result and copy to cupy
-    # arrays for returning.
-    cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \
-        cugraph_centrality_result_get_vertices(result_ptr)
-    cdef cugraph_type_erased_device_array_view_t* pageranks_ptr = \
-        cugraph_centrality_result_get_values(result_ptr)
-
-    cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
-    cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)
+    # Only extract results if necessary
+    if (fail_on_nonconvergence is False) or (converged is True):
+        # Extract individual device array pointers from result and copy to cupy
+        # arrays for returning.
+        vertices_ptr = cugraph_centrality_result_get_vertices(result_ptr)
+        pageranks_ptr = cugraph_centrality_result_get_values(result_ptr)
+        cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr)
+        cupy_pageranks = copy_to_cupy_array(c_resource_handle_ptr, pageranks_ptr)

     # Free all pointers
     cugraph_centrality_result_free(result_ptr)
@@ -263,4 +282,10 @@ def personalized_pagerank(ResourceHandle resource_handle,
     if personalization_values is not None:
         cugraph_type_erased_device_array_view_free(personalization_values_view_ptr)

-    return (cupy_vertices, cupy_pageranks)
+    if fail_on_nonconvergence is False:
+        return (cupy_vertices, cupy_pageranks, bool(converged))
+    else:
+        if converged is True:
+            return (cupy_vertices, cupy_pageranks)
+        else:
+            raise FailedToConvergeError
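personalized_pagerank follows the same two-mode contract, with two extra device arrays for
the personalization set. A minimal sketch under the same assumptions as the pagerank example
above (resource_handle and graph prepared elsewhere); the vertex ids, dtypes, and
personalization weights are illustrative placeholders:

import cupy
import pylibcugraph

# Hypothetical personalization: bias the ranking toward vertices 0 and 5.
personalization_vertices = cupy.asarray([0, 5], dtype="int32")
personalization_values = cupy.asarray([0.5, 0.5], dtype="float32")

vertices, pageranks, converged = pylibcugraph.personalized_pagerank(
    resource_handle,
    graph,
    None,  # precomputed_vertex_out_weight_vertices
    None,  # precomputed_vertex_out_weight_sums
    None,  # initial_guess_vertices
    None,  # initial_guess_values
    personalization_vertices,
    personalization_values,
    alpha=0.85,
    epsilon=1.0e-6,
    max_iterations=500,
    do_expensive_check=False,
    fail_on_nonconvergence=False,
)
# With the default fail_on_nonconvergence=True, the same call returns only
# (vertices, pageranks) and raises FailedToConvergeError instead of ever
# returning converged=False.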