Skip to content

Commit

Permalink
Add initial attempt at pagerank with hacky tests (#1)
Browse files Browse the repository at this point in the history
* Add initial attempt at pagerank with hacky tests

* Install networkx too

* Fix failing test; maybe faster/better?

* Even better

* Split pagerank into two functions: grblas-native and networkx-facing.

Also, optimize if adjacency matrix is iso-valued.
I would bring this implementation to a benchmarking shootout!

* Change how we convert NetworkX dicts to vectors.

Also, sparsify vectors.  May be a decent idea?  Not sure.  It probably
doesn't matter most of the time, but I guess there's a chance it can make
the matrix-vector multiply faster for some inputs.  We don't drop 0s from
the input matrix, because that would be expensive.

* Clean up

* Don't be cute; don't use masks, because they're bad for benchmarks.

Also, add basic benchmark script.

* Update to use latest grblas; also, add verify option to bench script.

* Show grid of absolute differences between benchmark results
  • Loading branch information
eriknw authored Apr 4, 2022
1 parent c544c79 commit 67a9e4d
Show file tree
Hide file tree
Showing 9 changed files with 440 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
activate-environment: testing
- name: Install dependencies
run: |
conda install -c conda-forge grblas pytest coverage black flake8 coveralls
conda install -c conda-forge grblas networkx scipy pytest coverage black flake8 coveralls
pip install -e .
- name: Style checks
run: |
Expand Down
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
# **GraphBLAS Algorithms**

[![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms)
[![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/metagraph-dev/graphblas-algorithms/blob/main/LICENSE)
[![Tests](https://github.com/metagraph-dev/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/metagraph-dev/graphblas-algorithms/actions)
[![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/)
[![Coverage](https://coveralls.io/repos/metagraph-dev/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/metagraph-dev/graphblas-algorithms)
[![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
<!--- [![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) --->
<!--- [![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/) --->

GraphBLAS algorithms written in Python with [`grblas`](https://github.com/metagraph-dev/grblas).
GraphBLAS algorithms written in Python with [`grblas`](https://github.com/metagraph-dev/grblas). We are trying to target the NetworkX API algorithms where possible.

This is a work in progress. Stay tuned!
### Installation
```
pip install graphblas-algorithms
```

This is a work in progress. Stay tuned (or come help 😃)!
1 change: 1 addition & 0 deletions graphblas_algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import _version
from .link_analysis import pagerank # noqa

__version__ = _version.get_versions()["version"]
1 change: 1 addition & 0 deletions graphblas_algorithms/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from networkx.conftest import * # noqa
155 changes: 155 additions & 0 deletions graphblas_algorithms/link_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from collections import OrderedDict
from warnings import warn

import grblas as gb
import networkx as nx
from grblas import Vector, binary, unary
from grblas.semiring import plus_first, plus_times


def pagerank_core(
    A,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1e-06,
    nstart=None,
    dangling=None,
    row_degrees=None,
    name="pagerank",
):
    """Compute PageRank by power iteration on GraphBLAS objects.

    Each iteration evaluates
    ``x = alpha * ((xprev * S) @ A + dangling term) + (1 - alpha) * p``
    with the constants ``alpha`` and ``1 - alpha`` folded into the operands
    ahead of the loop.

    Parameters
    ----------
    A
        Adjacency matrix (presumably a square ``grblas.Matrix`` -- confirm
        against callers).
    alpha
        Damping factor.
    personalization
        Optional vector of personalization weights; normalized here by its
        sum.  A uniform scalar ``1 / N`` is used when omitted.
    max_iter
        Maximum number of power iterations.
    tol
        Per-node tolerance; iteration stops once the l1 change between
        successive iterates is below ``N * tol``.
    nstart
        Optional vector of starting values; normalized here by its sum.
    dangling
        Optional vector of weights used to redistribute the rank of rows
        without out-edges; normalized here by its sum.
    row_degrees
        Optional precomputed row-wise reduction of ``A``.  Computed from
        ``A`` when omitted.
    name
        Name given to the returned vector.

    Returns
    -------
    Vector
        The PageRank vector.  When ``A`` stores no values an empty vector of
        size ``A.nrows`` is returned.

    Raises
    ------
    ZeroDivisionError
        If ``nstart`` or ``personalization`` sums to zero.
    networkx.PowerIterationFailedConvergence
        If the tolerance is not reached within ``max_iter`` iterations.
    """
    n_nodes = A.nrows
    if A.nvals == 0:
        return Vector.new(float, n_nodes, name=name)

    uniform = 1.0 / n_nodes

    # Starting vector: normalized ``nstart`` when given, uniform otherwise.
    x = Vector.new(float, n_nodes, name="x")
    if nstart is not None:
        nstart_total = nstart.reduce(allow_empty=False).value
        if nstart_total == 0:
            raise ZeroDivisionError()
        x << nstart / nstart_total
    else:
        x[:] = uniform

    # Personalization: a normalized vector when given, else a plain scalar.
    if personalization is not None:
        p_total = personalization.reduce(allow_empty=False).value
        if p_total == 0:
            raise ZeroDivisionError()
        p = (personalization / p_total).new(name="p")
    else:
        p = uniform

    # S holds alpha / row_degree, i.e. the damping constant is folded into
    # the inverse out-degree scaling.
    if row_degrees is not None:
        S = (alpha / row_degrees).new(name="S")
    else:
        S = A.reduce_rowwise().new(float, name="S")
        S << alpha / S

    if not A.ss.is_iso:
        semiring = plus_times[float]
    else:
        # Every stored value of A is identical, so fold that value into S
        # as well; this lets us use the plus_first semiring, which is faster.
        shared_value = A.ss.iso_value
        if shared_value != 1:
            S *= shared_value
        semiring = plus_first[float]

    # Rows with no entry in S have no out-edges.
    has_dangling = S.nvals < n_nodes
    # With neither dangling weights nor personalization the dangling
    # contribution is a single scalar added to every node.
    scalar_dangling = dangling is None and personalization is None
    if has_dangling:
        dangling_mask = Vector.new(float, n_nodes, name="dangling_mask")
        dangling_mask(mask=~S.S) << 1.0
        # Fold alpha into dangling_weights (or into dangling_mask itself).
        if scalar_dangling:
            # Fast (and common) case; the mask stays iso-valued.
            dangling_mask(mask=dangling_mask.S) << alpha * p
        elif dangling is None:
            dangling_weights = (alpha * p).new(name="dangling_weights")
        else:
            dangling_total = dangling.reduce(allow_empty=False).value
            dangling_weights = (alpha / dangling_total * dangling).new(
                name="dangling_weights"
            )

    # Fold (1 - alpha) into p; must happen after the uses of p above.
    p *= 1 - alpha

    # Power iteration: make up to max_iter iterations.
    xprev = Vector.new(float, n_nodes, name="x_prev")
    w = Vector.new(float, n_nodes, name="w")
    for _iteration in range(max_iter):
        # Reuse the two buffers: the previous result becomes xprev.
        xprev, x = x, xprev

        x << p
        if has_dangling:
            if scalar_dangling:
                # Add a scalar; x is still iso-valued (p is a scalar too).
                x += xprev @ dangling_mask
            else:
                # Add a vector scaled by the total rank on dangling rows.
                x += plus_first(xprev @ dangling_mask) * dangling_weights
        w << xprev * S
        x += semiring(w @ A)  # plus_first if A.ss.is_iso else plus_times

        # Convergence check on the l1 norm: sum(abs(xprev - x)).
        # xprev is overwritten here; it is recomputed on the next pass.
        xprev << binary.minus(xprev | x, require_monoid=False)
        xprev << unary.abs(xprev)
        l1_change = xprev.reduce().value
        if l1_change < n_nodes * tol:
            x.name = name
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)


def _dict_to_vector(mapping, node_ids, size, name):
    """Convert a ``{node: value}`` mapping into a float ``Vector``.

    ``node_ids`` maps each node to its integer index; a key that is missing
    from it raises ``KeyError``.  An empty mapping produces an empty vector
    instead of failing while unpacking ``zip(*...)`` over nothing.
    """
    if not mapping:
        return Vector.new(float, size, name=name)
    indices, values = zip(*((node_ids[key], val) for key, val in mapping.items()))
    return Vector.from_values(indices, values, size=size, dtype=float, name=name)


def pagerank(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1e-06,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """NetworkX-facing PageRank: dict-based inputs, dict result.

    Converts ``G`` and the optional node-keyed dictionaries into GraphBLAS
    objects, delegates to ``pagerank_core`` and converts the result back.

    Parameters
    ----------
    G
        Graph to rank; it is iterated to enumerate nodes and handed to
        ``gb.io.from_networkx``.
    alpha, max_iter, tol
        Forwarded unchanged to ``pagerank_core``.
    personalization, nstart, dangling
        Optional ``{node: value}`` dictionaries.  They are not normalized
        here; ``pagerank_core`` does that.  ``dangling`` is only converted
        when at least one row of the adjacency matrix has no stored value.
    weight
        Edge attribute name forwarded to ``gb.io.from_networkx``.

    Returns
    -------
    dict
        ``{node: rank}`` in the iteration order of ``G``; ``{}`` when ``G``
        has no nodes.

    Raises
    ------
    KeyError
        If a dictionary argument contains a key that is not a node of ``G``.

    Any exception raised by ``pagerank_core`` propagates unchanged.
    """
    # NOTE(review): the empty-message DeprecationWarning looks like it exists
    # only to satisfy the NetworkX tests this function is swapped into --
    # confirm before changing it.
    warn("", DeprecationWarning, stacklevel=2)
    N = len(G)
    if N == 0:
        return {}
    node_ids = OrderedDict((k, i) for i, k in enumerate(G))
    A = gb.io.from_networkx(G, nodelist=node_ids, weight=weight, dtype=float)

    x = p = dangling_weights = None
    # Initial vector (normalized later, inside pagerank_core)
    if nstart is not None:
        x = _dict_to_vector(nstart, node_ids, N, "nstart")
    # Personalization vector (normalized later, inside pagerank_core)
    if personalization is not None:
        p = _dict_to_vector(personalization, node_ids, N, "personalization")
    # Dangling weights are only needed when some row has no out-edges
    row_degrees = A.reduce_rowwise().new(name="row_degrees")
    if dangling is not None and row_degrees.nvals < N:
        dangling_weights = _dict_to_vector(dangling, node_ids, N, "dangling")

    result = pagerank_core(
        A,
        alpha=alpha,
        personalization=p,
        max_iter=max_iter,
        tol=tol,
        nstart=x,
        dangling=dangling_weights,
        row_degrees=row_degrees,
    )
    if result.nvals != N:
        # Not likely, but fill missing entries with 0 so every node is reported
        result(mask=~result.S) << 0
    return dict(zip(node_ids, result.to_values()[1]))
Empty file.
21 changes: 21 additions & 0 deletions graphblas_algorithms/tests/test_pagerank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import inspect

import networkx as nx

from graphblas_algorithms import pagerank

# Keep references to NetworkX's own implementations before they are replaced.
nx_pagerank = nx.pagerank
nx_pagerank_scipy = nx.pagerank_scipy

# Swap our implementation into NetworkX so that the NetworkX pagerank tests
# star-imported at the bottom of this module run against
# graphblas_algorithms.pagerank.
nx.pagerank = pagerank
nx.pagerank_scipy = pagerank
nx.algorithms.link_analysis.pagerank_alg.pagerank_scipy = pagerank


def test_signatures():
    """Our ``pagerank`` must expose exactly the signature of NetworkX's."""
    expected = inspect.signature(nx_pagerank)
    actual = inspect.signature(pagerank)
    assert expected == actual


from networkx.algorithms.link_analysis.tests.test_pagerank import * # isort:skip
Loading

0 comments on commit 67a9e4d

Please sign in to comment.