Merge pull request #16 from Amps-GPU/more_performance
More changes for performance
scarlehoff authored Jul 28, 2024
2 parents 6b1f63a + 8abb2c5 commit f4e36bd
Showing 9 changed files with 187 additions and 107 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

[![CI Lint](https://github.com/Amps-GPU/BG-Trees/actions/workflows/ci_lint.yml/badge.svg)](https://github.com/Amps-GPU/BG-Trees/actions/workflows/ci_lint.yml)
[![CI Test](https://github.com/Amps-GPU/BG-Trees/actions/workflows/ci_test_gpu.yml/badge.svg)](https://github.com/Amps-GPU/BG-Trees/actions/workflows/ci_test_gpu.yml)
[![Coverage](https://img.shields.io/badge/Coverage-76%25-yellow?labelColor=2a2f35)](https://github.com/Amps-GPU/BG-Trees/actions)
[![Coverage](https://img.shields.io/badge/Coverage-74%25-yellow?labelColor=2a2f35)](https://github.com/Amps-GPU/BG-Trees/actions)

## Installation

32 changes: 24 additions & 8 deletions benchmarks/timing_benchmark.py
@@ -20,7 +20,8 @@
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument(
"data_file", help="A npz data file with the right information (use `create_array` to run this benchmark)"
"data_file",
help="A npz data file with the right information (use `create_array` to run this benchmark)",
)
parser.add_argument(
"-n",
@@ -30,8 +31,11 @@
type=int,
default=[10, 100, 1000],
)
parser.add_argument("-a", "--average", help="Run <average> times and take the average", type=int, default=1)
parser.add_argument(
"-a", "--average", help="Run <average> times and take the average", type=int, default=1
)
parser.add_argument("-o", "--output", help="Output file for results as <n> <events>", type=str)
parser.add_argument("--profile", action="store_true")
args = parser.parse_args()

load_info = np.load(args.data_file)
@@ -57,11 +61,20 @@

res_per_n = {}

for nev in list_of_n:
# Run a bit just to activate the JIT compilation
if not settings.executing_eagerly():
_ = another_j(ff_moms[:10, 1:], ff_pols[:10, 1:], put_propagator=False, verbose=False)

if args.profile:
import tensorflow as tf

# Run a bit just to activate the JIT compilation
if not settings.executing_eagerly():
_ = another_j(ff_moms[:10, 1:], ff_pols[:10, 1:], put_propagator=False, verbose=False)
logdir_path = "profiling_here"
options = tf.profiler.experimental.ProfilerOptions(
host_tracer_level=3, python_tracer_level=1, device_tracer_level=1
)
tf.profiler.experimental.start(logdir_path, options=options)

for nev in list_of_n:

timing_raw = 0

@@ -86,8 +99,11 @@

print(f"n = {nev} took {timing:.5}s")

# finres = np.concatenate([i.values.numpy() for i in total_final_results])
# np.testing.assert_allclose(finres, load_info["target"][:nev])
# finres = np.concatenate([i.values.numpy() for i in total_final_results])
# np.testing.assert_allclose(finres, load_info["target"][:nev])

if args.profile:
tf.profiler.experimental.stop()

if args.output is not None:
res_as_str = "\n".join([f"{i} {j}" for i, j in res_per_n.items()])
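
The new --profile flag brackets the timing loop with TensorFlow's experimental profiler. For reference, a minimal, self-contained sketch of that start/stop pattern (the traced function and log directory here are illustrative, not repository code):

    import tensorflow as tf

    @tf.function
    def work(x):
        return tf.reduce_sum(x * x)

    # Same options the benchmark passes: trace host, Python and device activity.
    options = tf.profiler.experimental.ProfilerOptions(
        host_tracer_level=3, python_tracer_level=1, device_tracer_level=1
    )
    tf.profiler.experimental.start("profiling_here", options=options)
    for _ in range(10):
        work(tf.random.uniform((1000,)))
    tf.profiler.experimental.stop()

    # The resulting trace can then be inspected with TensorBoard, e.g.
    #   tensorboard --logdir profiling_here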
7 changes: 4 additions & 3 deletions bgtrees/currents.py
@@ -6,7 +6,7 @@
from bgtrees.finite_gpufields import operations as op
from bgtrees.finite_gpufields.finite_fields_tf import FiniteField

from .metric_and_verticies import V3g, V4g, η
from .metric_and_verticies import V3g, V4g, new_V3g, η
from .settings import settings


@@ -77,7 +77,7 @@ def _vert_3_gluon(slice_left, slice_right):
"""Computes the 3g vertex for the two slices coming in."""
moms_sl = tf.reduce_sum(slice_left, axis=1)
moms_sr = tf.reduce_sum(slice_right, axis=1)
return V3g(moms_sl, moms_sr, einsum=op.ff_tensor_product)
return new_V3g(moms_sl, moms_sr)


@tf.function(reduce_retracing=True)
@@ -107,7 +107,8 @@ def _contract_v4_current(vertex, jnu, jo, jrho):

tmp_1 = op.ff_dot_product_single_batch(jrho, v4, rank_x=2, rank_y=2) # rp, pN -> rN
tmp_1 = tmp_1.reshape_ff((-1, D, D, D)) # rN -> ronm
tmp_1 = op.ff_index_permutation("ronm->rmno", tmp_1)
tmp_1 = tmp_1.transpose_ff((0, 3, 2, 1))
# tmp_1 = op.ff_index_permutation("ronm->rmno", tmp_1)

tmp_2 = op.ff_dot_product_tris(tmp_1, jo, rank_x=4, rank_y=2) # rmno, ro -> rmn
return op.ff_dot_product(tmp_2, jnu, rank_x=3, rank_y=2) # rmn, rn -> rm
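
Replacing ff_index_permutation with transpose_ff swaps an einsum-string permutation for a plain tf.transpose. A small sanity check (illustrative, on ordinary integer tensors rather than FiniteField containers) that perm=(0, 3, 2, 1) reproduces "ronm->rmno":

    import numpy as np
    import tensorflow as tf

    # Shape (r, o, n, m); the values are arbitrary.
    x = tf.constant(np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5))
    via_einsum = tf.einsum("ronm->rmno", x)
    via_transpose = tf.transpose(x, perm=(0, 3, 2, 1))
    assert np.array_equal(via_einsum.numpy(), via_transpose.numpy())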
132 changes: 53 additions & 79 deletions bgtrees/finite_gpufields/finite_fields_tf.py
@@ -139,6 +139,10 @@ def reshape_ff(self, new_shape):
new_values = tf.reshape(self.n, new_shape)
return self.__class__(new_values, self.p)

def transpose_ff(self, permutation):
new_values = tf.transpose(self.n, perm=permutation)
return self.__class__(new_values, self.p)

# Mirror version
def __radd__(self, b):
return self + b
@@ -218,99 +222,69 @@ def finite_field_squeeze(input, axis, name=None):
return FiniteField(new_n, input.p)


######
def _finite_field_reduce(red_op, input_tensor, axis=None, keepdims=False):
"""Auxiliar function to reduce Finite Field containers"""
if axis is None:
all_axes = list(range(len(input_tensor.shape)))
return _finite_field_reduce(red_op, input_tensor, axis=all_axes, keepdims=keepdims)

# Hopefully the exception block is compiled away upon first pass...
try:
res = input_tensor
for ax in axis:
res = _finite_field_reduce(red_op, res, axis=ax, keepdims=True)
if not keepdims:
res = tf.squeeze(res, axis=axis)
return res
except TypeError:
pass

@tf.py_function(Tout=settings.dtype)
def reduce_me(itensor):
# First separate the FF in the axis that we want to sum over
unstacked_ff = tf.unstack(itensor, axis=axis)
summed_ff = functools.reduce(red_op, unstacked_ff)
# Add a dummy dimension if the user asked for it
if keepdims:
summed_ff = tf.expand_dims(summed_ff, axis)
return summed_ff.n

return FiniteField(reduce_me(input_tensor), input_tensor.p)


@experimental.dispatch_for_api(tf.reduce_sum, {"input_tensor": FiniteField})
def finite_field_reduce_sum(input_tensor, axis=None, keepdims=False, name=None):
"""Override the reduce_sum operation for a FiniteField container"""
return _finite_field_reduce(operator.add, input_tensor, axis=axis, keepdims=keepdims)
def _ff_reduce_internal(accumulated, next_element, operation=operator.add, p=settings.tf_p):
"""Apply the operation ``operation`` inmediately taking the %
Acts on integers
"""
return tf.math.floormod(operation(accumulated, next_element), p)


@experimental.dispatch_for_api(tf.reduce_prod, {"input_tensor": FiniteField})
def finite_field_reduce_prod(input_tensor, axis=None, keepdims=False, name=None):
"""Override the reduce_sum operation for a FiniteField container"""
return _finite_field_reduce(operator.mul, input_tensor, axis=axis, keepdims=keepdims)

@functools.lru_cache
def _dispatch_ff_reduction(operation, p):
"""Dispatchs the _ff_reduce_internal with the right settings"""
return tf.function(functools.partial(_ff_reduce_internal, operation=operation, p=p))

def oinsum(eq, *arrays):
"""A ``einsum`` implementation for ``numpy`` object arrays."""
lhs, output = eq.split("->")
inputs = lhs.split(",")

sizes = {}
for term, array in zip(inputs, arrays):
for k, d in zip(term, array.shape):
sizes[k] = d
@tf.function
def _ff_reduce_single_axis(input_tensor, axis, operation=operator.add, p=settings.p):
"""Utilize the numpy experimental API and foldl to apply a reduction to the input tensor.
Acts on integer, the calling function should then make it into a FF.
"""
reduce_fn = _dispatch_ff_reduction(operation, p)
operate_on = tf.experimental.numpy.moveaxis(input_tensor, axis, 0)
return tf.foldl(reduce_fn, operate_on)

out_size = tuple(sizes[k] for k in output)
out = np.empty(out_size, dtype=object)

inner = [k for k in sizes if k not in output]
inner_size = [sizes[k] for k in inner]
@tf.function
def _ff_reduce(input_tensor, axis, keepdims, operation):
raw_input = input_tensor.n

for coo_o in np.ndindex(*out_size):
coord = dict(zip(output, coo_o))
if axis is None:
ret = raw_input
for _ in range(raw_input.ndim):
ret = _ff_reduce_single_axis(ret, 0, operation=operation)

def gen_inner_sum():
for coo_i in np.ndindex(*inner_size):
coord.update(dict(zip(inner, coo_i)))
if keepdims:
for _ in range(raw_input.ndim):
ret = tf.expand_dims(ret, 0)

locs = []
for term in inputs:
locs.append(tuple(coord[k] for k in term))
elif isinstance(axis, int):
ret = _ff_reduce_single_axis(raw_input, axis, operation=operation)

elements = []
for array, loc in zip(arrays, locs):
elements.append(array[loc])
if keepdims:
ret = tf.expand_dims(ret, axis)

yield functools.reduce(operator.mul, elements)
else:
# A tuple has been received
ret = input_tensor
for ax in axis:
ret = _ff_reduce(ret, ax, True, operation)
ret = ret.n

tmp = functools.reduce(operator.add, gen_inner_sum())
out[coo_o] = tmp
# Now, if keepdims was False, remove the dimensions
if not keepdims:
ret = tf.squeeze(ret, axis)

# if the output is made of finite fields, take them out
if isinstance(tmp, FiniteField) and len(out_size) == 0:
out = tmp
elif isinstance(tmp, FiniteField):
p = tmp.p
return FiniteField(ret, input_tensor.p)

def unff(x):
if isinstance(x, FiniteField):
return x.n.numpy()
return x

vunff = np.vectorize(unff)
@experimental.dispatch_for_api(tf.reduce_sum, {"input_tensor": FiniteField})
def finite_field_reduce_sum(input_tensor, axis=None, keepdims=False, name=None):
"""Override the reduce_sum operation for a FiniteField container"""
return _ff_reduce(input_tensor, axis, keepdims, operation=operator.add)

new_out = vunff(out)
out = FiniteField(new_out, p)

return out
@experimental.dispatch_for_api(tf.reduce_prod, {"input_tensor": FiniteField})
def finite_field_reduce_prod(input_tensor, axis=None, keepdims=False, name=None):
"""Override the reduce_sum operation for a FiniteField container"""
return _ff_reduce(input_tensor, axis, keepdims, operation=operator.mul)
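
The py_function-based reduction above is replaced by an in-graph tf.foldl that takes the modulus at every step, so intermediates never exceed roughly p² and stay within int64 for p just below 2³¹. A minimal sketch of the same pattern on plain int64 tensors (prime and values chosen for illustration):

    import operator

    import tensorflow as tf

    p = 2**31 - 19

    def reduce_mod_p(accumulated, next_element, operation=operator.add):
        # Fold step: combine and immediately reduce mod p.
        return tf.math.floormod(operation(accumulated, next_element), p)

    x = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int64)
    # Reduce along axis 1: move it to the front, then fold pairwise.
    moved = tf.experimental.numpy.moveaxis(x, 1, 0)
    row_sums = tf.foldl(reduce_mod_p, moved)  # -> [6, 15] (mod p)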
13 changes: 1 addition & 12 deletions bgtrees/finite_gpufields/operations.py
@@ -22,22 +22,11 @@ def ff_einsum_generic(einstr, *args):
ff_tensor_product
ff_index_permutation
"""
if len(args) == 1:
return ff_index_permutation(einstr, *args)
elif len(args) == 2:
if len(args) == 2:
return ff_tensor_product(einstr, *args)
raise NotImplementedError(f"Automatic understanding of contractions not implemented for {einstr}")


@tf.function(reduce_retracing=True)
def ff_index_permutation(einstr, x):
"""Uses tf.einsum to permute the index of the tensor x
Since this is simply an index permutation, it goes transparently to tf.einsum
"""
ret = tf.einsum(einstr, x.values)
return FiniteField(ret, x.p)


@tf.function(reduce_retracing=True)
def ff_dot_product(x, y, rank_x=None, rank_y=None):
"""Perform a dot product between two batched Finite Fields
26 changes: 26 additions & 0 deletions bgtrees/metric_and_verticies.py
@@ -2,13 +2,19 @@
import numpy
import tensorflow as tf

from .finite_gpufields.finite_fields_tf import FiniteField
from .settings import settings
from .tools import gpu_constant, gpu_function

Gamma = γμ = numpy.block([[numpy.zeros((4, 2, 2)), Pauli_bar], [Pauli, numpy.zeros((4, 2, 2))]])
Gamma5 = γ5 = numpy.block([[numpy.identity(2), numpy.zeros((2, 2))], [numpy.zeros((2, 2)), -numpy.identity(2)]])


@gpu_constant
def diag_mink(D):
return numpy.array([1] + [-1] * (D - 1)).astype(settings.dtype)


@gpu_constant
def MinkowskiMetric(D):
"""D-dimensional Minkowski metric in the mostly negative convention."""
@@ -39,3 +45,23 @@ def V3g(lp1, lp2, einsum=numpy.einsum):
return (
einsum("mn,rl->rlmn", mm, (lp1 - lp2)) + 2 * einsum("nl,rm->rlmn", mm, lp2) - 2 * einsum("lm,rn->rlmn", mm, lp1)
)


@tf.function(reduce_retracing=True)
def new_V3g(lp1, lp2):
"""3-gluon vertex, upper indices μνρ, D-dimensional. Reduce tensor products."""
D = lp1.shape[1]
if D is None:
D = settings.D

mm = diag_mink(D)
r1 = tf.tensordot(lp1.n, mm, 0)
r2 = tf.tensordot(lp2.n, mm, 0)

r1 = tf.linalg.diag(r1)
r2 = tf.linalg.diag(r2)

a1 = FiniteField(r1, lp1.p)
a2 = FiniteField(r2, lp1.p)

return (a1 - a2) + 2.0 * a2.transpose_ff((0, 3, 1, 2)) - 2.0 * a1.transpose_ff((0, 2, 3, 1))
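
Because the Minkowski metric is diagonal, each einsum term of the original V3g can be assembled from an outer product (tf.tensordot with axes=0), a diagonal embedding (tf.linalg.diag) and an index transposition. A hedged check of this construction for the first term, on plain integer tensors with made-up momenta:

    import numpy as np
    import tensorflow as tf

    D, r = 4, 2
    eta = np.diag([1] + [-1] * (D - 1)).astype(np.int64)   # full metric
    mm = np.array([1] + [-1] * (D - 1)).astype(np.int64)   # its diagonal
    lp = np.arange(r * D, dtype=np.int64).reshape(r, D)    # fake momenta

    term_einsum = np.einsum("mn,rl->rlmn", eta, lp)
    term_diag = tf.linalg.diag(tf.tensordot(lp, mm, 0)).numpy()
    assert np.array_equal(term_einsum, term_diag)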
5 changes: 5 additions & 0 deletions bgtrees/settings.py
@@ -9,6 +9,7 @@ class Settings:
use_gpu: bool = False
D: int = 4
dtype: type = np.int64
p: int = 2**31 - 19

# Tensorflow settings
def run_tf_eagerly(self):
@@ -17,6 +18,10 @@ def run_tf_eagerly(self):
def executing_eagerly(self):
return tf.executing_eagerly()

@property
def tf_p(self):
return tf.cast(self.p, dtype=self.dtype)


settings = Settings()
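
The new default prime sits just below 2³¹ so that a single mod-p product never overflows the int64 dtype used throughout. A quick illustration (not repository code):

    import numpy as np

    p = 2**31 - 19
    # The largest intermediate a mod-p multiplication can produce is (p - 1)**2,
    # which must still fit in a signed 64-bit integer.
    assert (p - 1) ** 2 < np.iinfo(np.int64).max  # ~4.6e18 < ~9.2e18

    a, b = 123_456_789, 987_654_321
    product_mod_p = (a * b) % p  # safe: a * b < p**2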
