diff --git a/.github/workflows/examples-mpi.yml b/.github/workflows/examples-mpi.yml index d6498da49a..2c8fb09f85 100644 --- a/.github/workflows/examples-mpi.yml +++ b/.github/workflows/examples-mpi.yml @@ -57,6 +57,7 @@ jobs: run: | pip install --upgrade pip pip install -e .[extras,mpi,tests] + python3 scripts/clear_devito_cache.py - name: Test mpi notebooks continue-on-error: true diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index deac69f1b0..b4eb3d55a1 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -49,7 +49,8 @@ jobs: pip install -e .[tests,extras] - name: Tests in examples - run: py.test --cov --cov-config=.coveragerc --cov-report=xml examples/ + run: | + py.test --cov --cov-config=.coveragerc --cov-report=xml examples/ - name: Seismic acoustic examples run: | diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py index 22b018cbdd..0c0185055e 100644 --- a/devito/ir/equations/algorithms.py +++ b/devito/ir/equations/algorithms.py @@ -1,10 +1,10 @@ from collections.abc import Iterable -from operator import attrgetter from sympy import sympify -from devito.symbolics import retrieve_indexed, uxreplace -from devito.tools import PartialOrderTuple, as_tuple, filter_sorted, flatten +from devito.symbolics import retrieve_indexed, uxreplace, retrieve_dimensions +from devito.tools import (PartialOrderTuple, as_tuple, flatten, + filter_sorted, filter_ordered) from devito.types import Dimension, IgnoreDimSort from devito.types.basic import AbstractFunction @@ -33,8 +33,7 @@ def handle_indexed(indexed): # Fallback: Just insert all the Dimensions we find, regardless of # what the user is attempting to do - relation.extend([d for d in filter_sorted(i.free_symbols) - if isinstance(d, Dimension)]) + relation.extend(filter_sorted(i.atoms(Dimension))) # StencilDimensions are lowered subsequently through special compiler # passes, so they can be ignored here @@ -51,15 +50,16 @@ def handle_indexed(indexed): relations.add(expr.implicit_dims) # Add in leftover free dimensions (not an Indexed' index) - extra = set([i for i in expr.free_symbols if isinstance(i, Dimension)]) + extra = set(retrieve_dimensions(expr, deep=True)) # Add in pure data dimensions (e.g., those accessed only via explicit values, # such as A[3]) indexeds = retrieve_indexed(expr, deep=True) - extra.update(set().union(*[set(i.function.dimensions) for i in indexeds])) + for i in indexeds: + extra.update({d for d in i.function.dimensions if i.indices[d].is_integer}) # Enforce determinism - extra = filter_sorted(extra, key=attrgetter('name')) + extra = filter_sorted(extra) # Add in implicit relations for parent dimensions # ----------------------------------------------- @@ -67,14 +67,25 @@ def handle_indexed(indexed): # wrong; for example, in `((t, time), (t, x, y), (x, y))`, `x` could now # preceed `time`, while `t`, and therefore `time`, *must* appear before `x`, # as indicated by the second relation - implicit_relations = {(d.parent, d) for d in extra if d.is_Derived} + implicit_relations = {(d.parent, d) for d in extra if d.is_Derived and not d.indirect} + # 2) To handle cases such as `((time, xi), (x,))`, where `xi` a SubDimension # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we # obtain the desired ordering `(time, x, xi)`. 
W/o `(time, x)`, the ordering # `(x, time, xi)` might be returned instead, which would be non-sense - implicit_relations.update({tuple(d.root for d in i) for i in relations}) - - ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations)) + for i in relations: + dims = [] + for d in i: + # Only add index if a different Dimension name to avoid dropping conditionals + # with the same name as the parent + if d.index.name == d.name: + dims.append(d) + else: + dims.extend([d.index, d]) + + implicit_relations.update({tuple(filter_ordered(dims))}) + + ordering = PartialOrderTuple(extra, relations=implicit_relations) return ordering diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py index 36ea735109..98ba9da51a 100644 --- a/devito/ir/support/basic.py +++ b/devito/ir/support/basic.py @@ -123,7 +123,7 @@ def index_mode(self): def aindices(self): retval = [] for i, fi in zip(self, self.findices): - dims = {j for j in i.free_symbols if isinstance(j, Dimension)} + dims = set(d.root if d.indirect else d for d in i.atoms(Dimension)) sdims = {d for d in dims if d.is_Stencil} candidates = dims - sdims @@ -660,9 +660,9 @@ def is_const(self, dim): """ True if a constant dependence, that is no Dimensions involved, False otherwise. """ - return (self.source.aindices[dim] is None and - self.sink.aindices[dim] is None and - self.distance_mapper[dim] == 0) + return (self.source.aindices.get(dim) is None and + self.sink.aindices.get(dim) is None and + self.distance_mapper.get(dim, 0) == 0) @memoized_meth def is_carried(self, dim=None): diff --git a/devito/ir/support/utils.py b/devito/ir/support/utils.py index 5f08f48020..3750b08a0e 100644 --- a/devito/ir/support/utils.py +++ b/devito/ir/support/utils.py @@ -183,6 +183,7 @@ def detect_accesses(exprs): for e in as_tuple(exprs): other_dims.update(i for i in e.free_symbols if isinstance(i, Dimension)) other_dims.update(e.implicit_dims) + other_dims = filter_sorted(other_dims) mapper[None] = Stencil([(i, 0) for i in other_dims]) return mapper diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py index 2322ad2a7a..3f1ad5e3b6 100644 --- a/devito/operations/interpolators.py +++ b/devito/operations/interpolators.py @@ -1,14 +1,15 @@ from abc import ABC, abstractmethod import sympy -import numpy as np from cached_property import cached_property -from devito.logger import warning -from devito.symbolics import retrieve_function_carriers, indexify, INT -from devito.tools import as_tuple, powerset, flatten, prod -from devito.types import (ConditionalDimension, Dimension, DefaultDimension, Eq, Inc, - Evaluable, Symbol, SubFunction) +from devito.finite_differences.differentiable import Mul +from devito.finite_differences.elementary import floor +from devito.symbolics import retrieve_function_carriers, INT +from devito.tools import as_tuple, flatten +from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol, + CustomDimension) +from devito.types.utils import DimensionTuple __all__ = ['LinearInterpolator', 'PrecomputedInterpolator'] @@ -29,19 +30,22 @@ class UnevaluatedSparseOperation(sympy.Expr, Evaluable): subdomain = None - def __new__(cls, interpolator, callback): + def __new__(cls, interpolator): obj = super().__new__(cls) obj.interpolator = interpolator - obj.callback = callback return obj def _evaluate(self, **kwargs): - return_value = self.callback() + return_value = self.operation(**kwargs) assert(all(isinstance(i, Eq) for i in return_value)) return return_value + @abstractmethod + def 
operation(self, **kwargs): + pass + def __add__(self, other): return flatten([self, other]) @@ -56,17 +60,22 @@ class Interpolation(UnevaluatedSparseOperation): Evaluates to a list of Eq objects. """ - def __new__(cls, expr, offset, increment, self_subs, interpolator, callback): - obj = super().__new__(cls, interpolator, callback) + def __new__(cls, expr, increment, implicit_dims, self_subs, interpolator): + obj = super().__new__(cls, interpolator) # TODO: unused now, but will be necessary to compute the adjoint obj.expr = expr - obj.offset = offset obj.increment = increment obj.self_subs = self_subs + obj.implicit_dims = implicit_dims return obj + def operation(self, **kwargs): + return self.interpolator._interpolate(expr=self.expr, increment=self.increment, + self_subs=self.self_subs, + implicit_dims=self.implicit_dims) + def __repr__(self): return "Interpolation(%s into %s)" % (repr(self.expr), repr(self.interpolator.sfunction)) @@ -79,16 +88,20 @@ class Injection(UnevaluatedSparseOperation): Evaluates to a list of Eq objects. """ - def __new__(cls, field, expr, offset, interpolator, callback): - obj = super().__new__(cls, interpolator, callback) + def __new__(cls, field, expr, implicit_dims, interpolator): + obj = super().__new__(cls, interpolator) # TODO: unused now, but will be necessary to compute the adjoint obj.field = field obj.expr = expr - obj.offset = offset + obj.implicit_dims = implicit_dims return obj + def operation(self, **kwargs): + return self.interpolator._inject(expr=self.expr, field=self.field, + implicit_dims=self.implicit_dims) + def __repr__(self): return "Injection(%s into %s)" % (repr(self.expr), repr(self.field)) @@ -108,15 +121,12 @@ def interpolate(self, *args, **kwargs): pass -class LinearInterpolator(GenericInterpolator): +class WeightedInterpolator(GenericInterpolator): """ - Concrete implementation of GenericInterpolator implementing a Linear interpolation - scheme, i.e. Bilinear for 2D and Trilinear for 3D problems. - - Parameters - ---------- - sfunction: The SparseFunction that this Interpolator operates on. + Represent an Interpolation operation on a SparseFunction that is separable + in space, meaning the coefficients are defined for each Dimension separately + and multiplied at a given point: `w[x, y] = wx[x] * wy[y]` """ def __init__(self, sfunction): @@ -126,94 +136,69 @@ def __init__(self, sfunction): def grid(self): return self.sfunction.grid + @property + def _weights(self): + raise NotImplementedError + + @property + def _gdims(self): + return self.grid.dimensions + + @property + def r(self): + return self.sfunction.r + @cached_property - def _interpolation_coeffs(self): - """ - Symbolic expression for the coefficients for sparse point interpolation - according to: + def _rdim(self): + parent = self.sfunction.dimensions[-1] + dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name), + -self.r+1, self.r, 2*self.r, parent) + for d in self._gdims] - https://en.wikipedia.org/wiki/Bilinear_interpolation. + return DimensionTuple(*dims, getters=self._gdims) - Returns - ------- - Matrix of coefficient expressions. - """ - # Grid indices corresponding to the corners of the cell ie x1, y1, z1 - indices1 = tuple(sympy.symbols('%s1' % d) for d in self.grid.dimensions) - indices2 = tuple(sympy.symbols('%s2' % d) for d in self.grid.dimensions) - # 1, x1, y1, z1, x1*y1, ... - indices = list(powerset(indices1)) - indices[0] = (1,) - point_sym = list(powerset(self.sfunction._point_symbols)) - point_sym[0] = (1,) - # 1, px. py, pz, px*py, ... 
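A minimal NumPy sketch of the separability stated in the `WeightedInterpolator` docstring above (`w[x, y] = wx[x] * wy[y]`); the values and names are arbitrary, purely illustrative:

    import numpy as np

    # Per-dimension linear weights for a point 30% of the way between two
    # x-nodes and 70% of the way between two y-nodes
    wx = np.array([0.7, 0.3])
    wy = np.array([0.3, 0.7])

    # The weight of each of the four surrounding grid nodes is the product of
    # the per-dimension weights, i.e. an outer product
    w = np.outer(wx, wy)
    assert np.isclose(w.sum(), 1.0)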
- A = [] - ref_A = [np.prod(ind) for ind in indices] - # Create the matrix with the same increment order as the point increment - for i in self.sfunction._point_increments: - # substitute x1 by x2 if increment in that dimension - subs = dict((indices1[d], indices2[d] if i[d] == 1 else indices1[d]) - for d in range(len(i))) - A += [[1] + [a.subs(subs) for a in ref_A[1:]]] - - A = sympy.Matrix(A) - # Coordinate values of the sparse point - p = sympy.Matrix([[np.prod(ind)] for ind in point_sym]) - - # reference cell x1:0, x2:h_x - left = dict((a, 0) for a in indices1) - right = dict((b, dim.spacing) for b, dim in zip(indices2, self.grid.dimensions)) - reference_cell = {**left, **right} - # Substitute in interpolation matrix - A = A.subs(reference_cell) - return A.inv().T * p - - def _interpolation_indices(self, variables, offset=0, field_offset=0, - implicit_dims=None): + def _augment_implicit_dims(self, implicit_dims): + if self.sfunction._sparse_position == -1: + return self.sfunction.dimensions + as_tuple(implicit_dims) + else: + return as_tuple(implicit_dims) + self.sfunction.dimensions + + def _coeff_temps(self, implicit_dims): + return [] + + def _positions(self, implicit_dims): + return [Eq(v, INT(floor(k)), implicit_dims=implicit_dims) + for k, v in self.sfunction._position_map.items()] + + def _interp_idx(self, variables, implicit_dims=None): """ Generate interpolation indices for the DiscreteFunctions in ``variables``. """ - index_matrix, points = self.sfunction._index_matrix(offset) - - idx_subs = [] - for i, idx in enumerate(index_matrix): - # Introduce ConditionalDimension so that we don't go OOB - mapper = {} - for j, d in zip(idx, self.grid.dimensions): - p = points[j] - lb = sympy.And(p >= d.symbolic_min - self.sfunction._radius, - evaluate=False) - ub = sympy.And(p <= d.symbolic_max + self.sfunction._radius, - evaluate=False) - condition = sympy.And(lb, ub, evaluate=False) - mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim, - condition=condition, indirect=True) - - # Apply mapper to each variable with origin correction before the - # Dimensions get replaced - subs = {v: v.subs({k: c - v.origin.get(k, 0) for k, c in mapper.items()}) - for v in variables} + mapper = {} + pos = self.sfunction._position_map.values() - # Track Indexed substitutions - idx_subs.append(subs) + for ((di, d), rd, p) in zip(enumerate(self._gdims), self._rdim, pos): + # Add conditional to avoid OOB + lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False) + ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False) + cond = sympy.And(lb, ub, evaluate=False) + mapper[d] = ConditionalDimension(rd.name, rd, condition=cond, indirect=True) # Temporaries for the position - temps = [Eq(v, k, implicit_dims=implicit_dims) - for k, v in self.sfunction._position_map.items()] - # Temporaries for the indirection dimensions - temps.extend([Eq(v, k.subs(self.sfunction._position_map), - implicit_dims=implicit_dims) - for k, v in points.items()]) - # Temporaries for the coefficients - temps.extend([Eq(p, c.subs(self.sfunction._position_map), - implicit_dims=implicit_dims) - for p, c in zip(self.sfunction._point_symbols, - self.sfunction._coordinate_bases(field_offset))]) + temps = self._positions(implicit_dims) + + # Coefficient symbol expression + temps.extend(self._coeff_temps(implicit_dims)) + + # Substitution mapper for variables + idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) + p + for ((k, c), p) in zip(mapper.items(), pos)}) + for v in variables} + 
idx_subs.update(dict(zip(self._rdim, mapper.values()))) return idx_subs, temps - def interpolate(self, expr, offset=0, increment=False, self_subs={}, - implicit_dims=None): + def interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None): """ Generate equations interpolating an arbitrary expression into ``self``. @@ -221,8 +206,6 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={}, ---------- expr : expr-like Input expression to interpolate. - offset : int, optional - Additional offset from the boundary. increment: bool, optional If True, generate increments (Inc) rather than assignments (Eq). implicit_dims : Dimension or list of Dimension, optional @@ -230,45 +213,9 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={}, interpolation expression, but that should be honored when constructing the operator. """ - implicit_dims = as_tuple(implicit_dims) + self.sfunction.dimensions - - def callback(): - # Derivatives must be evaluated before the introduction of indirect accesses - try: - _expr = expr.evaluate - except AttributeError: - # E.g., a generic SymPy expression or a number - _expr = expr - - variables = list(retrieve_function_carriers(_expr)) - - # Need to get origin of the field in case it is staggered - # TODO: handle each variable staggereing spearately - field_offset = variables[0].origin - # List of indirection indices for all adjacent grid points - idx_subs, temps = self._interpolation_indices( - variables, offset, field_offset=field_offset, implicit_dims=implicit_dims - ) + return Interpolation(expr, increment, implicit_dims, self_subs, self) - # Substitute coordinate base symbols into the interpolation coefficients - args = [_expr.xreplace(v_sub) * b.xreplace(v_sub) - for b, v_sub in zip(self._interpolation_coeffs, idx_subs)] - - # Accumulate point-wise contributions into a temporary - rhs = Symbol(name='sum', dtype=self.sfunction.dtype) - summands = [Eq(rhs, 0., implicit_dims=implicit_dims)] - summands.extend([Inc(rhs, i, implicit_dims=implicit_dims) for i in args]) - - # Write/Incr `self` - lhs = self.sfunction.subs(self_subs) - ecls = Inc if increment else Eq - last = [ecls(lhs, rhs, implicit_dims=implicit_dims)] - - return temps + summands + last - - return Interpolation(expr, offset, increment, self_subs, self, callback) - - def inject(self, field, expr, offset=0, implicit_dims=None): + def inject(self, field, expr, implicit_dims=None): """ Generate equations injecting an arbitrary expression into a field. @@ -278,77 +225,14 @@ def inject(self, field, expr, offset=0, implicit_dims=None): Input field into which the injection is performed. expr : expr-like Injected expression. - offset : int, optional - Additional offset from the boundary. implicit_dims : Dimension or list of Dimension, optional An ordered list of Dimensions that do not explicitly appear in the injection expression, but that should be honored when constructing the operator. 
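A usage sketch of the `interpolate` method documented above: the call no longer builds equations eagerly, but returns an unevaluated `Interpolation` (see `UnevaluatedSparseOperation` earlier in this file), which is lowered to `Eq`/`Inc` objects only when an `Operator` evaluates it. Grid shape and point values below are arbitrary:

    import numpy as np
    from devito import Grid, TimeFunction, SparseTimeFunction, Operator

    grid = Grid(shape=(11, 11))
    u = TimeFunction(name='u', grid=grid, space_order=2)
    rec = SparseTimeFunction(name='rec', grid=grid, npoint=1, nt=10,
                             coordinates=np.array([[0.5, 0.5]]))

    # No equations are generated at this point
    interp = rec.interpolate(expr=u)

    # The interpolation equations are only built here, during lowering
    op = Operator([interp])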
""" - implicit_dims = as_tuple(implicit_dims) + self.sfunction.dimensions - - def callback(): - # Derivatives must be evaluated before the introduction of indirect accesses - try: - _expr = expr.evaluate - except AttributeError: - # E.g., a generic SymPy expression or a number - _expr = expr - - variables = list(retrieve_function_carriers(_expr)) + [field] - - # Need to get origin of the field in case it is staggered - field_offset = field.origin - # List of indirection indices for all adjacent grid points - idx_subs, temps = self._interpolation_indices( - variables, offset, field_offset=field_offset, implicit_dims=implicit_dims - ) - - # Substitute coordinate base symbols into the interpolation coefficients - eqns = [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b, - implicit_dims=implicit_dims) - for b, vsub in zip(self._interpolation_coeffs, idx_subs)] - - return temps + eqns - - return Injection(field, expr, offset, self, callback) - - -class PrecomputedInterpolator(GenericInterpolator): - - def __init__(self, obj, r, gridpoints_data, coefficients_data): - if not isinstance(r, int): - raise TypeError('Need `r` int argument') - if r <= 0: - raise ValueError('`r` must be > 0') - self.r = r - self.obj = obj - self._npoint = obj._npoint - gridpoints = SubFunction(name="%s_gridpoints" % self.obj.name, dtype=np.int32, - dimensions=(self.obj.indices[-1], Dimension(name='d')), - shape=(self._npoint, self.obj.grid.dim), space_order=0, - parent=self.obj) - - assert(gridpoints_data is not None) - gridpoints.data[:] = gridpoints_data[:] - self.obj._gridpoints = gridpoints - - interpolation_coeffs = SubFunction(name="%s_interpolation_coeffs" % self.obj.name, - dimensions=(self.obj.indices[-1], - Dimension(name='d'), - Dimension(name='i')), - shape=(self.obj.npoint, self.obj.grid.dim, - self.r), - dtype=self.obj.dtype, space_order=0, - parent=self.obj) - assert(coefficients_data is not None) - interpolation_coeffs.data[:] = coefficients_data[:] - self.obj._interpolation_coeffs = interpolation_coeffs - warning("Ensure that the provided interpolation coefficient and grid point " + - "values are computed on the final grid that will be used for other " + - "computations.") - - def interpolate(self, expr, offset=0, increment=False, self_subs={}): + return Injection(field, expr, implicit_dims, self) + + def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None): """ Generate equations interpolating an arbitrary expression into ``self``. @@ -356,30 +240,42 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={}): ---------- expr : expr-like Input expression to interpolate. - offset : int, optional - Additional offset from the boundary. increment: bool, optional If True, generate increments (Inc) rather than assignments (Eq). + implicit_dims : Dimension or list of Dimension, optional + An ordered list of Dimensions that do not explicitly appear in the + interpolation expression, but that should be honored when constructing + the operator. 
""" - def callback(): - _expr = indexify(expr) + implicit_dims = self._augment_implicit_dims(implicit_dims) + + # Derivatives must be evaluated before the introduction of indirect accesses + try: + _expr = expr.evaluate + except AttributeError: + # E.g., a generic SymPy expression or a number + _expr = expr + + variables = list(retrieve_function_carriers(_expr)) - p, _, _ = self.obj.interpolation_coeffs.indices - dim_subs = [] - coeffs = [] - for i, d in enumerate(self.obj.grid.dimensions): - rd = DefaultDimension(name="r%s" % d.name, default_value=self.r) - dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i]))) - coeffs.append(self.obj.interpolation_coeffs[p, i, rd]) - # Apply optional time symbol substitutions to lhs of assignment - lhs = self.obj.subs(self_subs) - rhs = prod(coeffs) * _expr.subs(dim_subs) + # List of indirection indices for all adjacent grid points + idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims) - return [Inc(lhs, rhs)] + # Accumulate point-wise contributions into a temporary + rhs = Symbol(name='sum', dtype=self.sfunction.dtype) + summands = [Eq(rhs, 0., implicit_dims=implicit_dims)] + # Substitute coordinate base symbols into the interpolation coefficients + summands.extend([Inc(rhs, (_expr * self._weights).xreplace(idx_subs), + implicit_dims=implicit_dims)]) - return Interpolation(expr, offset, increment, self_subs, self, callback) + # Write/Incr `self` + lhs = self.sfunction.subs(self_subs) + ecls = Inc if increment else Eq + last = [ecls(lhs, rhs, implicit_dims=implicit_dims)] - def inject(self, field, expr, offset=0): + return temps + summands + last + + def _inject(self, field, expr, implicit_dims=None): """ Generate equations injecting an arbitrary expression into a field. @@ -389,22 +285,100 @@ def inject(self, field, expr, offset=0): Input field into which the injection is performed. expr : expr-like Injected expression. - offset : int, optional - Additional offset from the boundary. + implicit_dims : Dimension or list of Dimension, optional + An ordered list of Dimensions that do not explicitly appear in the + injection expression, but that should be honored when constructing + the operator. 
""" - def callback(): - _expr = indexify(expr) - _field = indexify(field) - - p, _ = self.obj.gridpoints.indices - dim_subs = [] - coeffs = [] - for i, d in enumerate(self.obj.grid.dimensions): - rd = DefaultDimension(name="r%s" % d.name, default_value=self.r) - dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i]))) - coeffs.append(self.obj.interpolation_coeffs[p, i, rd]) - rhs = prod(coeffs) * _expr - _field = _field.subs(dim_subs) - return [Inc(_field, rhs.subs(dim_subs))] - - return Injection(field, expr, offset, self, callback) + implicit_dims = self._augment_implicit_dims(implicit_dims) + self._rdim + + # Make iterable to support inject((u, v), expr=expr) + # or inject((u, v), expr=(expr1, expr2)) + fields, exprs = as_tuple(field), as_tuple(expr) + # Provide either one expr per field or on expr for all fields + if len(fields) > 1: + if len(exprs) == 1: + exprs = tuple(exprs[0] for _ in fields) + else: + assert len(exprs) == len(fields) + + # Derivatives must be evaluated before the introduction of indirect accesses + try: + _exprs = tuple(e.evaluate for e in exprs) + except AttributeError: + # E.g., a generic SymPy expression or a number + _exprs = exprs + + variables = list(v for e in _exprs for v in retrieve_function_carriers(e)) + variables = variables + list(fields) + + # List of indirection indices for all adjacent grid points + idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims) + + # Substitute coordinate base symbols into the interpolation coefficients + eqns = [Inc(_field.xreplace(idx_subs), + (_expr * self._weights).xreplace(idx_subs), + implicit_dims=implicit_dims) + for (_field, _expr) in zip(fields, _exprs)] + + return temps + eqns + + +class LinearInterpolator(WeightedInterpolator): + """ + Concrete implementation of WeightedInterpolator implementing a Linear interpolation + scheme, i.e. Bilinear for 2D and Trilinear for 3D problems. + + Parameters + ---------- + sfunction: The SparseFunction that this Interpolator operates on. + """ + @property + def _weights(self): + c = [(1 - p) * (1 - r) + p * r + for (p, d, r) in zip(self._point_symbols, self._gdims, self._rdim)] + return Mul(*c) + + @cached_property + def _point_symbols(self): + """Symbol for coordinate value in each Dimension of the point.""" + return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.sfunction.dtype) + for d in self.grid.dimensions), + getters=self.grid.dimensions) + + def _coeff_temps(self, implicit_dims): + # Positions + pmap = self.sfunction._position_map + poseq = [Eq(self._point_symbols[d], pos - floor(pos), + implicit_dims=implicit_dims) + for (d, pos) in zip(self._gdims, pmap.keys())] + return poseq + + +class PrecomputedInterpolator(WeightedInterpolator): + """ + Concrete implementation of WeightedInterpolator implementing a Precomputed + interpolation scheme, i.e. an interpolation with user provided precomputed + weigths/coefficients. + + Parameters + ---------- + sfunction: The SparseFunction that this Interpolator operates on. 
+    """
+
+    def _positions(self, implicit_dims):
+        if self.sfunction.gridpoints is None:
+            return super()._positions(implicit_dims)
+        # No position temp as we have directly the gridpoints
+        return [Eq(p, k, implicit_dims=implicit_dims)
+                for (k, p) in self.sfunction._position_map.items()]
+
+    @property
+    def interpolation_coeffs(self):
+        return self.sfunction.interpolation_coeffs
+
+    @property
+    def _weights(self):
+        ddim, cdim = self.interpolation_coeffs.dimensions[1:]
+        return Mul(*[self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min})
+                     for (ri, rd) in enumerate(self._rdim)])
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 26202a419b..d1bee9fa66 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -632,9 +632,10 @@ def _postprocess_arguments(self, args, **kwargs):
         """Process runtime arguments upon returning from ``.apply()``."""
         for p in self.parameters:
             try:
-                p._arg_apply(args[p.name], args[p.coordinates.name], kwargs.get(p.name))
+                subfuncs = (args[getattr(p, s).name] for s in p._sub_functions)
+                p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
             except AttributeError:
-                p._arg_apply(args[p.name], kwargs.get(p.name))
+                p._arg_apply(args[p.name], alias=kwargs.get(p.name))
 
     @cached_property
     def _known_arguments(self):
diff --git a/devito/passes/clusters/aliases.py b/devito/passes/clusters/aliases.py
index 1e8626da18..e0783323fe 100644
--- a/devito/passes/clusters/aliases.py
+++ b/devito/passes/clusters/aliases.py
@@ -12,7 +12,8 @@
                                Queue, IntervalGroup, LabeledVector, normalize_properties,
                                relax_properties, sdims_min, sdims_max)
 from devito.symbolics import (Uxmapper, compare_ops, estimate_cost, q_constant,
-                              reuse_if_untouched, retrieve_indexed, search, uxreplace)
+                              reuse_if_untouched, retrieve_indexed, search, uxreplace,
+                              sympy_dtype)
 from devito.tools import (Stamp, as_mapper, as_tuple, flatten, frozendict, generator,
                           split, timed_pass)
 from devito.types import (Array, TempFunction, Eq, Symbol, Temp, ModuloDimension,
@@ -832,7 +833,11 @@ def lower_schedule(schedule, meta, sregistry, ftemps):
     subs = {}
     for pivot, writeto, ispace, aliaseds, indicess, _ in schedule:
         name = sregistry.make_name()
-        dtype = meta.dtype
+        # Infer the dtype for the pivot
+        # This prevents cases such as `floor(a*b)` with `a` and `b` floats
+        # that would create a temporary `int r = b` leading to erroneous numerical results
+        # Such cases happen with the positions of sparse functions, for example.
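A small plain-Python sketch of the failure mode the comment above refers to (spacing and coordinate values are made up): if the temporary carrying a floating-point position is lowered with an integer dtype, the value is truncated before the `floor` is ever applied.

    import math

    h = 0.5      # grid spacing
    x = 1.75     # physical coordinate of a sparse point

    # Intended computation: position = floor(x / h)
    assert math.floor(x / h) == 3

    # With an int-typed temporary for `x`, truncation happens too early and a
    # different (wrong) position is produced
    x_int = int(x)                      # 1
    assert math.floor(x_int / h) == 2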
+        dtype = sympy_dtype(pivot, meta.dtype)
 
         if writeto:
             # The Dimensions defining the shape of Array
diff --git a/devito/passes/iet/languages/openacc.py b/devito/passes/iet/languages/openacc.py
index af23264b16..939a68f304 100644
--- a/devito/passes/iet/languages/openacc.py
+++ b/devito/passes/iet/languages/openacc.py
@@ -155,7 +155,7 @@ class DeviceAccizer(PragmaDeviceAwareTransformer):
 
     lang = AccBB
 
-    def _make_partree(self, candidates, nthreads=None, index=0):
+    def _make_partree(self, candidates, nthreads=None):
         assert candidates
 
         root, collapsable = self._select_candidates(candidates)
@@ -164,8 +164,7 @@ def _make_partree(self, candidates, nthreads=None, index=0):
         if self._is_offloadable(root) and \
            all(i.is_Affine for i in [root] + collapsable) and \
            self.par_tile:
-            idx = min(index, len(self.par_tile) - 1)
-            tile = self.par_tile[idx]
+            tile = self.par_tile.next()
             assert isinstance(tile, tuple)
             nremainder = (ncollapsable + 1) - len(tile)
             if nremainder >= 0:
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 08818787f9..44ee6afd6c 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -14,6 +14,7 @@
                           make_sections_from_imask)
 from devito.symbolics import INT, ccode
 from devito.tools import as_tuple, flatten, is_integer, prod
+from devito.tools.data_structures import UnboundTuple
 from devito.types import Symbol
 
 __all__ = ['PragmaSimdTransformer', 'PragmaShmTransformer',
@@ -294,7 +295,10 @@ def _select_candidates(self, candidates):
             except TypeError:
                 pass
 
-            collapsable.append(i)
+            # Do not collapse the innermost loop if it is an atomic reduction,
+            # unless there is at least one nested (inner) parallel loop
+            if not i.is_ParallelAtomic or nested:
+                collapsable.append(i)
 
         # Give a score to this candidate, based on the number of fully-parallel
         # Iterations and their position (i.e.
outermost to innermost) in the nest @@ -347,7 +351,7 @@ def _make_threaded_prodders(self, partree): partree = Transformer(mapper).visit(partree) return partree - def _make_partree(self, candidates, nthreads=None, index=None): + def _make_partree(self, candidates, nthreads=None): assert candidates # Get the collapsable Iterations @@ -424,6 +428,11 @@ def _make_nested_partree(self, partree): if self.nhyperthreads <= self.nested: return partree + # Loop nest with atomic reductions are more likely to have less latency + # keep outer loop parallel + if partree.root.is_ParallelAtomic: + return partree + # Note: there might be multiple sub-trees amenable to nested parallelism, # hence we loop over all of them # @@ -465,7 +474,7 @@ def _make_nested_partree(self, partree): def _make_parallel(self, iet): mapper = {} parrays = {} - for i, tree in enumerate(retrieve_iteration_tree(iet, mode='superset')): + for tree in retrieve_iteration_tree(iet, mode='superset'): # Get the parallelizable Iterations in `tree` candidates = filter_iterations(tree, key=self.key) if not candidates: @@ -477,7 +486,7 @@ def _make_parallel(self, iet): continue # Outer parallelism - root, partree = self._make_partree(candidates, index=i) + root, partree = self._make_partree(candidates) if partree is None or root in mapper: continue @@ -566,7 +575,7 @@ def __init__(self, sregistry, options, platform, compiler): super().__init__(sregistry, options, platform, compiler) self.gpu_fit = options['gpu-fit'] - self.par_tile = options['par-tile'] + self.par_tile = UnboundTuple(options['par-tile']) self.par_disabled = options['par-disabled'] def _make_threaded_prodders(self, partree): diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py index cc05188cfb..94279db4ab 100644 --- a/devito/symbolics/inspection.py +++ b/devito/symbolics/inspection.py @@ -1,7 +1,9 @@ from functools import singledispatch import numpy as np -from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple +from sympy import (Function, Indexed, Integer, Mul, Number, + Pow, S, Symbol, Tuple) +from sympy.core.operations import AssocOp from devito.finite_differences import Derivative from devito.finite_differences.differentiable import IndexDerivative @@ -10,8 +12,9 @@ DefFunction, ReservedWord) from devito.symbolics.queries import q_routine from devito.tools import as_tuple, prod +from devito.tools.dtypes_lowering import infer_dtype -__all__ = ['compare_ops', 'estimate_cost', 'has_integer_args'] +__all__ = ['compare_ops', 'estimate_cost', 'has_integer_args', 'sympy_dtype'] def compare_ops(e1, e2): @@ -260,3 +263,23 @@ def has_integer_args(*args): except AttributeError: res = res and has_integer_args(a) return res + + +def sympy_dtype(expr, default): + """ + Infer the dtype of the expression + or default if could not be determined. + """ + # Symbol/... 
without argument, check its dtype + if len(expr.args) == 0: + try: + return expr.dtype + except AttributeError: + return default + else: + if not (isinstance(expr.func, AssocOp) or expr.is_Pow): + return default + else: + # Infer expression dtype from its arguments + dtype = infer_dtype([sympy_dtype(a, default) for a in expr.args]) + return dtype or default diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py index c47ef95bfc..8f7ef6a719 100644 --- a/devito/symbolics/printer.py +++ b/devito/symbolics/printer.py @@ -105,6 +105,10 @@ def _print_Mod(self, expr): args = ['(%s)' % self._print(a) for a in expr.args] return '%'.join(args) + def _print_Mul(self, expr): + term = super()._print_Mul(expr) + return term.replace("(-1)*", "-") + def _print_Min(self, expr): if has_integer_args(*expr.args) and len(expr.args) == 2: return "MIN(%s)" % self._print(expr.args)[1:-1] diff --git a/devito/tools/algorithms.py b/devito/tools/algorithms.py index 0021a6f608..13d349149e 100644 --- a/devito/tools/algorithms.py +++ b/devito/tools/algorithms.py @@ -10,10 +10,10 @@ def build_dependence_lists(elements): """ - Given an iterable of dependences, return the dependence lists as a + Given an iterable of dependencies, return the dependence lists as a mapper suitable for graph-like algorithms. A dependence is an iterable of - elements ``[a, b, c, ...]``, meaning that ``a`` preceeds ``b`` and ``c``, - ``b`` preceeds ``c``, and so on. + elements ``[a, b, c, ...]``, meaning that ``a`` precedes ``b`` and ``c``, + ``b`` precedes ``c``, and so on. """ mapper = OrderedDict() for element in elements: @@ -35,7 +35,7 @@ def toposort(data): dependent items. The dictionary may contain self-dependencies (which are ignored), and dependent items that are not also dict keys. - * An iterable of dependences as expected by :func:`build_dependence_lists`. + * An iterable of dependencies as expected by :func:`build_dependence_lists`. 
Readapted from: :: @@ -60,16 +60,21 @@ def toposort(data): # Perform the topological sorting extra_items_in_deps = reduce(set.union, mapper.values()) - set(mapper) mapper.update(OrderedDict([(item, set()) for item in extra_items_in_deps])) + while True: ordered = set(item for item, dep in mapper.items() if not dep) if not ordered: break + try: processed = sorted(ordered, key=attrgetter('name')) + processed except AttributeError: processed = sorted(ordered) + processed + mapper = OrderedDict([(item, (dep - ordered)) for item, dep in mapper.items() if item not in ordered]) + if len(processed) != len(set(flatten(data) + flatten(data.values()))): raise ValueError("A cyclic dependency exists amongst %r" % data) + return processed diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py index 10a1d90672..539f75d593 100644 --- a/devito/tools/data_structures.py +++ b/devito/tools/data_structures.py @@ -66,7 +66,7 @@ def __getnewargs_ex__(self): # objects with varying number of attributes return (tuple(self), dict(self.__dict__)) - def get(self, key, val): + def get(self, key, val=None): return self._getters.get(key, val) @@ -599,3 +599,22 @@ def next(self): if self.curiter is None: raise StopIteration return next(self.curiter) + + +class UnboundTuple(object): + """ + A simple data structure that returns the last element forever once reached + """ + + def __init__(self, items): + self.items = as_tuple(items) + self.last = len(self.items) + self.current = 0 + + def next(self): + item = self.items[self.current] + self.current = min(self.last - 1, self.current+1) + return item + + def __len__(self): + return self.last diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py index 9793904ac2..0b3cd53ebf 100644 --- a/devito/tools/dtypes_lowering.py +++ b/devito/tools/dtypes_lowering.py @@ -8,7 +8,7 @@ from cgen import dtype_to_ctype as cgen_dtype_to_ctype __all__ = ['int2', 'int3', 'int4', 'float2', 'float3', 'float4', 'double2', # noqa - 'double3', 'double4', 'dtypes_vector_mapper', + 'double3', 'double4', 'dtypes_vector_mapper', 'dtype_to_mpidtype', 'dtype_to_cstr', 'dtype_to_ctype', 'dtype_to_mpitype', 'dtype_len', 'ctypes_to_cstr', 'c_restrict_void_p', 'ctypes_vector_mapper', 'is_external_ctype', 'infer_dtype'] @@ -104,6 +104,7 @@ def dtype_to_ctype(dtype): return ctypes_vector_mapper[dtype] except KeyError: pass + if issubclass(dtype, ctypes._SimpleCData): # Bypass np.ctypeslib's normalization rules such as # `np.ctypeslib.as_ctypes_type(ctypes.c_void_p) -> ctypes.c_ulong` @@ -128,6 +129,14 @@ def dtype_to_mpitype(dtype): }[dtype] +def dtype_to_mpidtype(dtype): + """ + Map numpy type to MPI internal types for communication + """ + from devito.mpi import MPI + return MPI._typedict[np.dtype(dtype).char] + + def dtype_len(dtype): """ Number of elements associated with one object of type `dtype`. Thus, diff --git a/devito/types/basic.py b/devito/types/basic.py index d7a422b39e..55fe1d07a1 100644 --- a/devito/types/basic.py +++ b/devito/types/basic.py @@ -607,11 +607,11 @@ def _new(cls, *args, **kwargs): if args: try: # Constructor if input is (rows, cols, lambda) - newobj = super(AbstractTensor, cls)._new(*args) + newobj = super()._new(*args) except ValueError: # Constructor if input is list of list as (row, cols, list_of_list) # doesn't work as it expects a flattened. 
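A short sketch of the semantics of the `UnboundTuple` added above, which is what lets `par-tile` supply one tile per parallelizable loop nest and then keep repeating the last one (tile values below are arbitrary):

    from devito.tools.data_structures import UnboundTuple

    ut = UnboundTuple(((32, 4, 4), (16, 8, 8)))

    assert ut.next() == (32, 4, 4)
    assert ut.next() == (16, 8, 8)
    assert ut.next() == (16, 8, 8)   # the last element is returned forever
    assert len(ut) == 2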
- newobj = super(AbstractTensor, cls)._new(args[2]) + newobj = super()._new(args[2]) # Filter grid and dimensions grid, dimensions = newobj._infer_dims() @@ -624,7 +624,7 @@ def _new(cls, *args, **kwargs): # Initialize components and create new Matrix from standard # Devito inputs comps = cls.__subfunc_setup__(*args, **kwargs) - newobj = super(AbstractTensor, cls)._new(comps) + newobj = super()._new(comps) newobj.__init_finalize__(*args, **kwargs) return newobj @@ -638,7 +638,7 @@ def _fromrep(cls, rep): This class method is only accessible from an existing AbstractTensor that contains a grid or dimensions. """ - newobj = super(AbstractTensor, cls)._fromrep(rep) + newobj = super()._fromrep(rep) grid, dimensions = newobj._infer_dims() try: # This is needed when `_fromrep` is called directly in 1.9 @@ -830,7 +830,7 @@ def __new__(cls, *args, **kwargs): # Go straight through Basic, thus bypassing caching and machinery # in sympy.Application/Function that isn't really necessary # AbstractFunctions are unique by construction! - newobj = sympy.Basic.__new__(cls, *indices) + newobj = sympy.Basic.__new__(cls, *sympy.sympify(indices)) # Initialization. The following attributes must be available # when executing __init_finalize__ @@ -979,7 +979,7 @@ def origin(self): f(x) : origin = 0 f(x + hx/2) : origin = hx/2 """ - return DimensionTuple(*(r-d for d, r in zip(self.dimensions, self.indices_ref)), + return DimensionTuple(*(r - d for d, r in zip(self.dimensions, self.indices_ref)), getters=self.dimensions) @property @@ -1249,6 +1249,7 @@ def indexify(self, indices=None, subs=None): zip(self.args, self.dimensions, self.origin, subs)] indices = [i.xreplace({k: sympy.Integer(k) for k in i.atoms(sympy.Float)}) for i in indices] + return self.indexed[indices] def __getitem__(self, index): @@ -1404,7 +1405,7 @@ def __str__(self): return super().__str__() def _hashable_content(self): - return super(Indexed, self)._hashable_content() + (self.base.function,) + return super()._hashable_content() + (self.base.function,) @cached_property def indices(self): @@ -1429,7 +1430,7 @@ def origin(self): @cached_property def free_symbols(self): # Make it cached, since it's relatively expensive and called often - ret = super(Indexed, self).free_symbols + ret = super().free_symbols # Get rid of the IndexedBase label this Indexed stems from # as in Devito we can't have it floating around in Eq's ret.discard(self.base.label) @@ -1462,3 +1463,17 @@ def compare(self, other): if c: return c return 0 + + def _subs(self, old, new, **hints): + # Wrap in a try to make sure no substitution happens when + # old is an Indexed as only checkink `old is new` would lead to + # incorrect substitution of `old.base` by `new` + try: + if old.is_Indexed: + if old.base == self.base and old.indices == self.indices: + return new + else: + return self + except AttributeError: + pass + return super()._subs(old, new, **hints) diff --git a/devito/types/dense.py b/devito/types/dense.py index aec69bc1eb..d9adfcedc3 100644 --- a/devito/types/dense.py +++ b/devito/types/dense.py @@ -1477,6 +1477,11 @@ def _arg_values(self, **kwargs): def parent(self): return self._parent + @property + def origin(self): + # SubFunction have zero origin + return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions) + class TempFunction(DiscreteFunction): diff --git a/devito/types/dimension.py b/devito/types/dimension.py index 6626f09517..76d9d9e60a 100644 --- a/devito/types/dimension.py +++ b/devito/types/dimension.py @@ -181,6 +181,14 @@ def 
min_name(self): def max_name(self): return "%s_M" % self.name + @property + def indirect(self): + return False + + @property + def index(self): + return self + @property def is_const(self): return False @@ -469,6 +477,10 @@ def __init_finalize__(self, name, parent): def parent(self): return self._parent + @property + def index(self): + return self if self.indirect else self.parent + @property def root(self): return self._parent.root @@ -485,7 +497,7 @@ def _defines(self): def _arg_names(self): return self.parent._arg_names - def _arg_check(self, *args): + def _arg_check(self, *args, **kwargs): """A DerivedDimension performs no runtime checks.""" return @@ -816,13 +828,9 @@ def condition(self): def indirect(self): return self._indirect - @property - def index(self): - return self if self.indirect is True else self.parent - @cached_property def free_symbols(self): - retval = set(super(ConditionalDimension, self).free_symbols) + retval = set(super().free_symbols) if self.condition is not None: retval |= self.condition.free_symbols try: @@ -1213,7 +1221,7 @@ def __init_finalize__(self, name, symbolic_min=None, symbolic_max=None, self._symbolic_min = symbolic_min self._symbolic_max = symbolic_max self._symbolic_size = symbolic_size - self._parent = parent + self._parent = parent or BOTTOM super().__init_finalize__(name) @property @@ -1224,6 +1232,10 @@ def is_Derived(self): def parent(self): return self._parent + @property + def index(self): + return self.parent or self + @property def root(self): if self.is_Derived: @@ -1581,9 +1593,12 @@ def _separate_dims(cls, d0, d1, ofs_items): return None -def dimensions(names): - assert type(names) is str - return tuple(Dimension(i) for i in names.split()) +def dimensions(names, n=1): + if n > 1: + return tuple(Dimension('%s%s' % (names, i)) for i in range(n)) + else: + assert type(names) is str + return tuple(Dimension(i) for i in names.split()) BOTTOM = Dimension(name='⊥') diff --git a/devito/types/grid.py b/devito/types/grid.py index b916ab4dbf..faecb25236 100644 --- a/devito/types/grid.py +++ b/devito/types/grid.py @@ -222,11 +222,16 @@ def origin_map(self): return dict(zip(self.origin_symbols, self.origin)) @property - def origin_offset(self): - """Offset of the local (per-process) origin from the domain origin.""" + def origin_ioffset(self): + """Offset index of the local (per-process) origin from the domain origin.""" grid_origin = [min(i) for i in self.distributor.glb_numb] assert len(grid_origin) == len(self.spacing) - return tuple(i*h for i, h in zip(grid_origin, self.spacing)) + return tuple(grid_origin) + + @property + def origin_offset(self): + """Physical offset of the local (per-process) origin from the domain origin.""" + return tuple(i*h for i, h in zip(self.origin_ioffset, self.spacing)) @property def time_dim(self): diff --git a/devito/types/sparse.py b/devito/types/sparse.py index 7ce74e0586..a1aef68f5f 100644 --- a/devito/types/sparse.py +++ b/devito/types/sparse.py @@ -1,4 +1,9 @@ from collections import OrderedDict +try: + from collections.abc import Iterable +except ImportError: + # Before python 3.10 + from collections import Iterable from itertools import product import sympy @@ -6,16 +11,15 @@ from cached_property import cached_property from devito.finite_differences import generate_fd_shortcuts -from devito.finite_differences.elementary import floor from devito.mpi import MPI, SparseDistributor from devito.operations import LinearInterpolator, PrecomputedInterpolator -from devito.symbolics import (INT, cast_mapper, indexify, - 
retrieve_function_carriers) +from devito.symbolics import indexify, retrieve_function_carriers from devito.tools import (ReducerMap, as_tuple, flatten, prod, filter_ordered, - memoized_meth, is_integer) -from devito.types.dense import DiscreteFunction, Function, SubFunction + is_integer, dtype_to_mpidtype) +from devito.types.dense import DiscreteFunction, SubFunction from devito.types.dimension import (Dimension, ConditionalDimension, DefaultDimension, DynamicDimension) +from devito.types.dimension import dimensions as mkdims from devito.types.basic import Symbol from devito.types.equation import Eq, Inc from devito.types.utils import IgnoreDimSort @@ -50,12 +54,6 @@ def __init_finalize__(self, *args, **kwargs): # Dynamically add derivative short-cuts self._fd = self.__fd_setup__() - def __fd_setup__(self): - """ - Dynamically add derivative short-cuts. - """ - return generate_fd_shortcuts(self.dimensions, self.space_order) - @classmethod def __indices_setup__(cls, *args, **kwargs): dimensions = as_tuple(kwargs.get('dimensions')) @@ -82,9 +80,128 @@ def __shape_setup__(cls, **kwargs): shape = (glb_npoint[grid.distributor.myrank],) return shape - def _halo_exchange(self): - # no-op for SparseFunctions - return + def func(self, *args, **kwargs): + # Rebuild subfunctions first to avoid new data creation as we have to use `_data` + # as a reconstruction kwargs to avoid the circular dependency + # with the parent in SubFunction + # This is also necessary to avoid shape issue in the SubFunction with mpi + for s in self._sub_functions: + if getattr(self, s) is not None: + kwargs.update({s: getattr(self, s).func(*args, **kwargs)}) + return super().func(*args, **kwargs) + + def __fd_setup__(self): + """ + Dynamically add derivative short-cuts. + """ + return generate_fd_shortcuts(self.dimensions, self.space_order) + + def __distributor_setup__(self, **kwargs): + """ + A `SparseDistributor` handles the SparseFunction decomposition based on + physical ownership, and allows to convert between global and local indices. + """ + return SparseDistributor( + kwargs.get('npoint', kwargs.get('npoint_global')), + self._sparse_dim, + kwargs['grid'].distributor + ) + + def __subfunc_setup__(self, key, suffix, dtype=None): + if isinstance(key, SubFunction): + return key + elif key is not None and not isinstance(key, Iterable): + raise ValueError("`%s` must be either SubFunction " + "or iterable (e.g., list, np.ndarray)" % key) + + name = '%s_%s' % (self.name, suffix) + dimensions = (self._sparse_dim, Dimension(name='d')) + shape = (self.npoint, self.grid.dim) + + if key is None: + # Fallback to default behaviour + dtype = dtype or self.dtype + else: + if key is not None: + key = np.array(key) + + if (shape != key.shape[:2] and key.shape != (shape[1],)) and \ + self._distributor.nprocs == 1: + raise ValueError("Incompatible shape for %s, `%s`; expected `%s`" % + (suffix, key.shape[:2], shape)) + + # Infer dtype + if np.issubdtype(key.dtype.type, np.integer): + dtype = dtype or np.int32 + else: + dtype = dtype or self.dtype + + if key is not None and key.ndim > 2: + shape = (*shape, *key.shape[2:]) + dimensions = (*dimensions, *mkdims("i", n=key.ndim-2)) + + sf = SubFunction( + name=name, parent=self, dtype=dtype, dimensions=dimensions, + shape=shape, space_order=0, initializer=key, alias=self.alias, + distributor=self._distributor + ) + + if self.npoint == 0: + # This is a corner case -- we might get here, for example, when + # running with MPI and some processes get 0-size arrays after + # domain decomposition. 
We "touch" the data anyway to avoid the + # case ``self._data is None`` + sf.data + + return sf + + @property + def _sparse_dim(self): + return self.dimensions[self._sparse_position] + + @property + def _mpitype(self): + return dtype_to_mpidtype(self.dtype) + + @property + def _smpitype(self): + sfuncs = [getattr(self, s) for s in self._sub_functions + if getattr(self, s) is not None] + return {s: dtype_to_mpidtype(s.dtype) for s in sfuncs} + + @property + def _comm(self): + return self._distributor.comm + + @property + def _coords_indices(self): + if self.gridpoints_data is not None: + return self.gridpoints_data + else: + if self.coordinates_data is None: + raise ValueError("No coordinates or gridpoints attached" + "to this SparseFunction") + return ( + np.floor((self.coordinates_data - self.grid.origin) / self.grid.spacing) + ).astype(int) + + @property + def _support(self): + """ + The grid points surrounding each sparse point within the radius of self's + injection/interpolation operators. + """ + max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim) + minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr)) + return np.stack([minmax(self._coords_indices + s) for s in self._point_support], + axis=2) + + @property + def _dist_datamap(self): + """ + Mapper ``M : MPI rank -> required sparse data``. + """ + return self.grid.distributor.glb_to_rank(self._support) or {} @property def npoint(self): @@ -94,7 +211,6 @@ def npoint(self): def npoint_global(self): """ Global `npoint`s. This only differs from `self.npoint` in an MPI context. - Issues ------ * https://github.com/devitocodes/devito/issues/1498 @@ -107,20 +223,76 @@ def space_order(self): return self._space_order @property - def _sparse_dim(self): - return self.dimensions[self._sparse_position] + def r(self): + return self._radius @property def gridpoints(self): + try: + return self._gridpoints + except AttributeError: + return self._coords_indices + + @property + def gridpoints_data(self): + try: + return self._gridpoints.data._local.view(np.ndarray) + except AttributeError: + return None + + @property + def coordinates(self): + try: + return self._coordinates + except AttributeError: + return None + + @property + def coordinates_data(self): + try: + return self.coordinates.data._local.view(np.ndarray) + except AttributeError: + return None + + @cached_property + def _pos_symbols(self): + return [Symbol(name='pos%s' % d, dtype=np.int32) + for d in self.grid.dimensions] + + @cached_property + def _point_increments(self): + """Index increments in each Dimension for each point symbol.""" + return tuple(product(range(-self.r+1, self.r+1), repeat=self.grid.dim)) + + @cached_property + def _point_support(self): + return np.array(self._point_increments) + + @cached_property + def _position_map(self): """ - The *reference* grid point corresponding to each sparse point. + Symbols map for the physical position of the sparse points relative to the grid + origin. + """ + return OrderedDict([((c - o)/d.spacing, p) + for p, c, d, o in zip(self._pos_symbols, + self._coordinate_symbols, + self.grid.dimensions, + self.grid.origin_symbols)]) - Notes - ----- - When using MPI, this property refers to the *physically* owned - sparse points. + @cached_property + def _dist_reorder_mask(self): + """ + An ordering mask that puts ``self._sparse_position`` at the front. 
""" - raise NotImplementedError + ret = (self._sparse_position,) + ret += tuple(i for i, d in enumerate(self.dimensions) + if d is not self._sparse_dim) + return ret + + @cached_property + def dist_origin(self): + return self._dist_origin def interpolate(self, *args, **kwargs): """ @@ -134,28 +306,46 @@ def inject(self, *args, **kwargs): """ return self.interpolator.inject(*args, **kwargs) - @cached_property - def _point_support(self): - return np.array(tuple(product(range(-self._radius + 1, self._radius + 1), - repeat=self.grid.dim))) - - @property - def _support(self): + def guard(self, expr=None): """ - The grid points surrounding each sparse point within the radius of self's - injection/interpolation operators. - """ - max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim) - minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr)) - return np.stack([minmax(self.gridpoints + s) for s in self._point_support], - axis=2) + Generate guarded expressions, that is expressions that are evaluated + by an Operator only if certain conditions are met. The introduced + condition, here, is that all grid points in the support of a sparse + value must fall within the grid domain (i.e., *not* on the halo). - @property - def _dist_datamap(self): - """ - Mapper ``M : MPI rank -> required sparse data``. + Parameters + ---------- + expr : expr-like, optional + Input expression, from which the guarded expression is derived. + If not specified, defaults to ``self``. """ - return self.grid.distributor.glb_to_rank(self._support) or {} + conditions = {} + + # Positon map and temporaries for it + pmap = self._position_map + + # Temporaries for the position + temps = self.interpolator._positions(self.dimensions) + + # Create positions and indices temporaries/indirections + for ((di, d), pos) in zip(enumerate(self.grid.dimensions), pmap.values()): + # Add conditional to avoid OOB + lb = sympy.And(pos >= d.symbolic_min, evaluate=False) + ub = sympy.And(pos <= d.symbolic_max, evaluate=False) + conditions[d] = sympy.And(lb, ub, evaluate=False) + condition = sympy.And(*conditions.values(), evaluate=False) + cd = ConditionalDimension(self._sparse_dim.name, + self._sparse_dim, + condition=condition, indirect=True) + + if expr is None: + out = self.indexify().xreplace({self._sparse_dim: cd}) + else: + functions = {f for f in retrieve_function_carriers(expr) + if f.is_SparseFunction} + out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions}) + + return out, temps def _dist_scatter_mask(self, dmap=None): """ @@ -177,7 +367,7 @@ def _dist_count(self, dmap=None): is this MPI rank expected to send/receive to/from each other MPI rank. """ dmap = dmap or self._dist_datamap - comm = self.grid.distributor.comm + comm = self._comm ssparse = np.array([len(dmap.get(i, [])) for i in range(comm.size)], dtype=int) rsparse = np.empty(comm.size, dtype=int) @@ -185,15 +375,6 @@ def _dist_count(self, dmap=None): return ssparse, rsparse - @cached_property - def _dist_reorder_mask(self): - """ - An ordering mask that puts ``self._sparse_position`` at the front. 
- """ - ret = (self._sparse_position,) - ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim) - return ret - def _dist_alltoall(self, dmap=None): """ The metadata necessary to perform an ``MPI_Alltoallv`` distributing the @@ -229,31 +410,180 @@ def _dist_alltoall(self, dmap=None): rshape[self._sparse_position] = sum(rsparse) # May have to swap axes, as `MPI_Alltoallv` expects contiguous data, and - # the sparse dimension may not be the outermost + # the sparse Dimension may not be the outermost sshape = tuple(sshape[i] for i in self._dist_reorder_mask) rshape = tuple(rshape[i] for i in self._dist_reorder_mask) return sshape, scount, sdisp, rshape, rcount, rdisp - def _dist_subfunc_alltoall(self, dmap=None): + def _dist_subfunc_alltoall(self, subfunc, dmap=None): """ The metadata necessary to perform an ``MPI_Alltoallv`` distributing self's SubFunction values across the MPI ranks needing them. """ - raise NotImplementedError + dmap = dmap or self._dist_datamap + ssparse, rsparse = self._dist_count(dmap=dmap) + + # Per-rank shape of send/recv `coordinates` + shape = subfunc.shape[1:] + sshape = [(i, *shape) for i in ssparse] + rshape = [(i, *shape) for i in rsparse] + + # Per-rank count of send/recv `coordinates` + scount = [prod(i) for i in sshape] + rcount = [prod(i) for i in rshape] + + # Per-rank displacement of send/recv `coordinates` (it's actually all + # contiguous, but the Alltoallv needs this information anyway) + sdisp = np.concatenate([[0], np.cumsum(scount)[:-1]]) + rdisp = np.concatenate([[0], tuple(np.cumsum(rcount))[:-1]]) + + # Total shape of send/recv `coordinates` + sshape = list(subfunc.shape) + sshape[0] = sum(ssparse) + rshape = list(subfunc.shape) + rshape[0] = sum(rsparse) + + return sshape, scount, sdisp, rshape, rcount, rdisp - def _dist_scatter(self): + def _dist_data_scatter(self, data=None): """ A ``numpy.ndarray`` containing up-to-date data values belonging to the calling MPI rank. A data value belongs to a given MPI rank R if its coordinates fall within R's local domain. 
""" - raise NotImplementedError + data = data if data is not None else self.data._local + + # If not using MPI, don't waste time + if self._distributor.nprocs == 1: + return data + + # Compute dist map only once + dmap = self._dist_datamap + mask = self._dist_scatter_mask(dmap=dmap) + + # Pack sparse data values so that they can be sent out via an Alltoallv + data = data[mask] + data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask)) + + # Send out the sparse point values + _, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap) + scattered = np.empty(shape=rshape, dtype=self.dtype) + self._comm.Alltoallv([data, scount, sdisp, self._mpitype], + [scattered, rcount, rdisp, self._mpitype]) + + # Unpack data values so that they follow the expected storage layout + return np.ascontiguousarray(np.transpose(scattered, self._dist_reorder_mask)) + + def _dist_subfunc_scatter(self, subfunc): + # If not using MPI, don't waste time + if self._distributor.nprocs == 1: + return {subfunc: subfunc.data} + + # Compute dist map only once + dmap = self._dist_datamap + mask = self._dist_scatter_mask(dmap=dmap) + + # Pack (reordered) SubFuncion values so that they can be sent out via an Alltoallv + sfuncd = subfunc.data._local[mask[self._sparse_position]] + + # Send out the sparse point SubFuncion + _, scount, sdisp, rshape, rcount, rdisp = \ + self._dist_subfunc_alltoall(subfunc, dmap=dmap) + scattered = np.empty(shape=rshape, dtype=subfunc.dtype) + self._comm.Alltoallv([sfuncd, scount, sdisp, self._smpitype[subfunc]], + [scattered, rcount, rdisp, self._smpitype[subfunc]]) + sfuncd = scattered + + # Translate global SubFuncion values into local SubFuncion values + if self.dist_origin[subfunc] is not None: + sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=subfunc.dtype) + return {subfunc: sfuncd} + + def _dist_data_gather(self, data): + # If not using MPI, don't waste time + if self._distributor.nprocs == 1: + return + + # Compute dist map only once + try: + data = self._C_as_ndarray(data) + except AttributeError: + pass + dmap = self._dist_datamap + mask = self._dist_scatter_mask(dmap=dmap) + + # Pack sparse data values so that they can be sent out via an Alltoallv + data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask)) + + # Send back the sparse point values + sshape, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap) + gathered = np.empty(shape=sshape, dtype=self.dtype) + + self._comm.Alltoallv([data, rcount, rdisp, self._mpitype], + [gathered, scount, sdisp, self._mpitype]) + + # Unpack data values so that they follow the expected storage layout + gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask)) + self._data[mask] = gathered[:] + + def _dist_subfunc_gather(self, sfuncd, subfunc): + try: + sfuncd = subfunc._C_as_ndarray(sfuncd) + except AttributeError: + pass + # If not using MPI, don't waste time + if self._distributor.nprocs == 1: + return + + # Compute dist map only once + dmap = self._dist_datamap + mask = self._dist_scatter_mask(dmap=dmap) + + # Pack (reordered) SubFuncion values so that they can be sent out via an Alltoallv + if self.dist_origin[subfunc] is not None: + sfuncd = sfuncd + np.array(self.dist_origin[subfunc], dtype=subfunc.dtype) + + # Send out the sparse point SubFuncion values + sshape, scount, sdisp, _, rcount, rdisp = \ + self._dist_subfunc_alltoall(subfunc, dmap=dmap) + gathered = np.empty(shape=sshape, dtype=subfunc.dtype) + self._comm.Alltoallv([sfuncd, rcount, rdisp, 
self._smpitype[subfunc]], + [gathered, scount, sdisp, self._smpitype[subfunc]]) + subfunc.data._local[mask[self._sparse_position]] = gathered[:] + + # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent + # in `_dist_scatter` is here received; a sparse point that is received in + # `_dist_scatter` is here sent. + + def _dist_scatter(self, data=None): + mapper = {self: self._dist_data_scatter(data=data)} + for i in self._sub_functions: + if getattr(self, i) is not None: + mapper.update(self._dist_subfunc_scatter(getattr(self, i))) + return mapper + + def _dist_gather(self, data, *subfunc): + self._dist_data_gather(data) + for (sg, s) in zip(subfunc, self._sub_functions): + if getattr(self, s) is not None: + self._dist_subfunc_gather(sg, getattr(self, s)) + + def _eval_at(self, func): + return self + + def _halo_exchange(self): + # no-op for SparseFunctions + return def _arg_defaults(self, alias=None): key = alias or self mapper = {self: key} - mapper.update({getattr(self, i): getattr(key, i) for i in self._sub_functions}) + for i in self._sub_functions: + f = getattr(key, i) + if f is not None: + mapper[getattr(self, i)] = f + args = ReducerMap() # Add in the sparse data (as well as any SubFunction data) belonging to @@ -265,9 +595,6 @@ def _arg_defaults(self, alias=None): return args - def _eval_at(self, func): - return self - def _arg_values(self, **kwargs): # Add value override for own data if it is provided, otherwise # use defaults @@ -289,17 +616,12 @@ def _arg_values(self, **kwargs): return values - def _arg_apply(self, dataobj, coordsobj, alias=None): + def _arg_apply(self, dataobj, *subfuncs, alias=None): key = alias if alias is not None else self if isinstance(key, AbstractSparseFunction): # Gather into `self.data` - # Coords may be None if the coordinates are not used in the Operator - if coordsobj is None: - pass - elif np.sum([coordsobj._obj.size[i] for i in range(self.ndim)]) > 0: - coordsobj = self.coordinates._C_as_ndarray(coordsobj) - key._dist_gather(self._C_as_ndarray(dataobj), coordsobj) - elif self.grid.distributor.nprocs > 1: + key._dist_gather(dataobj, *subfuncs) + elif self._distributor.nprocs > 1: raise NotImplementedError("Don't know how to gather data from an " "object of type `%s`" % type(key)) @@ -321,7 +643,7 @@ def __init_finalize__(self, *args, **kwargs): if not isinstance(self.time_order, int): raise ValueError("`time_order` must be int") - super(AbstractSparseTimeFunction, self).__init_finalize__(*args, **kwargs) + super().__init_finalize__(*args, **kwargs) def __fd_setup__(self): """ @@ -332,7 +654,7 @@ def __fd_setup__(self): @property def time_dim(self): - """The time dimension.""" + """The time Dimension.""" return self._time_dim @classmethod @@ -446,7 +768,7 @@ class SparseFunction(AbstractSparseFunction): Notes ----- The parameters must always be given as keyword arguments, since SymPy - uses ``*args`` to (re-)create the dimension arguments of the symbolic object. + uses ``*args`` to (re-)create the Dimension arguments of the symbolic object. About SparseFunction and MPI. There is a clear difference between: * Where the sparse points *physically* live, i.e., on which MPI rank. 
This @@ -472,297 +794,26 @@ class SparseFunction(AbstractSparseFunction): __rkwargs__ = AbstractSparseFunction.__rkwargs__ + ('coordinates_data',) def __init_finalize__(self, *args, **kwargs): - super(SparseFunction, self).__init_finalize__(*args, **kwargs) + super().__init_finalize__(*args, **kwargs) self.interpolator = LinearInterpolator(self) + # Set up sparse point coordinates coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data')) - if isinstance(coordinates, Function): - self._coordinates = coordinates - else: - dimensions = (self.indices[self._sparse_position], Dimension(name='d')) - # Only retain the local data region - if coordinates is not None: - coordinates = np.array(coordinates) - self._coordinates = SubFunction( - name='%s_coords' % self.name, parent=self, dtype=self.dtype, - dimensions=dimensions, shape=(self.npoint, self.grid.dim), - space_order=0, initializer=coordinates, alias=self.alias, - distributor=self._distributor - ) - if self.npoint == 0: - # This is a corner case -- we might get here, for example, when - # running with MPI and some processes get 0-size arrays after - # domain decomposition. We "touch" the data anyway to avoid the - # case ``self._data is None`` - self.coordinates.data - - def __distributor_setup__(self, **kwargs): - """ - A `SparseDistributor` handles the SparseFunction decomposition based on - physical ownership, and allows to convert between global and local indices. - """ - return SparseDistributor( - kwargs.get('npoint', kwargs.get('npoint_global')), - self._sparse_dim, - kwargs['grid'].distributor - ) - - @property - def coordinates(self): - """The SparseFunction coordinates.""" - return self._coordinates - - @property - def coordinates_data(self): - try: - return self.coordinates.data.view(np.ndarray) - except AttributeError: - return None - - @cached_property - def _point_symbols(self): - """Symbol for coordinate value in each dimension of the point.""" - return tuple(Symbol(name='p%s' % d, dtype=self.dtype) - for d in self.grid.dimensions) - - @cached_property - def _position_map(self): - """ - Symbols map for the position of the sparse points relative to the grid - origin. - - Notes - ----- - The expression `(coord - origin)/spacing` could also be computed in the - mathematically equivalent expanded form `coord/spacing - - origin/spacing`. This particular form is problematic when a sparse - point is in close proximity of the grid origin, since due to a larger - machine precision error it may cause a +-1 error in the computation of - the position. We mitigate this problem by computing the positions - individually (hence the need for a position map). 
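For reference, a minimal usage sketch of the coordinate handling set up above: the coordinates are passed as a plain array and wrapped into a SubFunction internally (via `__subfunc_setup__`), with the grid origin offset recorded for the scatter/gather machinery. Grid and coordinate values below are purely illustrative.

```python
import numpy as np
from devito import Grid, SparseFunction

grid = Grid(shape=(11, 11), extent=(10., 10.))
coords = np.array([[2.5, 7.5], [5.0, 5.0]])

sf = SparseFunction(name='sf', grid=grid, npoint=2, coordinates=coords)

print(sf.coordinates)       # the coordinates SubFunction created internally
print(sf.coordinates_data)  # ndarray view of the (rank-local) coordinate values
```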
- """ - symbols = [Symbol(name='pos%s' % d, dtype=self.dtype) - for d in self.grid.dimensions] - return OrderedDict([(c - o, p) for p, c, o in zip(symbols, - self._coordinate_symbols, - self.grid.origin_symbols)]) - - @cached_property - def _point_increments(self): - """Index increments in each dimension for each point symbol.""" - return tuple(product(range(2), repeat=self.grid.dim)) + self._coordinates = self.__subfunc_setup__(coordinates, 'coords') + self._dist_origin = {self._coordinates: self.grid.origin_offset} @cached_property def _coordinate_symbols(self): - """Symbol representing the coordinate values in each dimension.""" - p_dim = self.indices[self._sparse_position] - return tuple([self.coordinates.indexify((p_dim, i)) + """Symbol representing the coordinate values in each Dimension.""" + d_dim = self.coordinates.dimensions[1] + return tuple([self.coordinates._subs(d_dim, i) for i in range(self.grid.dim)]) - @cached_property - def _coordinate_indices(self): - """Symbol for each grid index according to the coordinates.""" - return tuple([INT(floor((c - o) / i.spacing)) - for c, o, i in zip(self._coordinate_symbols, - self.grid.origin_symbols, - self.grid.dimensions[:self.grid.dim])]) - - def _coordinate_bases(self, field_offset): - """Symbol for the base coordinates of the reference grid point.""" - return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing) - for c, o, idx, i, of in zip(self._coordinate_symbols, - self.grid.origin_symbols, - self._coordinate_indices, - self.grid.dimensions[:self.grid.dim], - field_offset)]) - - @memoized_meth - def _index_matrix(self, offset): - # Note about the use of *memoization* - # Since this method is called by `_interpolation_indices`, using - # memoization avoids a proliferation of symbolically identical - # ConditionalDimensions for a given set of indirection indices - - # List of indirection indices for all adjacent grid points - index_matrix = [tuple(idx + ii + offset for ii, idx - in zip(inc, self._coordinate_indices)) - for inc in self._point_increments] - - # A unique symbol for each indirection index - indices = filter_ordered(flatten(index_matrix)) - points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i))) - for i, p in enumerate(indices)]) - - return index_matrix, points - - @property - def gridpoints(self): - if self.coordinates._data is None: - raise ValueError("No coordinates attached to this SparseFunction") - return ( - np.floor(self.coordinates.data._local - self.grid.origin) / self.grid.spacing - ).astype(int) - - def guard(self, expr=None, offset=0): - """ - Generate guarded expressions, that is expressions that are evaluated - by an Operator only if certain conditions are met. The introduced - condition, here, is that all grid points in the support of a sparse - value must fall within the grid domain (i.e., *not* on the halo). - - Parameters - ---------- - expr : expr-like, optional - Input expression, from which the guarded expression is derived. - If not specified, defaults to ``self``. - offset : int, optional - Relax the guard condition by introducing a tolerance offset. 
- """ - _, points = self._index_matrix(offset) - - # Guard through ConditionalDimension - conditions = {} - for d, idx in zip(self.grid.dimensions, self._coordinate_indices): - p = points[idx] - lb = sympy.And(p >= d.symbolic_min - offset, evaluate=False) - ub = sympy.And(p <= d.symbolic_max + offset, evaluate=False) - conditions[p] = sympy.And(lb, ub, evaluate=False) - condition = sympy.And(*conditions.values(), evaluate=False) - cd = ConditionalDimension(self._sparse_dim.name, self._sparse_dim, - condition=condition, indirect=True) - - if expr is None: - out = self.indexify().xreplace({self._sparse_dim: cd}) - else: - functions = {f for f in retrieve_function_carriers(expr) - if f.is_SparseFunction} - out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions}) - - # Temporaries for the position - temps = [Eq(v, k, implicit_dims=self.dimensions) - for k, v in self._position_map.items()] - # Temporaries for the indirection dimensions - temps.extend([Eq(v, k.subs(self._position_map), - implicit_dims=self.dimensions) - for k, v in points.items() if v in conditions]) - - return out, temps - @cached_property def _decomposition(self): mapper = {self._sparse_dim: self._distributor.decomposition[self._sparse_dim]} return tuple(mapper.get(d) for d in self.dimensions) - def _dist_subfunc_alltoall(self, dmap=None): - dmap = dmap or self._dist_datamap - ssparse, rsparse = self._dist_count(dmap=dmap) - - # Per-rank shape of send/recv `coordinates` - sshape = [(i, self.grid.dim) for i in ssparse] - rshape = [(i, self.grid.dim) for i in rsparse] - - # Per-rank count of send/recv `coordinates` - scount = [prod(i) for i in sshape] - rcount = [prod(i) for i in rshape] - - # Per-rank displacement of send/recv `coordinates` (it's actually all - # contiguous, but the Alltoallv needs this information anyway) - sdisp = np.concatenate([[0], np.cumsum(scount)[:-1]]) - rdisp = np.concatenate([[0], tuple(np.cumsum(rcount))[:-1]]) - - # Total shape of send/recv `coordinates` - sshape = list(self.coordinates.shape) - sshape[0] = sum(ssparse) - rshape = list(self.coordinates.shape) - rshape[0] = sum(rsparse) - - return sshape, scount, sdisp, rshape, rcount, rdisp - - def _dist_scatter(self, data=None): - data = data if data is not None else self.data._local - distributor = self.grid.distributor - - # If not using MPI, don't waste time - if distributor.nprocs == 1: - return {self: data, self.coordinates: self.coordinates.data} - - comm = distributor.comm - mpitype = MPI._typedict[np.dtype(self.dtype).char] - - # Compute dist map only once - dmap = self._dist_datamap - mask = self._dist_scatter_mask(dmap=dmap) - - # Pack sparse data values so that they can be sent out via an Alltoallv - data = data[mask] - data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask)) - - # Send out the sparse point values - _, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap) - scattered = np.empty(shape=rshape, dtype=self.dtype) - comm.Alltoallv([data, scount, sdisp, mpitype], - [scattered, rcount, rdisp, mpitype]) - data = scattered - - # Unpack data values so that they follow the expected storage layout - data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask)) - - # Pack (reordered) coordinates so that they can be sent out via an Alltoallv - coords = self.coordinates.data._local[mask[self._sparse_position]] - - # Send out the sparse point coordinates - _, scount, sdisp, rshape, rcount, rdisp = self._dist_subfunc_alltoall(dmap=dmap) - scattered = np.empty(shape=rshape, 
dtype=self.coordinates.dtype) - comm.Alltoallv([coords, scount, sdisp, mpitype], - [scattered, rcount, rdisp, mpitype]) - coords = scattered - - # Translate global coordinates into local coordinates - coords = coords - np.array(self.grid.origin_offset, dtype=self.dtype) - - return {self: data, self.coordinates: coords} - - def _dist_gather(self, data, coords): - distributor = self.grid.distributor - - # If not using MPI, don't waste time - if distributor.nprocs == 1: - return - - comm = distributor.comm - - # Compute dist map only once - dmap = self._dist_datamap - mask = self._dist_scatter_mask(dmap=dmap) - - # Pack sparse data values so that they can be sent out via an Alltoallv - data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask)) - # Send back the sparse point values - sshape, scount, sdisp, _, rcount, rdisp = self._dist_alltoall(dmap=dmap) - gathered = np.empty(shape=sshape, dtype=self.dtype) - mpitype = MPI._typedict[np.dtype(self.dtype).char] - comm.Alltoallv([data, rcount, rdisp, mpitype], - [gathered, scount, sdisp, mpitype]) - # Unpack data values so that they follow the expected storage layout - gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask)) - self._data[mask] = gathered[:] - - if coords is not None: - # Pack (reordered) coordinates so that they can be sent out via an Alltoallv - coords = coords + np.array(self.grid.origin_offset, dtype=self.dtype) - # Send out the sparse point coordinates - sshape, scount, sdisp, _, rcount, rdisp = \ - self._dist_subfunc_alltoall(dmap=dmap) - gathered = np.empty(shape=sshape, dtype=self.coordinates.dtype) - mpitype = MPI._typedict[np.dtype(self.coordinates.dtype).char] - comm.Alltoallv([coords, rcount, rdisp, mpitype], - [gathered, scount, sdisp, mpitype]) - self._coordinates.data._local[mask[self._sparse_position]] = gathered[:] - - # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent - # in `_dist_scatter` is here received; a sparse point that is received in - # `_dist_scatter` is here sent. - class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction): """ @@ -783,7 +834,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction): npoint : int Number of sparse points. nt : int - Number of timesteps along the time dimension. + Number of timesteps along the time Dimension. grid : Grid The computational domain from which the sparse points are sampled. coordinates : np.ndarray, optional @@ -840,7 +891,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction): Notes ----- The parameters must always be given as keyword arguments, since SymPy - uses ``*args`` to (re-)create the dimension arguments of the symbolic object. + uses ``*args`` to (re-)create the Dimension arguments of the symbolic object. """ is_SparseTimeFunction = True @@ -848,7 +899,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction): __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ + SparseFunction.__rkwargs__)) - def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False): + def interpolate(self, expr, u_t=None, p_t=None, increment=False): """ Generate equations interpolating an arbitrary expression into ``self``. @@ -856,8 +907,6 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False): ---------- expr : expr-like Input expression to interpolate. - offset : int, optional - Additional offset from the boundary. u_t : expr-like, optional Time index at which the interpolation is performed. 
p_t : expr-like, optional @@ -875,11 +924,9 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False): if p_t is not None: subs = {self.time_dim: p_t} - return super(SparseTimeFunction, self).interpolate(expr, offset=offset, - increment=increment, - self_subs=subs) + return super().interpolate(expr, increment=increment, self_subs=subs) - def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None): + def inject(self, field, expr, u_t=None, p_t=None, implicit_dims=None): """ Generate equations injecting an arbitrary expression into a field. @@ -889,8 +936,6 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None): Input field into which the injection is performed. expr : expr-like Injected expression. - offset : int, optional - Additional offset from the boundary. u_t : expr-like, optional Time index at which the interpolation is performed. p_t : expr-like, optional @@ -906,7 +951,7 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None): if p_t is not None: expr = expr.subs({self.time_dim: p_t}) - return super().inject(field, expr, offset=offset, implicit_dims=implicit_dims) + return super().inject(field, expr, implicit_dims=implicit_dims) class PrecomputedSparseFunction(AbstractSparseFunction): @@ -924,32 +969,35 @@ class PrecomputedSparseFunction(AbstractSparseFunction): grid : Grid The computational domain from which the sparse points are sampled. r : int - Number of gridpoints in each dimension to interpolate a single sparse - point to. E.g. ``r=2`` for linear interpolation. + Number of gridpoints in each Dimension to interpolate a single sparse + point to. E.g. `r=2` for linear interpolation. + coordinates : np.ndarray, optional + The coordinates of each sparse point. gridpoints : np.ndarray, optional - An array carrying the *reference* grid point corresponding to each sparse point. - Of all the gridpoints that one sparse point would be interpolated to, this is the - grid point closest to the origin, i.e. the one with the lowest value of each - coordinate dimension. Must be a two-dimensional array of shape - ``(npoint, grid.ndim)``. + An array carrying the *reference* grid point corresponding to each + sparse point. Of all the gridpoints that one sparse point would be + interpolated to, this is the grid point closest to the origin, i.e. the + one with the lowest value of each coordinate Dimension. Must be a + two-dimensional array of shape `(npoint, grid.ndim)`. interpolation_coeffs : np.ndarray, optional - An array containing the coefficient for each of the r^2 (2D) or r^3 (3D) - gridpoints that each sparse point will be interpolated to. The coefficient is - split across the n dimensions such that the contribution of the point (i, j, k) - will be multiplied by ``interpolation_coeffs[..., i]*interpolation_coeffs[..., - j]*interpolation_coeffs[...,k]``. So for ``r=6``, we will store 18 - coefficients per sparse point (instead of potentially 216). - Must be a three-dimensional array of shape ``(npoint, grid.ndim, r)``. + An array containing the coefficient for each of the r^2 (2D) or r^3 + (3D) gridpoints that each sparse point will be interpolated to. The + coefficient is split across the n Dimensions such that the contribution + of the point (i, j, k) will be multiplied by + `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`. + So for `r=6`, we will store 18 coefficients per sparse point (instead of + potentially 216). 
Must be a three-dimensional array of shape + `(npoint, grid.ndim, r)`. space_order : int, optional Discretisation order for space derivatives. Defaults to 0. shape : tuple of ints, optional - Shape of the object. Defaults to ``(npoint,)``. + Shape of the object. Defaults to `(npoint,)`. dimensions : tuple of Dimension, optional Dimensions associated with the object. Only necessary if the SparseFunction defines a multi-dimensional tensor. dtype : data-type, optional Any object that can be interpreted as a numpy data type. Defaults - to ``np.float32``. + to `np.float32`. initializer : callable or any object exposing the buffer interface, optional Data initializer. If a callable is provided, data is allocated lazily. allocator : MemoryAllocator, optional @@ -960,61 +1008,111 @@ class PrecomputedSparseFunction(AbstractSparseFunction): Notes ----- The parameters must always be given as keyword arguments, since SymPy - uses ``*args`` to (re-)create the dimension arguments of the symbolic object. + uses `*args` to (re-)create the Dimension arguments of the symbolic object. """ - is_PrecomputedSparseFunction = True + _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs') - _sub_functions = ('gridpoints', 'interpolation_coeffs') + __rkwargs__ = (AbstractSparseFunction.__rkwargs__ + + ('r', 'gridpoints_data', 'coordinates_data', + 'interpolation_coeffs_data')) def __init_finalize__(self, *args, **kwargs): - super(PrecomputedSparseFunction, self).__init_finalize__(*args, **kwargs) + super().__init_finalize__(*args, **kwargs) + # Process kwargs + coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data')) + gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data')) + interpolation_coeffs = kwargs.get('interpolation_coeffs', + kwargs.get('interpolation_coeffs_data')) # Grid points per sparse point (2 in the case of bilinear and trilinear) r = kwargs.get('r') - gridpoints = kwargs.get('gridpoints') - interpolation_coeffs = kwargs.get('interpolation_coeffs') + if not is_integer(r): + raise TypeError('Need `r` int argument') + if r <= 0: + raise ValueError('`r` must be > 0') + # Make sure radius matches the coefficients size + nr = interpolation_coeffs.shape[-1] + if nr // 2 != r: + if nr == r: + r = r // 2 + else: + raise ValueError("Interpolation coefficients shape %d do " + "not match specified radius %d" % (r, nr)) + self._radius = r - self.interpolator = PrecomputedInterpolator(self, r, gridpoints, - interpolation_coeffs) + if coordinates is not None and gridpoints is not None: + raise ValueError("Either `coordinates` or `gridpoints` must be " + "provided, but not both") - @property - def gridpoints(self): - return self._gridpoints + # Specifying only `npoints` is acceptable; this will require the user + # to setup the coordinates data later on + npoint = kwargs.get('npoint', None) + if self.npoint and coordinates is None and gridpoints is None: + coordinates = np.zeros((npoint, self.grid.dim)) + + if coordinates is not None: + self._coordinates = self.__subfunc_setup__(coordinates, 'coords') + self._gridpoints = None + self._dist_origin = {self._coordinates: self.grid.origin_offset} + else: + assert gridpoints is not None + self._coordinates = None + self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints', + dtype=np.int32) + self._dist_origin = {self._gridpoints: self.grid.origin_ioffset} + + # Setup the interpolation coefficients. 
These are compulsory + self._interpolation_coeffs = \ + self.__subfunc_setup__(interpolation_coeffs, 'interp_coeffs') + self._dist_origin.update({self._interpolation_coeffs: None}) + + self.interpolator = PrecomputedInterpolator(self) @property def interpolation_coeffs(self): """ The Precomputed interpolation coefficients.""" return self._interpolation_coeffs - def _dist_scatter(self, data=None): - data = data if data is not None else self.data - distributor = self.grid.distributor - - # If not using MPI, don't waste time - if distributor.nprocs == 1: - return {self: data, self.gridpoints: self.gridpoints.data, - self._interpolation_coeffs: self._interpolation_coeffs.data} - - raise NotImplementedError - - def _dist_gather(self, data): - distributor = self.grid.distributor - - # If not using MPI, don't waste time - if distributor.nprocs == 1: - return - - raise NotImplementedError + @property + def interpolation_coeffs_data(self): + return self.interpolation_coeffs.data._local.view(np.ndarray) - def _arg_apply(self, *args, **kwargs): - distributor = self.grid.distributor + @cached_property + def _coordinate_symbols(self): + """Symbol representing the coordinate values in each Dimension.""" + if self.gridpoints is not None: + d_dim = self.gridpoints.dimensions[1] + return tuple([self.gridpoints._subs(d_dim, di) * d.spacing + o + for ((di, d), o) in zip(enumerate(self.grid.dimensions), + self.grid.origin)]) + else: + d_dim = self.coordinates.dimensions[1] + return tuple([self.coordinates._subs(d_dim, i) + for i in range(self.grid.dim)]) - # If not using MPI, don't waste time - if distributor.nprocs == 1: - return + @cached_property + def _position_map(self): + """ + Symbol for each grid index according to the coordinates. - raise NotImplementedError + Notes + ----- + The expression `(coord - origin)/spacing` could also be computed in the + mathematically equivalent expanded form `coord/spacing - + origin/spacing`. This particular form is problematic when a sparse + point is in close proximity of the grid origin, since due to a larger + machine precision error it may cause a +-1 error in the computation of + the position. We mitigate this problem by computing the positions + individually (hence the need for a position map). + """ + if self.gridpoints is not None: + ddim = self.gridpoints.dimensions[-1] + return OrderedDict((self.gridpoints._subs(ddim, di), p) + for (di, p) in zip(range(self.grid.dim), + self._pos_symbols)) + else: + return super()._position_map class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction, @@ -1033,34 +1131,37 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction, grid : Grid The computational domain from which the sparse points are sampled. r : int - Number of gridpoints in each dimension to interpolate a single sparse - point to. E.g. ``r=2`` for linear interpolation. + Number of gridpoints in each Dimension to interpolate a single sparse + point to. E.g. `r=2` for linear interpolation. + coordinates : np.ndarray, optional + The coordinates of each sparse point. gridpoints : np.ndarray, optional - An array carrying the *reference* grid point corresponding to each sparse point. - Of all the gridpoints that one sparse point would be interpolated to, this is the - grid point closest to the origin, i.e. the one with the lowest value of each - coordinate dimension. Must be a two-dimensional array of shape - ``(npoint, grid.ndim)``. + An array carrying the *reference* grid point corresponding to each + sparse point. 
Of all the gridpoints that one sparse point would be + interpolated to, this is the grid point closest to the origin, i.e. the + one with the lowest value of each coordinate Dimension. Must be a + two-dimensional array of shape `(npoint, grid.ndim)`. interpolation_coeffs : np.ndarray, optional - An array containing the coefficient for each of the r^2 (2D) or r^3 (3D) - gridpoints that each sparse point will be interpolated to. The coefficient is - split across the n dimensions such that the contribution of the point (i, j, k) - will be multiplied by ``interpolation_coeffs[..., i]*interpolation_coeffs[..., - j]*interpolation_coeffs[...,k]``. So for ``r=6``, we will store 18 coefficients - per sparse point (instead of potentially 216). Must be a three-dimensional array - of shape ``(npoint, grid.ndim, r)``. + An array containing the coefficient for each of the r^2 (2D) or r^3 + (3D) gridpoints that each sparse point will be interpolated to. The + coefficient is split across the n Dimensions such that the contribution + of the point (i, j, k) will be multiplied by + `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`. + So for `r=6`, we will store 18 coefficients per sparse point (instead of + potentially 216). Must be a three-dimensional array of shape + `(npoint, grid.ndim, r)`. space_order : int, optional Discretisation order for space derivatives. Defaults to 0. time_order : int, optional Discretisation order for time derivatives. Default to 1. shape : tuple of ints, optional - Shape of the object. Defaults to ``(npoint,)``. + Shape of the object. Defaults to `(npoint,)`. dimensions : tuple of Dimension, optional Dimensions associated with the object. Only necessary if the SparseFunction defines a multi-dimensional tensor. dtype : data-type, optional Any object that can be interpreted as a numpy data type. Defaults - to ``np.float32``. + to `np.float32`. initializer : callable or any object exposing the buffer interface, optional Data initializer. If a callable is provided, data is allocated lazily. allocator : MemoryAllocator, optional @@ -1071,40 +1172,11 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction, Notes ----- The parameters must always be given as keyword arguments, since SymPy - uses ``*args`` to (re-)create the dimension arguments of the symbolic object. + uses ``*args`` to (re-)create the Dimension arguments of the symbolic object. """ - is_PrecomputedSparseTimeFunction = True - - def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False): - """ - Generate equations interpolating an arbitrary expression into ``self``. - - Parameters - ---------- - expr : expr-like - Input expression to interpolate. - offset : int, optional - Additional offset from the boundary. - u_t : expr-like, optional - Time index at which the interpolation is performed. - p_t : expr-like, optional - Time index at which the result of the interpolation is stored. - increment: bool, optional - If True, generate increments (Inc) rather than assignments (Eq). 
- """ - subs = {} - if u_t is not None: - time = self.grid.time_dim - t = self.grid.stepping_dim - expr = expr.subs({time: u_t, t: u_t}) - - if p_t is not None: - subs = {self.time_dim: p_t} - - return super(PrecomputedSparseTimeFunction, self).interpolate( - expr, offset=offset, increment=increment, self_subs=subs - ) + __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ + + PrecomputedSparseFunction.__rkwargs__)) class MatrixSparseTimeFunction(AbstractSparseTimeFunction): @@ -1126,21 +1198,21 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction): data array. r: int or Mapping[Dimension, Optional[int]] - The number of gridpoints in each dimension used to inject/interpolate + The number of gridpoints in each Dimension used to inject/interpolate each physical point. e.g. bi-/tri-linear interplation would use 2 coefficients - in each dimension. + in each Dimension. The Mapping version of this parameter allows a different number of grid points - in each dimension. If a Dimension maps to None, this has a special - interpretation - sources are not localised to coordinates in that dimension. + in each Dimension. If a Dimension maps to None, this has a special + interpretation - sources are not localised to coordinates in that Dimension. This is loosely equivalent to specifying r[dim] = dim_size, and with all - gridpoint locations along that dimension equal to zero. + gridpoint locations along that Dimension equal to zero. par_dim: Dimension - If set, this is the dimension used to split the sources for parallel + If set, this is the Dimension used to split the sources for parallel injection. The source injection loop becomes a loop over this spatial - dimension, and then a loop over sources which touch that spatial - dimension coordinate. This defaults to grid.dimensions[0], and if specified + Dimension, and then a loop over sources which touch that spatial + Dimension coordinate. This defaults to grid.dimensions[0], and if specified must correspond to one of the grid.dimensions. other parameters as per SparseTimeFunction @@ -1149,11 +1221,11 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction): msf.gridpoints.data[iloc, idim]: int integer, position (in global coordinates) of the _minimum_ index that location index - `iloc` is interpolated from / injected into, in dimension `idim` + `iloc` is interpolated from / injected into, in Dimension `idim` where idim is an index into the grid.dimensions msf.interpolation_coefficients: Dict[Dimension, np.ndarray] - For each dimension, there is an array of interpolation coefficients + For each Dimension, there is an array of interpolation coefficients for each location `iloc`. This array is of shape (nloc, r), and is also available as @@ -1186,7 +1258,7 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction): .. note:: The parameters must always be given as keyword arguments, since - SymPy uses `*args` to (re-)create the dimension arguments of the + SymPy uses `*args` to (re-)create the Dimension arguments of the symbolic function. 
""" @@ -1211,9 +1283,7 @@ def __init_finalize__(self, *args, **kwargs): # Rows are locations, columns are source/receivers nloc, npoint = self.matrix.shape - - super().__init_finalize__( - *args, **kwargs, npoint=npoint) + super().__init_finalize__(*args, **kwargs, npoint=npoint) # Grid points per sparse point r = kwargs.get('r') @@ -1226,7 +1296,7 @@ def __init_finalize__(self, *args, **kwargs): # convert to dictionary with same size in all dims r = {dim: r for dim in self.grid.dimensions} - # Validate radius is set correctly for all grid dimensions + # Validate radius is set correctly for all grid Dimensions for d in self.grid.dimensions: if d not in r: raise ValueError("dimension %s not specified in r mapping" % d) @@ -1238,14 +1308,14 @@ def __init_finalize__(self, *args, **kwargs): # TODO is this going to cause some trouble with users of self.r? self._radius = r - # Get the parallelism dimension for injection + # Get the parallelism Dimension for injection self._par_dim = kwargs.get("par_dim") if self._par_dim is not None: assert self._par_dim in self.grid.dimensions else: self._par_dim = self.grid.dimensions[0] - # This has one value per dimension (e.g. size=3 for 3D) + # This has one value per Dimension (e.g. size=3 for 3D) # Maybe this should be unique per SparseFunction, # but I can't see a need yet. ddim = Dimension('d') @@ -1262,7 +1332,7 @@ def __init_finalize__(self, *args, **kwargs): allocator=self._allocator, space_order=0, parent=self) - # There is a coefficient array per grid dimension + # There is a coefficient array per grid Dimension # I could pack these into one array but that seems less readable? self.interpolation_coefficients = {} self.interpolation_coefficients_t_bogus = {} @@ -1302,7 +1372,7 @@ def __init_finalize__(self, *args, **kwargs): self.nnzdim = Dimension('nnz_%s' % self.name) # In the non-MPI case, at least, we should fill these in once - if self.grid.distributor.nprocs == 1: + if self._distributor.nprocs == 1: m_coo = self.matrix.tocoo(copy=False) nnz_size = m_coo.nnz else: @@ -1337,13 +1407,13 @@ def __init_finalize__(self, *args, **kwargs): ) # This loop maintains a map of nnz indices which touch each - # coordinate of the parallised injection dimension + # coordinate of the parallised injection Dimension # This takes the form of a list of nnz indices, and a start/end # position in that list for each index in the parallel dim self.par_dim_to_nnz_dim = DynamicDimension('par_dim_to_nnz_%s' % self.name) # This map acts as an indirect sort of the sources according to their - # position along the parallelisation dimension + # position along the parallelisation Dimension self._par_dim_to_nnz_map = SubFunction( name='par_dim_to_nnz_map_%s' % self.name, dtype=np.int32, @@ -1372,7 +1442,7 @@ def __init_finalize__(self, *args, **kwargs): parent=self, ) - if self.grid.distributor.nprocs == 1: + if self._distributor.nprocs == 1: self._mrow.data[:] = m_coo.row self._mcol.data[:] = m_coo.col self._mval.data[:] = m_coo.data @@ -1397,6 +1467,8 @@ def free_data(self): self.scatter_result = None self.scattered_data = None + __distributor_setup__ = DiscreteFunction.__distributor_setup__ + @property def dt(self): t = self.time_dim @@ -1440,17 +1512,11 @@ def _sub_functions(self): 'mrow', 'mcol', 'mval', 'par_dim_to_nnz_map', 'par_dim_to_nnz_m', 'par_dim_to_nnz_M') - @property - def r(self): - return self._radius - - def interpolate(self, expr, offset=0, u_t=None, p_t=None): + def interpolate(self, expr, u_t=None, p_t=None): """Creates a :class:`sympy.Eq` equation for the 
interpolation of an expression onto this sparse point collection. :param expr: The expression to interpolate. - :param offset: Additional offset from the boundary for - absorbing boundary conditions. :param u_t: (Optional) time index to use for indexing into field data in `expr`. :param p_t: (Optional) time index to use for indexing into @@ -1481,7 +1547,7 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None): coefficients = self.interpolation_coefficients[d].indexed # If radius is set to None, then the coefficient array is - # actually the full size of the grid dimension itself + # actually the full size of the grid Dimension itself if self._radius[d] is not None: dim_subs.append((d, rd + gridpoints[row, i])) else: @@ -1496,13 +1562,11 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None): return [Eq(self, 0), Inc(lhs, rhs)] - def inject(self, field, expr, offset=0, u_t=None, p_t=None): + def inject(self, field, expr, u_t=None, p_t=None): """Symbol for injection of an expression onto a grid :param field: The grid field into which we inject. :param expr: The expression to inject. - :param offset: Additional offset from the boundary for - absorbing boundary conditions. :param u_t: (Optional) time index to use for indexing into `field`. :param p_t: (Optional) time index to use for indexing into `expr`. """ @@ -1530,7 +1594,7 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None): dim_subs = [(pdim, mcol[nnz_index])] coeffs = [mval[nnz_index]] - # Devito requires a fixed ordering of dimensions across + # Devito requires a fixed ordering of Dimensions across # all loops, which means we need to respect that when constructing # the loops for this injection. @@ -1548,14 +1612,14 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None): # There are four cases here. if d is self._par_dim: if self._radius[d] is None: - # If d is the parallelism dimension, AND this dimension is + # If d is the parallelism Dimension, AND this Dimension is # non-local (i.e. all sources touch all indices, and # gridpoint for this dim is ignored) coeffs.append(coefficients[row, d]) else: - # d is the parallelism dimension, so the index into + # d is the parallelism Dimension, so the index into # the coefficients array is derived from the value of - # this dimension minus the gridpoint of the point + # this Dimension minus the gridpoint of the point coeffs.append(coefficients[row, d - gridpoints[row, i]]) # loop dim here is always d @@ -1565,16 +1629,16 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None): par_dim_seen = True else: if self._radius[d] is None: - # d is not the parallelism dimension, AND this dimension + # d is not the parallelism Dimension, AND this Dimension # is non-local (i.e. all sources touch all indices, # and gridpoint for this dim is ignored) - # the loop is therefore over the original dimension d + # the loop is therefore over the original Dimension d coeffs.append(coefficients[row, d]) loop_dim = d else: - # d is not the parallelism dimension, and it _is_ - # local. In this case the loop is over the radius dimension + # d is not the parallelism Dimension, and it _is_ + # local. In this case the loop is over the radius Dimension # and we need to substitute d with the offset from the # grid point dim_subs.append((d, rd + gridpoints[row, i])) @@ -1610,7 +1674,7 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None): @classmethod def __indices_setup__(cls, *args, **kwargs): """ - Return the default dimension indices for a given data shape. 
+ Return the default Dimension indices for a given data shape. """ dimensions = kwargs.get('dimensions') if dimensions is None: @@ -1644,7 +1708,7 @@ def gridpoints(self): def _rank_to_points(self): """ - For each rank in self.grid.distributor, return + For each rank in self._distributor, return a numpy array of int32s for the positions within this rank's self.gridpoints/self.interpolation_coefficients (i.e. the locdim) which must be injected into that rank. @@ -1662,9 +1726,9 @@ def _rank_to_points(self): It also requires that the halos be exchanged before interpolation (must verify that this occurs). """ - distributor = self.grid.distributor + distributor = self._distributor - # Along each dimension, the coordinate indices are broken into + # Along each Dimension, the coordinate indices are broken into # 2*decomposition_size+3 groups, numbered starting at 0 # Group 2*i contributes only to rank i-1 @@ -1676,7 +1740,7 @@ def _rank_to_points(self): # (these contributes to rank "decomp_size") # binned_gridpoints will hold which group the particular - # point is along that decomposed dimension. + # point is along that decomposed Dimension. binned_gridpoints = np.empty_like(self._gridpoints.data) dim_group_dim_rank = [] @@ -1723,7 +1787,7 @@ def _rank_to_points(self): dim_group_dim_rank.append(this_group_rank_map) # This allows the points to be grouped into non-overlapping sets - # based on their bin in each dimension. For each set we build a list + # based on their bin in each Dimension. For each set we build a list # of points. bins, inverse, counts = np.unique( binned_gridpoints, @@ -1755,7 +1819,7 @@ def _rank_to_points(self): from itertools import product for bi in bins: - # This is a list of sets for the dimension-specific rank + # This is a list of sets for the Dimension-specific rank dim_rank_sets = [dgdr[bii] for dgdr, bii in zip(dim_group_dim_rank, bi)] @@ -1794,7 +1858,7 @@ def _build_par_dim_to_nnz(self, active_gp, active_mrow): ), } - # Get the radius along the parallel dimension + # Get the radius along the parallel Dimension r = self._radius[self._par_dim] # now, the parameters can be devito.Data, which doesn't like fancy indexing @@ -1831,7 +1895,7 @@ def _build_par_dim_to_nnz(self, active_gp, active_mrow): } def manual_scatter(self, *, data_all_zero=False): - distributor = self.grid.distributor + distributor = self._distributor if distributor.nprocs == 1: self.scattered_data = self.data @@ -1920,7 +1984,7 @@ def manual_scatter(self, *, data_all_zero=False): # now recreate the matrix to only contain points in our # local domain. - # along each dimension, each point is in one of 5 groups + # along each Dimension, each point is in one of 5 groups # 0 - completely to the left # 1 - to the left, but the injection stencil touches our domain # 2 - completely in our domain @@ -1983,7 +2047,7 @@ def manual_scatter(self, *, data_all_zero=False): scattered_coeffs[idim][mask, -(ir+1)] = 0 # finally, we translate to local coordinates - # no need for this in the broadcasted dimensions + # no need for this in the broadcasted Dimensions if self.r[dim] is not None: scattered_gp[:, idim] -= _left @@ -2009,19 +2073,19 @@ def _dist_scatter(self, data=None): # The implementation in AbstractSparseFunction now relies on us # having a .coordinates property, which we don't have. 
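The rank-mapping logic described in `_rank_to_points` boils down to asking, per decomposed Dimension, which ranks an injection stencil of radius `r` anchored at a reference gridpoint can touch. A toy, Devito-independent sketch of that test (hypothetical helper, not library API):

```python
def touched_ranks(gridpoint, r, rank_starts, rank_ends):
    """Ranks whose owned index range [start, end] overlaps the stencil
    [gridpoint, gridpoint + r - 1] along one decomposed Dimension."""
    lo, hi = gridpoint, gridpoint + r - 1
    return [rank for rank, (s, e) in enumerate(zip(rank_starts, rank_ends))
            if lo <= e and hi >= s]

# Two ranks owning indices [0..4] and [5..9]: a stencil anchored at 4 with r=2
# covers {4, 5} and therefore touches both ranks
print(touched_ranks(4, 2, [0, 5], [4, 9]))  # -> [0, 1]
```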
- def _arg_apply(self, dataobj, alias=None): + def _arg_apply(self, dataobj, *subfuncs, alias=None): key = alias if alias is not None else self if isinstance(key, AbstractSparseFunction): # Gather into `self.data` key._dist_gather(self._C_as_ndarray(dataobj)) - elif self.grid.distributor.nprocs > 1: + elif self._distributor.nprocs > 1: raise NotImplementedError("Don't know how to gather data from an " "object of type `%s`" % type(key)) def manual_gather(self): # data, in this case, is set to whatever dist_scatter provided? # on rank 0, this is the original data array (hack...) - distributor = self.grid.distributor + distributor = self._distributor # If not using MPI, don't waste time if distributor.nprocs == 1: diff --git a/examples/seismic/elastic/operators.py b/examples/seismic/elastic/operators.py index d6714797e8..dd9d793dfe 100644 --- a/examples/seismic/elastic/operators.py +++ b/examples/seismic/elastic/operators.py @@ -17,12 +17,7 @@ def src_rec(v, tau, model, geometry): npoint=geometry.nrec) # The source injection term - src_xx = src.inject(field=tau[0, 0].forward, expr=src * s) - src_zz = src.inject(field=tau[-1, -1].forward, expr=src * s) - src_expr = src_xx + src_zz - if model.grid.dim == 3: - src_yy = src.inject(field=tau[1, 1].forward, expr=src * s) - src_expr += src_yy + src_expr = src.inject(tau.forward.diagonal(), expr=src * s) # Create interpolation expression for receivers rec_term1 = rec1.interpolate(expr=tau[-1, -1]) diff --git a/examples/seismic/tutorials/08_snapshotting.ipynb b/examples/seismic/tutorials/08_snapshotting.ipynb index f0cd092d70..3784bc87bd 100644 --- a/examples/seismic/tutorials/08_snapshotting.ipynb +++ b/examples/seismic/tutorials/08_snapshotting.ipynb @@ -159,9 +159,8 @@ "# Set symbolics of the operator, source and receivers:\n", "pde = model.m * u.dt2 - u.laplace + model.damp * u.dt\n", "stencil = Eq(u.forward, solve(pde, u.forward))\n", - "src_term = src.inject(field=u.forward, expr=src * dt**2 / model.m,\n", - " offset=model.nbl)\n", - "rec_term = rec.interpolate(expr=u, offset=model.nbl)\n", + "src_term = src.inject(field=u.forward, expr=src * dt**2 / model.m)\n", + "rec_term = rec.interpolate(expr=u)\n", "op = Operator([stencil] + src_term + rec_term, subs=model.spacing_map)\n", "\n", "# Run the operator for `(nt-2)` time steps:\n", @@ -350,9 +349,8 @@ "stencil = Eq(u.forward, solve(pde, u.forward))\n", "src_term = src.inject(\n", " field=u.forward,\n", - " expr=src * dt**2 / model.m,\n", - " offset=model.nbl)\n", - "rec_term = rec.interpolate(expr=u, offset=model.nbl)\n", + " expr=src * dt**2 / model.m)\n", + "rec_term = rec.interpolate(expr=u)\n", "\n", "#Part 2 #############\n", "op1 = Operator([stencil] + src_term + rec_term,\n", diff --git a/requirements-optional.txt b/requirements-optional.txt index eaff31931b..b3f80f0d42 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -1,5 +1,5 @@ matplotlib pandas -pyrevolve +pyrevolve==2.2.3 scipy distributed \ No newline at end of file diff --git a/requirements-testing.txt b/requirements-testing.txt index 1459047d8b..88b7e88518 100644 --- a/requirements-testing.txt +++ b/requirements-testing.txt @@ -4,4 +4,5 @@ pytest-cov codecov flake8>=2.1.0 nbval -scipy \ No newline at end of file +scipy +pooch; python_version >= "3.8" diff --git a/tests/test_buffering.py b/tests/test_buffering.py index 815347f21b..16f98b4f94 100644 --- a/tests/test_buffering.py +++ b/tests/test_buffering.py @@ -271,8 +271,8 @@ def test_over_injection(): op1 = Operator(eqns, opt='buffering') # Check 
generated code - assert len(retrieve_iteration_tree(op1)) ==\ - 6 + bool(configuration['language'] != 'C') + assert len(retrieve_iteration_tree(op1)) == \ + 7 + int(configuration['language'] != 'C') buffers = [i for i in FindSymbols().visit(op1) if i.is_Array] assert len(buffers) == 1 diff --git a/tests/test_builtins.py b/tests/test_builtins.py index 17b08213e8..21c36e319b 100644 --- a/tests/test_builtins.py +++ b/tests/test_builtins.py @@ -1,7 +1,7 @@ import pytest import numpy as np from scipy.ndimage import gaussian_filter -from scipy import misc +from scipy.misc import ascent from conftest import skipif from devito import ConditionalDimension, Grid, Function, TimeFunction, switchconfig @@ -154,7 +154,7 @@ def test_gs_1d_float(self, sigma): def test_gs_2d_int(self, sigma): """Test the Gaussian smoother in 2d.""" - a = misc.ascent() + a = ascent() sp_smoothed = gaussian_filter(a, sigma=sigma) dv_smoothed = gaussian_smooth(a, sigma=sigma) @@ -168,7 +168,7 @@ def test_gs_2d_int(self, sigma): def test_gs_2d_float(self, sigma): """Test the Gaussian smoother in 2d.""" - a = misc.ascent() + a = ascent() a = a+0.1 sp_smoothed = gaussian_filter(a, sigma=sigma) dv_smoothed = gaussian_smooth(a, sigma=sigma) diff --git a/tests/test_caching.py b/tests/test_caching.py index 408a49453e..a11c6319a3 100644 --- a/tests/test_caching.py +++ b/tests/test_caching.py @@ -631,15 +631,16 @@ def test_sparse_function(self, operate_on_empty_cache): # With the legacy caching model also u, u(inds), u_coords, and # u_coords(inds) would have been added to the cache; not anymore! ncreated = 4 + assert len(_SymbolCache) == cur_cache_size + ncreated cur_cache_size = len(_SymbolCache) i = u.inject(expr=u, field=u) - # created: ii_u_0*2 (Symbol and ConditionalDimension), ii_u_1*2, ii_u_2*2, - # ii_u_3*2, px, py, posx, posy, u_coords (as indexified), - ncreated = 2+2+2+2+2+1+1+1 + # created: rux, ruy (radius dimensions) and spacings + # posx, posy, px, py, u_coords (as indexified), + ncreated = 2+1+2+2+2+1 # Note that injection is now lazy so no new symbols should be created assert len(_SymbolCache) == cur_cache_size i.evaluate @@ -654,12 +655,16 @@ def test_sparse_function(self, operate_on_empty_cache): del u del i clear_cache() - # At this point, not all children objects have been cleared. In - # particular, the ii_u_* Symbols are still alive, as well as p_u and - # h_p_u. This is because in the first clear_cache they were still - # referenced by their "parent" objects (e.g., ii_u_* by - # ConditionalDimensions, through `condition`) - assert len(_SymbolCache) == init_cache_size + 6 + # At this point, not all children objects have been cleared. In particular, the + # ru* Symbols are still alive, as well as p_u and h_p_u and pos*. 
This is because + # in the first clear_cache they were still referenced by their "parent" objects + # (e.g., ru* by ConditionalDimensions, through `condition`) + + assert len(_SymbolCache) == init_cache_size + 8 + clear_cache() + # Now we should be back to the original state except for + # pos* that belong to the abstract class + assert len(_SymbolCache) == init_cache_size + 2 clear_cache() # Now we should be back to the original state assert len(_SymbolCache) == init_cache_size diff --git a/tests/test_dimension.py b/tests/test_dimension.py index 7bf28faa35..32da3b22e3 100644 --- a/tests/test_dimension.py +++ b/tests/test_dimension.py @@ -5,15 +5,14 @@ import pytest from conftest import assert_blocking, assert_structure, skipif, opts_tiling -from devito import (ConditionalDimension, Grid, Function, TimeFunction, # noqa +from devito import (ConditionalDimension, Grid, Function, TimeFunction, floor, # noqa SparseFunction, SparseTimeFunction, Eq, Operator, Constant, Dimension, DefaultDimension, SubDimension, switchconfig, SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension, CustomDimension, dimensions, configuration) -from devito.arch.compiler import IntelCompiler, OneapiCompiler from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes, FindSymbols, retrieve_iteration_tree) -from devito.symbolics import indexify, retrieve_functions, IntDiv +from devito.symbolics import indexify, retrieve_functions, IntDiv, INT from devito.types import Array, StencilDimension, Symbol from devito.types.dimension import AffineIndexAccessFunction @@ -1051,10 +1050,11 @@ def test_no_index_sparse(self): # 0 --- 0 --- 0 --- 0 radius = 1 - indices = [(i, i+radius) for i in sf._coordinate_indices] + indices = [(INT(floor(i)), INT(floor(i))+radius) + for i in sf._position_map.keys()] bounds = [i.symbolic_size - radius for i in grid.dimensions] - eqs = [] + eqs = [Eq(p, v) for (v, p) in sf._position_map.items()] for e, i in enumerate(product(*indices)): args = [j > 0 for j in i] args.extend([j < k for j, k in zip(i, bounds)]) @@ -1416,8 +1416,7 @@ def test_affiness(self): iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time] assert all(i.is_Affine for i in iterations) - @switchconfig(condition=isinstance(configuration['compiler'], - (IntelCompiler, OneapiCompiler)), safe_math=True) + @switchconfig(safe_math=True) def test_sparse_time_function(self): nt = 20 @@ -1449,9 +1448,11 @@ def test_sparse_time_function(self): assert np.all(p.data[0] == 0) # Note the endpoint of the range is 12 because we inject at p.forward - assert all(p.data[i].sum() == i - 1 for i in range(1, 12)) - assert all(p.data[i, 10, 10, 10] == i - 1 for i in range(1, 12)) - assert all(np.all(p.data[i] == 0) for i in range(12, 20)) + for i in range(1, 12): + assert p.data[i].sum() == i - 1 + assert p.data[i, 10, 10, 10] == i - 1 + for i in range(12, 20): + assert np.all(p.data[i] == 0) @pytest.mark.parametrize('init_value,expected', [ ([2, 1, 3], [2, 2, 0]), # updates f1, f2 diff --git a/tests/test_dle.py b/tests/test_dle.py index cbe19aee7e..86a288ac00 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -190,9 +190,11 @@ def test_cache_blocking_structure_optrelax(): bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'}) iters = FindNodes(Iteration).visit(bns['p_src0_blk0']) - assert len(iters) == 2 + assert len(iters) == 5 assert iters[0].dim.is_Block assert iters[1].dim.is_Block + for i in range(2, 5): + assert not iters[i].dim.is_Block def test_cache_blocking_structure_optrelax_customdim(): @@ 
-284,8 +286,8 @@ def test_cache_blocking_structure_optrelax_prec_inject(): 'openmp': True, 'par-collapse-ncores': 1})) - assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rx,ry'], - 't,p_s0_blk0,p_s,rx,ry') + assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'], + 't,p_s0_blk0,p_s,rsx,rsy') class TestBlockingParTile(object): @@ -718,7 +720,8 @@ def test_scheduling(self): op = Operator(eqns, opt=('openmp', {'par-dynamic-work': 0})) iterations = FindNodes(Iteration).visit(op) - assert len(iterations) == 4 + + assert len(iterations) == 6 assert iterations[1].is_Affine assert 'schedule(dynamic,1)' in iterations[1].pragmas[0].value assert not iterations[3].is_Affine @@ -742,13 +745,14 @@ def test_array_sum_reduction(self, so, dim): iterations = FindNodes(Iteration).visit(op) parallelized = iterations[dim+1] assert parallelized.pragmas - if parallelized is iterations[-1]: + if parallelized.dim is iterations[-1]: # With the `f[z] += u[t0][x + 1][y + 1][z + 1] + 1` expr, the innermost # `z` Iteration gets parallelized, nothing is collapsed, hence no # reduction is required assert "reduction" not in parallelized.pragmas[0].value elif Ompizer._support_array_reduction(configuration['compiler']): - assert "reduction(+:f[0:f_vec->size[0]])" in parallelized.pragmas[0].value + if "collapse" in parallelized.pragmas[0].value: + assert "reduction(+:f[0:f_vec->size[0]])" in parallelized.pragmas[0].value else: # E.g. old GCC's assert "atomic update" in str(iterations[-1]) @@ -809,8 +813,10 @@ def test_incs_no_atomic(self): # All loops get collapsed, but the `y` and `z` loops are PARALLEL_IF_ATOMIC, # hence an atomic pragma is expected op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True, - 'par-collapse-ncores': 1})) - assert 'collapse(3)' in str(op0) + 'par-collapse-ncores': 1, + 'par-collapse-work': 0})) + + assert 'collapse(2)' in str(op0) assert 'atomic' in str(op0) # Now only `x` is parallelized @@ -942,17 +948,17 @@ def test_parallel_prec_inject(self): op0 = Operator(eqns, opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1})) iterations = FindNodes(Iteration).visit(op0) - assert all(not i.pragmas for i in iterations[:2]) - assert 'omp for collapse(2) schedule(dynamic,chunk_size)'\ - in iterations[2].pragmas[0].value - op1 = Operator(eqns, opt=('advanced', {'openmp': True, + assert not iterations[0].pragmas + assert 'omp for' in iterations[1].pragmas[0].value + + op0 = Operator(eqns, opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1, 'par-collapse-work': 1})) - iterations = FindNodes(Iteration).visit(op1) + iterations = FindNodes(Iteration).visit(op0) + assert not iterations[0].pragmas - assert 'omp for collapse(3) schedule(dynamic,chunk_size)'\ - in iterations[1].pragmas[0].value + assert 'omp for collapse(2)' in iterations[1].pragmas[0].value class TestNestedParallelism(object): @@ -1006,6 +1012,7 @@ def test_collapsing(self): # Does it produce the right result op.apply(t_M=9) + assert np.all(u.data[0] == 10) bns, _ = assert_blocking(op, {'x0_blk0'}) diff --git a/tests/test_dse.py b/tests/test_dse.py index b346e00092..730021c3d8 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -2627,6 +2627,21 @@ def test_issue_2163(self): subdomain=grid.interior)) assert_structure(op, ['t,i0x,i0y'], 'ti0xi0y') + def test_dtype_aliases(self): + a = np.arange(64).reshape((8, 8)) + grid = Grid(shape=a.shape, extent=(7, 7)) + + so = 2 + f = Function(name='f', grid=grid, space_order=so, dtype=np.int32) + f.data[:] = a + + fo = Function(name='fo', grid=grid, space_order=so, 
dtype=np.int32) + op = Operator(Eq(fo, f.dx)) + op.apply() + + assert FindNodes(Expression).visit(op)[0].dtype == np.float32 + assert np.all(fo.data[:-1, :-1] == 8) + class TestIsoAcoustic(object): @@ -2670,11 +2685,13 @@ def test_fullopt(self): bns, _ = assert_blocking(op1, {'x0_blk0'}) # due to loop blocking assert summary0[('section0', None)].ops == 50 - assert summary0[('section1', None)].ops == 148 + assert summary0[('section1', None)].ops == 44 assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001) - assert summary1[('section0', None)].ops == 31 - assert np.isclose(summary1[('section0', None)].oi, 1.767, atol=0.001) + assert summary1[('section0', None)].ops == 9 + assert summary1[('section1', None)].ops == 31 + assert summary1[('section2', None)].ops == 88 + assert np.isclose(summary1[('section1', None)].oi, 1.767, atol=0.001) assert np.allclose(u0.data, u1.data, atol=10e-5) assert np.allclose(rec0.data, rec1.data, atol=10e-5) @@ -2734,8 +2751,8 @@ def test_fullopt(self): assert np.allclose(self.tti_noopt[1].data, rec.data, atol=10e-1) # Check expected opcount/oi - assert summary[('section1', None)].ops == 92 - assert np.isclose(summary[('section1', None)].oi, 2.074, atol=0.001) + assert summary[('section2', None)].ops == 92 + assert np.isclose(summary[('section2', None)].oi, 2.074, atol=0.001) # With optimizations enabled, there should be exactly four BlockDimensions op = wavesolver.op_fwd() @@ -2746,12 +2763,14 @@ def test_fullopt(self): assert y.parent is y0_blk0 assert not x._defines & y._defines - # Also, in this operator, we expect seven temporary Arrays: - # * all of the seven Arrays are allocated on the heap - # * with OpenMP, five Arrays are defined globally, and two additional - # Arrays are defined locally + # Also, in this operator, we expect six temporary Arrays: + # * all of the six Arrays are allocated on the heap + # * with OpenMP: + # four Arrays are defined globally for the cos/sin temporaries + # 3 Arrays are defined globally for the sparse positions temporaries + # and two additional bock-sized Arrays are defined locally arrays = [i for i in FindSymbols().visit(op) if i.is_Array] - extra_arrays = 2 + extra_arrays = 2+3 assert len(arrays) == 4 + extra_arrays assert all(i._mem_heap and not i._mem_external for i in arrays) bns, pbs = assert_blocking(op, {'x0_blk0'}) @@ -2787,7 +2806,7 @@ def test_fullopt_w_mpi(self): def test_opcounts(self, space_order, expected): op = self.tti_operator(opt='advanced', space_order=space_order) sections = list(op.op_fwd()._profiler._sections.values()) - assert sections[1].sops == expected + assert sections[2].sops == expected @switchconfig(profiling='advanced') @pytest.mark.parametrize('space_order,expected', [ @@ -2797,8 +2816,8 @@ def test_opcounts_adjoint(self, space_order, expected): wavesolver = self.tti_operator(opt=('advanced', {'openmp': False})) op = wavesolver.op_adj() - assert op._profiler._sections['section1'].sops == expected - assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7 + assert op._profiler._sections['section2'].sops == expected + assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3 class TestTTIv2(object): diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py index a93d280fc7..031bd9181b 100644 --- a/tests/test_gpu_common.py +++ b/tests/test_gpu_common.py @@ -1403,7 +1403,9 @@ def test_empty_arrays(self): f = TimeFunction(name='f', grid=grid, space_order=0) f.data[:] = 1. 
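Several of the tests above exercise the `par-collapse-ncores` and `par-collapse-work` knobs, which influence when the OpenMP lowering emits `collapse(...)` clauses. A minimal sketch for inspecting their effect on a toy Operator (the threshold values are illustrative, not recommendations):

```python
from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(64, 64, 64))
u = TimeFunction(name='u', grid=grid, space_order=2)

op = Operator(Eq(u.forward, u.dx + 1),
              opt=('advanced', {'openmp': True,
                                'par-collapse-ncores': 1,    # collapse even on few cores
                                'par-collapse-work': 100}))  # min work per collapsed nest

print(op)  # inspect the generated `omp for collapse(...)` pragmas
```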
sf1 = SparseTimeFunction(name='sf1', grid=grid, npoint=0, nt=10) - sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10) + sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10, + coordinates=sf1.coordinates, + dimensions=sf1.dimensions) assert sf1.size == 0 assert sf2.size == 0 diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py index 38b7eb5514..823d11854d 100644 --- a/tests/test_gpu_openacc.py +++ b/tests/test_gpu_openacc.py @@ -110,7 +110,7 @@ def test_tile_insteadof_collapse(self, par_tile): 'acc parallel loop tile(32,4) present(u)' # Only the AFFINE Iterations are tiled assert trees[3][1].pragmas[0].value ==\ - 'acc parallel loop present(src,src_coords,u)' + 'acc parallel loop collapse(3) present(src,src_coords,u)' @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)), ((32, 4, 4), (8, 8, 8))]) diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py index 29866508d8..bc2de71708 100644 --- a/tests/test_gpu_openmp.py +++ b/tests/test_gpu_openmp.py @@ -265,7 +265,7 @@ def test_timeparallel_reduction(self): assert not tree.root.pragmas assert len(tree[1].pragmas) == 1 assert tree[1].pragmas[0].value ==\ - ('omp target teams distribute parallel for collapse(3)' + ('omp target teams distribute parallel for collapse(2)' ' reduction(+:f[0])') diff --git a/tests/test_gradient.py b/tests/test_gradient.py index 9c91138c84..5624c5d461 100644 --- a/tests/test_gradient.py +++ b/tests/test_gradient.py @@ -15,6 +15,7 @@ class TestGradient(object): @skipif(['chkpnt', 'cpu64-icc']) + @switchconfig(safe_math=True) @pytest.mark.parametrize('dtype', [np.float32, np.float64]) @pytest.mark.parametrize('opt', [('advanced', {'openmp': True}), ('noop', {'openmp': True})]) diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py index d46cec7992..dca94c8f40 100644 --- a/tests/test_interpolation.py +++ b/tests/test_interpolation.py @@ -84,25 +84,31 @@ def custom_points(grid, ranges, npoints, name='points'): return points -def precompute_linear_interpolation(points, grid, origin): - """ Sample precompute function that, given point and grid information - precomputes gridpoints and interpolation coefficients according to a linear - scheme to be used in PrecomputedSparseFunction. +def precompute_linear_interpolation(points, grid, origin, r=2): + """ + Sample precompute function that, given point and grid information + precomputes gridpoints and interpolation coefficients according to a linear + scheme to be used in PrecomputedSparseFunction. + + Allow larger radius with zero weights for testing. 
""" gridpoints = [tuple(floor((point[i]-origin[i])/grid.spacing[i]) for i in range(len(point))) for point in points] - interpolation_coeffs = np.zeros((len(points), 2, 2)) + interpolation_coeffs = np.zeros((len(points), grid.dim, r)) + rs = r // 2 - 1 for i, point in enumerate(points): for d in range(grid.dim): - interpolation_coeffs[i, d, 0] = ((gridpoints[i][d] + 1)*grid.spacing[d] - - point[d])/grid.spacing[d] - interpolation_coeffs[i, d, 1] = (point[d]-gridpoints[i][d]*grid.spacing[d])\ + gd = gridpoints[i][d] + interpolation_coeffs[i, d, rs] = ((gd + 1)*grid.spacing[d] - + point[d])/grid.spacing[d] + interpolation_coeffs[i, d, rs+1] = (point[d]-gd*grid.spacing[d])\ / grid.spacing[d] return gridpoints, interpolation_coeffs -def test_precomputed_interpolation(): +@pytest.mark.parametrize('r', [2, 4, 6]) +def test_precomputed_interpolation(r): """ Test interpolation with PrecomputedSparseFunction which accepts precomputed values for interpolation coefficients """ @@ -123,19 +129,22 @@ def init(data): m = Function(name='m', grid=grid, initializer=init, space_order=0) gridpoints, interpolation_coeffs = precompute_linear_interpolation(points, - grid, origin) + grid, origin, + r=r) sf = PrecomputedSparseFunction(name='s', grid=grid, r=r, npoint=len(points), gridpoints=gridpoints, interpolation_coeffs=interpolation_coeffs) eqn = sf.interpolate(m) op = Operator(eqn) + op() expected_values = [sin(point[0]) + sin(point[1]) for point in points] assert(all(np.isclose(sf.data, expected_values, rtol=1e-6))) -def test_precomputed_interpolation_time(): +@pytest.mark.parametrize('r', [2, 4, 6]) +def test_precomputed_interpolation_time(r): """ Test interpolation with PrecomputedSparseFunction which accepts precomputed values for interpolation coefficients, but this time with a TimeFunction @@ -153,7 +162,8 @@ def test_precomputed_interpolation_time(): u.data[it, :] = it gridpoints, interpolation_coeffs = precompute_linear_interpolation(points, - grid, origin) + grid, origin, + r=r) sf = PrecomputedSparseTimeFunction(name='s', grid=grid, r=r, npoint=len(points), nt=5, gridpoints=gridpoints, @@ -163,13 +173,15 @@ def test_precomputed_interpolation_time(): eqn = sf.interpolate(u) op = Operator(eqn) + op(time_m=0, time_M=4) for it in range(5): assert np.allclose(sf.data[it, :], it) -def test_precomputed_injection(): +@pytest.mark.parametrize('r', [2, 4, 6]) +def test_precomputed_injection(r): """Test injection with PrecomputedSparseFunction which accepts precomputed values for interpolation coefficients """ @@ -186,7 +198,8 @@ def test_precomputed_injection(): m.data[:] = 0. 
gridpoints, interpolation_coeffs = precompute_linear_interpolation(coords, - m.grid, origin) + m.grid, origin, + r=r) sf = PrecomputedSparseFunction(name='s', grid=m.grid, r=r, npoint=len(coords), gridpoints=gridpoints, @@ -194,8 +207,9 @@ def test_precomputed_injection(): expr = sf.inject(m, Float(1.)) - Operator(expr)() + op = Operator(expr) + op() indices = [slice(0, 2, 1), slice(9, 11, 1)] assert np.allclose(m.data[indices], result, rtol=1.e-5) @@ -203,6 +217,45 @@ def test_precomputed_injection(): assert np.allclose(m.data[indices], result, rtol=1.e-5) +@pytest.mark.parametrize('r', [2, 4, 6]) +def test_precomputed_injection_time(r): + """Test injection with PrecomputedSparseFunction which accepts + precomputed values for interpolation coefficients + """ + shape = (11, 11) + coords = [(.05, .95), (.45, .45)] + origin = (0, 0) + result = 0.25 + nt = 20 + + # Constant for linear interpolation + # because we interpolate across 2 neighbouring points in each dimension + r = 2 + + m = unit_box_time(shape=shape) + m.data[:] = 0. + + gridpoints, interpolation_coeffs = precompute_linear_interpolation(coords, + m.grid, origin, + r=r) + + sf = PrecomputedSparseTimeFunction(name='s', grid=m.grid, r=r, npoint=len(coords), + gridpoints=gridpoints, nt=nt, + interpolation_coeffs=interpolation_coeffs) + + expr = sf.inject(m, Float(1.)) + + op = Operator(expr) + + op() + for ti in range(2): + indices = [slice(0, 2, 1), slice(9, 11, 1)] + assert np.allclose(m.data[ti][indices], nt*result/2, rtol=1.e-5) + + indices = [slice(4, 6, 1) for _ in coords] + assert np.allclose(m.data[ti][indices], nt*result/2, rtol=1.e-5) + + @pytest.mark.parametrize('shape, coords', [ ((11, 11), [(.05, .9), (.01, .8)]), ((11, 11, 11), [(.05, .9), (.01, .8), (0.07, 0.84)]) @@ -216,8 +269,9 @@ def test_interpolate(shape, coords, npoints=20): xcoords = p.coordinates.data[:, 0] expr = p.interpolate(a) - Operator(expr)(a=a) + op = Operator(expr) + op(a=a) assert np.allclose(p.data[:], xcoords, rtol=1e-6) @@ -235,7 +289,9 @@ def test_interpolate_cumm(shape, coords, npoints=20): p.data[:] = 1. expr = p.interpolate(a, increment=True) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6) @@ -255,20 +311,26 @@ def test_interpolate_time_shift(shape, coords, npoints=20): p.data[:] = 1. expr = p.interpolate(a, u_t=a.indices[0]+1) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[0, :], xcoords, rtol=1e-6) p.data[:] = 1. expr = p.interpolate(a, p_t=p.indices[0]+1) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[1, :], xcoords, rtol=1e-6) p.data[:] = 1. expr = p.interpolate(a, u_t=a.indices[0]+1, p_t=p.indices[0]+1) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[1, :], xcoords, rtol=1e-6) @@ -286,7 +348,9 @@ def test_interpolate_array(shape, coords, npoints=20): xcoords = p.coordinates.data[:, 0] expr = p.interpolate(a) - Operator(expr)(a=a, points=p.data[:]) + op = Operator(expr) + + op(a=a, points=p.data[:]) assert np.allclose(p.data[:], xcoords, rtol=1e-6) @@ -305,7 +369,9 @@ def test_interpolate_custom(shape, coords, npoints=20): p.data[:] = 1. expr = p.interpolate(a * p.indices[0]) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6) assert np.allclose(p.data[1, :], 1.0 * xcoords, rtol=1e-6) @@ -349,7 +415,9 @@ def test_interpolate_indexed(shape, coords, npoints=20): p.data[:] = 1. 
expr = p.interpolate(a[a.grid.dimensions] * p.indices[0]) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6) assert np.allclose(p.data[1, :], 1.0 * xcoords, rtol=1e-6) @@ -370,13 +438,45 @@ def test_inject(shape, coords, result, npoints=19): expr = p.inject(a, Float(1.)) - Operator(expr)(a=a) + op = Operator(expr) + + op(a=a) indices = [slice(4, 6, 1) for _ in coords] indices[0] = slice(1, -1, 1) assert np.allclose(a.data[indices], result, rtol=1.e-5) +@pytest.mark.parametrize('shape, coords, nexpr, result', [ + ((11, 11), [(.05, .95), (.45, .45)], 1, 1.), + ((11, 11), [(.05, .95), (.45, .45)], 2, 1.), + ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 1, 0.5), + ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 2, 0.5) +]) +def test_multi_inject(shape, coords, nexpr, result, npoints=19): + """Test point injection with a set of points forming a line + through the middle of the grid. + """ + a1 = unit_box(name='a1', shape=shape) + a2 = unit_box(name='a2', shape=shape, grid=a1.grid) + a1.data[:] = 0. + a2.data[:] = 0. + p = points(a1.grid, ranges=coords, npoints=npoints) + + iexpr = Float(1.) if nexpr == 1 else (Float(1.), Float(2.)) + expr = p.inject((a1, a2), iexpr) + + op = Operator(expr) + + op(a1=a1, a2=a2) + + indices = [slice(4, 6, 1) for _ in coords] + indices[0] = slice(1, -1, 1) + result = (result, result) if nexpr == 1 else (result, 2 * result) + for r, a in zip(result, (a1, a2)): + assert np.allclose(a.data[indices], r, rtol=1.e-5) + + @pytest.mark.parametrize('shape, coords, result', [ ((11, 11), [(.05, .95), (.45, .45)], 1.), ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 0.5) @@ -392,7 +492,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19): expr = p.inject(a, Float(1.), u_t=a.indices[0]+1) - Operator(expr)(a=a, time=1) + op = Operator(expr) + + op(a=a, time=1) indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords] indices[1] = slice(1, -1, 1) @@ -401,7 +503,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19): a.data[:] = 0. expr = p.inject(a, Float(1.), p_t=p.indices[0]+1) - Operator(expr)(a=a, time=1) + op = Operator(expr) + + op(a=a, time=1) indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords] indices[1] = slice(1, -1, 1) @@ -410,7 +514,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19): a.data[:] = 0. expr = p.inject(a, Float(1.), u_t=a.indices[0]+1, p_t=p.indices[0]+1) - Operator(expr)(a=a, time=1) + op = Operator(expr) + + op(a=a, time=1) indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords] indices[1] = slice(1, -1, 1) @@ -432,7 +538,9 @@ def test_inject_array(shape, coords, result, npoints=19): p2.data[:] = 1. 
expr = p.inject(a, p) - Operator(expr)(a=a, points=p2.data[:]) + op = Operator(expr) + + op(a=a, points=p2.data[:]) indices = [slice(4, 6, 1) for _ in coords] indices[0] = slice(1, -1, 1) @@ -454,7 +562,9 @@ def test_inject_from_field(shape, coords, result, npoints=19): p = points(a.grid, ranges=coords, npoints=npoints) expr = p.inject(field=a, expr=b) - Operator(expr)(a=a, b=b) + op = Operator(expr) + + op(a=a, b=b) indices = [slice(4, 6, 1) for _ in coords] indices[0] = slice(1, -1, 1) @@ -527,6 +637,7 @@ def test_edge_sparse(): expr = sf1.interpolate(u) subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)} op = Operator(expr, subs=subs) + op() assert sf1.data[0] == 0 @@ -565,7 +676,6 @@ def test_msf_interpolate(): eqn = sf.interpolate(u) op = Operator(eqn) - print(op) sf.manual_scatter() op(time_m=0, time_M=4) @@ -579,7 +689,7 @@ def test_msf_interpolate(): eqn_inject = sf.inject(field=u, expr=sf) op2 = Operator(eqn_inject) - print(op2) + op2(time_m=0, time_M=4) # There should be 4 points touched for each source point diff --git a/tests/test_ir.py b/tests/test_ir.py index fd961fe9e9..ac2977d15f 100644 --- a/tests/test_ir.py +++ b/tests/test_ir.py @@ -954,7 +954,7 @@ def test_iteration_parallelism_3d(self, exprs, atomic, parallel): class TestEquationAlgorithms(object): @pytest.mark.parametrize('expr,expected', [ - ('Eq(a[time, p], b[time, c[p, 0]+r, c[p, 1]] * f[p, r])', '[time, p, r, d, x, y]') + ('Eq(a[time, p], b[time, c[p, 0]+r, c[p, 1]] * f[p, r])', '[time, p, r, d]') ]) def test_dimension_sort(self, expr, expected): """ diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 8639a435e4..14ddbec249 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -6,7 +6,7 @@ from devito import (Grid, Constant, Function, TimeFunction, SparseFunction, SparseTimeFunction, Dimension, ConditionalDimension, SubDimension, SubDomain, Eq, Ne, Inc, NODE, Operator, norm, inner, configuration, - switchconfig, generic_derivative) + switchconfig, generic_derivative, PrecomputedSparseFunction) from devito.arch.compiler import OneapiCompiler from devito.data import LEFT, RIGHT from devito.ir.iet import (Call, Conditional, Iteration, FindNodes, FindSymbols, @@ -556,6 +556,35 @@ def test_sparse_coords_issue1823(self): assert np.allclose(rec.coordinates.data[:], ref.coordinates.data) + @pytest.mark.parallel(mode=4) + @pytest.mark.parametrize('r', [2]) + def test_precomputed_sparse(self, r): + grid = Grid(shape=(4, 4), extent=(3.0, 3.0)) + + coords = np.array([(1.0, 1.0), (2.0, 2.0), (1.0, 2.0), (2.0, 1.0)]) + points = np.array([(1, 1), (2, 2), (1, 2), (2, 1)]) + coeffs = np.ones((4, 2, r)) + + sf1 = PrecomputedSparseFunction(name="sf1", grid=grid, coordinates=coords, + npoint=4, interpolation_coeffs=coeffs, r=r) + sf2 = PrecomputedSparseFunction(name="sf2", grid=grid, gridpoints=points, + npoint=4, interpolation_coeffs=coeffs, r=r) + + assert sf1.npoint == 1 + assert sf2.npoint == 1 + assert np.all(sf1.coordinates.data.shape == (1, 2)) + assert np.all(sf2.gridpoints.data.shape == (1, 2)) + assert np.all(sf1._coords_indices == sf2.gridpoints_data) + assert np.all(sf1.interpolation_coeffs.shape == (1, 2, r)) + assert np.all(sf2.interpolation_coeffs.shape == (1, 2, r)) + + u = Function(name="u", grid=grid, space_order=r) + u._data_with_outhalo[:] = 1 + Operator(sf2.interpolate(u))() + assert np.all(sf2.data == 4) + Operator(sf1.interpolate(u))() + assert np.all(sf1.data == 4) + class TestOperatorSimple(object): @@ -2470,8 +2499,10 @@ def test_adjoint_codegen(self, shape, kernel, space_order, 
save): op_adj = solver.op_adj() adj_calls = FindNodes(Call).visit(op_adj) - assert len(fwd_calls) == 1 - assert len(adj_calls) == 1 + # one halo exchange Call, plus one memalign and one free Call per dimension + # (position temporaries for rec) + sf_calls = 2 * len(shape) + assert len(fwd_calls) == 1 + sf_calls + assert len(adj_calls) == 1 + sf_calls def run_adjoint_F(self, nd): """ @@ -2523,10 +2554,11 @@ def test_adjoint_F_no_omp(self): if __name__ == "__main__": - configuration['mpi'] = 'overlap' + # configuration['mpi'] = 'overlap' # TestDecomposition().test_reshape_left_right() - TestOperatorSimple().test_trivial_eq_2d() + # TestOperatorSimple().test_trivial_eq_2d() # TestFunction().test_halo_exchange_bilateral() - # TestSparseFunction().test_scatter_gather() + TestSparseFunction().test_sparse_coords() + # TestSparseFunction().test_precomputed_sparse(2) # TestOperatorAdvanced().test_fission_due_to_antidep() - # TestIsotropicAcoustic().test_adjoint_F_no_omp() + # TestIsotropicAcoustic().test_adjoint_F(1) diff --git a/tests/test_msparse.py index b9d65de125..5cbfde848a 100644 --- a/tests/test_msparse.py +++ b/tests/test_msparse.py @@ -392,3 +392,7 @@ def test_mpi(self): if grid.distributor.myrank == 0: assert sf.data[0, 0] == -3.0 # 1 * (1 * 1) * 1 + (-1) * (2 * 2) * 1 + + +if __name__ == "__main__": + TestMatrixSparseTimeFunction().test_mpi() diff --git a/tests/test_operator.py index 5cdc546d51..f38ac01942 100644 --- a/tests/test_operator.py +++ b/tests/test_operator.py @@ -1800,20 +1800,20 @@ def test_scheduling_sparse_functions(self): eqn4 = sf2.interpolate(u2) # Note: opts disabled only because with OpenMP otherwise there might be more - # `trees` than 4 + # `trees` than 6 op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False})) trees = retrieve_iteration_tree(op) - assert len(trees) == 4 + assert len(trees) == 5 # Time loop not shared due to the WAR assert trees[0][0].dim is time and trees[0][0] is trees[1][0] # this IS shared - assert trees[1][0] is not trees[2][0] - assert trees[2][0].dim is time and trees[2][0] is trees[3][0] # this IS shared + assert trees[1][0] is not trees[3][0] + assert trees[3][0].dim is time and trees[3][0] is trees[4][0] # this IS shared # Now single, shared time loop expected eqn2 = sf1.inject(u1.forward, expr=sf1) op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False})) trees = retrieve_iteration_tree(op) - assert len(trees) == 4 + assert len(trees) == 5 assert all(trees[0][0] is i[0] for i in trees) def test_scheduling_with_free_dims(self): diff --git a/tests/test_pickle.py index 27e9f80c7a..62423b2c15 100644 --- a/tests/test_pickle.py +++ b/tests/test_pickle.py @@ -8,7 +8,8 @@ from conftest import skipif from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid, Dimension, SubDimension, ConditionalDimension, IncrDimension, - TimeDimension, SteppingDimension, Operator, MPI, Min, solve) + TimeDimension, SteppingDimension, Operator, MPI, Min, solve, + PrecomputedSparseTimeFunction) from devito.ir import GuardFactor from devito.data import LEFT, OWNED from devito.mpi.halo_scheme import Halo @@ -98,6 +99,60 @@ def test_sparse_function(self, pickle): assert sf.dtype == new_sf.dtype assert sf.npoint == new_sf.npoint + @pytest.mark.parametrize('mode', ['coordinates', 'gridpoints']) + def test_precomputed_sparse_function(self, mode, pickle): + grid = Grid(shape=(11, 11)) + + coords = [(0., 0.), (.5, .5), (.7, .2)] + gridpoints = [(0, 0), (6, 6), (8, 3)] + keys = {'coordinates': coords,
'gridpoints': gridpoints} + kw = {mode: keys[mode]} + othermode = 'coordinates' if mode == 'gridpoints' else 'gridpoints' + + sf = PrecomputedSparseTimeFunction( + name='sf', grid=grid, r=2, npoint=3, nt=5, + interpolation_coeffs=np.ndarray(shape=(3, 2, 2)), **kw + ) + sf.data[2, 1] = 5. + + pkl_sf = pickle.dumps(sf) + new_sf = pickle.loads(pkl_sf) + + # .data is initialized, so it should have been pickled too + assert new_sf.data[2, 1] == 5. + + # gridpoints and interpolation coefficients must have been pickled + assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data) + + # coordinates, since they were given, should also have been pickled + assert np.all(getattr(sf, mode).data == getattr(new_sf, mode).data) + assert getattr(sf, othermode) is None + assert getattr(new_sf, othermode) is None + + assert sf._radius == new_sf._radius == 1 + assert sf.space_order == new_sf.space_order + assert sf.time_order == new_sf.time_order + assert sf.dtype == new_sf.dtype + assert sf.npoint == new_sf.npoint == 3 + + def test_alias_sparse_function(self, pickle): + grid = Grid(shape=(3,)) + sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2, + coordinates=[(0.,), (1.,), (2.,)]) + sf.data[0] = 1. + + # Create alias + f0 = sf._rebuild(name='f0', alias=True) + pkl_f0 = pickle.dumps(f0) + new_f0 = pickle.loads(pkl_f0) + + assert f0.data is None and new_f0.data is None + assert f0.coordinates.data is None and new_f0.coordinates.data is None + + assert sf.space_order == f0.space_order == new_f0.space_order + assert sf.dtype == f0.dtype == new_f0.dtype + assert sf.npoint == f0.npoint == new_f0.npoint + def test_internal_symbols(self, pickle): s = dSymbol(name='s', dtype=np.float32) pkl_s = pickle.dumps(s) @@ -393,25 +448,6 @@ def test_receiver(self, pickle): assert np.all(new_rec.data == 1) assert np.all(new_rec.coordinates.data == [[0.], [1.], [2.]]) - def test_alias_sparse_function(self, pickle): - grid = Grid(shape=(3,)) - sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2, - coordinates=[(0.,), (1.,), (2.,)]) - sf.data[0] = 1. - - # Create alias - f0 = sf._rebuild(name='f0', alias=True) - - pkl_f0 = pickle.dumps(f0) - new_f0 = pickle.loads(pkl_f0) - - assert f0.data is None and new_f0.data is None - assert f0.coordinates.data is None and new_f0.coordinates.data is None - - assert sf.space_order == f0.space_order == new_f0.space_order - assert sf.dtype == f0.dtype == new_f0.dtype - assert sf.npoint == f0.npoint == new_f0.npoint - @pytest.mark.parametrize('pickle', [pickle0, pickle1]) class TestOperator(object):
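
Editor's note: the test_interpolation.py hunks above generalise precompute_linear_interpolation to an arbitrary radius r, storing the two non-zero linear weights in an (npoints, grid.dim, r) coefficient array with zero padding. The standalone NumPy sketch below mirrors that layout as I understand it; it has no Devito dependency, and the helper name linear_coeffs and its arguments are illustrative assumptions rather than part of the Devito API. The non-zero weights sit at columns r//2 - 1 and r//2.

import numpy as np


def linear_coeffs(points, spacing, origin, r=2):
    """Gridpoints and linear interpolation weights padded to radius r.

    Returns arrays of shape (npoints, ndim) and (npoints, ndim, r); the two
    non-zero weights per dimension are placed at columns r//2 - 1 and r//2.
    """
    ndim = len(spacing)
    npoints = len(points)
    gridpoints = np.zeros((npoints, ndim), dtype=int)
    coeffs = np.zeros((npoints, ndim, r))
    rs = r // 2 - 1
    for i, p in enumerate(points):
        for d in range(ndim):
            # Index of the grid node immediately below the point
            gd = int(np.floor((p[d] - origin[d]) / spacing[d]))
            gridpoints[i, d] = gd
            # Fractional offset of the point within its cell, in [0, 1)
            frac = (p[d] - origin[d] - gd * spacing[d]) / spacing[d]
            coeffs[i, d, rs] = 1.0 - frac
            coeffs[i, d, rs + 1] = frac
    return gridpoints, coeffs


# Example: a point at (0.3, 0.7) on a unit-spacing grid, padded to r=4
gp, w = linear_coeffs([(0.3, 0.7)], spacing=(1.0, 1.0), origin=(0.0, 0.0), r=4)
assert gp.tolist() == [[0, 0]]
assert np.allclose(w[0, 0], [0.0, 0.7, 0.3, 0.0])  # non-zero weights at columns 1 and 2
assert np.allclose(w[0, 1], [0.0, 0.3, 0.7, 0.0])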