add WLSQ (weighted least squares) gradient method and use Numba

samayala22 · Jan 10, 2025 · fb8a32d · fb8a32d
1 parent 90b1199
commit fb8a32d
Showing 1 changed file with 116 additions and 96 deletions.
diff --git a/data/gradient.py b/data/gradient.py
@@ -2,14 +2,20 @@
 import matplotlib.pyplot as plt
 from matplotlib.collections import PolyCollection
 from matplotlib.colors import Normalize
+import numba as nb
 
-import scipy as sp
+import time
 
+@nb.njit
+def cross2d(a, b):
+    return a[0] * b[1] - a[1] * b[0]
+
+@nb.njit
 def mesh_metrics(verts):
     ni = verts.shape[0] - 1
     nj = verts.shape[1] - 1
-    centroids = np.zeros((ni, nj, 2))
-    areas = np.zeros((ni, nj))
+    centroids = np.zeros((ni, nj, 2), dtype=np.float64)
+    areas = np.zeros((ni, nj), dtype=np.float64)
 
     for j in range(nj):
         for i in range(ni):
@@ -18,13 +24,16 @@ def mesh_metrics(verts):
             v2 = verts[i+1, j+1]
             v3 = verts[i, j+1]
             centroids[i, j] = 0.25 * (v0 + v1 + v2 + v3)
-            f = v3 - v0
-            b = v2 - v0
-            e = v1 - v0
-            areas[i, j] = 0.5 * (np.linalg.norm(np.cross(f, b)) + np.linalg.norm(np.cross(b, e)))
+            area = 0.0
+            area += cross2d(v0, v1)
+            area += cross2d(v1, v2)
+            area += cross2d(v2, v3)
+            area += cross2d(v3, v0)
+            areas[i, j] = 0.5 * np.abs(area)
 
     return centroids, areas
 
+@nb.njit
 def create_semicylinder_mesh(ni, nj, ri=0.2, ro=1.0):
     """
     Creates a structured semicylinder mesh centered at 0,0
@@ -34,9 +43,12 @@ def create_semicylinder_mesh(ni, nj, ri=0.2, ro=1.0):
     """
 
     theta_vec = np.linspace(np.pi, 0, ni+1) # clockwise
-    r_vec = np.linspace(ri, ro, nj+1)
+
+    # Geometric progression with direct ratio
+    ratio = (ro/ri)**(1.0/nj)  # geometric ratio between consecutive points
+    r_vec = ri * ratio**np.arange(nj+1)
 
-    verts = np.zeros((ni+1, nj+1, 2))
+    verts = np.zeros((ni+1, nj+1, 2), dtype=np.float64)
 
     for j, r in enumerate(r_vec):
         for i, theta in enumerate(theta_vec):
@@ -47,6 +59,7 @@ def create_semicylinder_mesh(ni, nj, ri=0.2, ro=1.0):
 
     return verts
 
+@nb.njit
 def create_rectangle_mesh(ni, nj, w=1.0, h=1.0):
     """
     Creates a structured rectangular mesh centered at 0,0
@@ -61,7 +74,7 @@ def create_rectangle_mesh(ni, nj, w=1.0, h=1.0):
     x_vec = np.linspace(-0.5*w, 0.5*w, ni+1)
     y_vec = np.linspace(-0.5*h, 0.5*h, nj+1)
 
-    verts = np.zeros((ni+1, nj+1, 2))
+    verts = np.zeros((ni+1, nj+1, 2), dtype=np.float64)
 
     for j, y in enumerate(y_vec):
         for i, x in enumerate(x_vec):
@@ -70,35 +83,50 @@ def create_rectangle_mesh(ni, nj, w=1.0, h=1.0):
 
     return verts
 
-def f(x, y): return (3*x**2*y - y**3) / (x**2 + y**2)**1.5 + x**2 + y**2
-def dfdx(x, y):  return 2*x + (3*x*y*(3*y**2-x**2)) / (x**2 + y**2)**2.5
-def dfdy(x, y):  return 2*y + (3*x*x*(x**2-3*y**2)) / (x**2 + y**2)**2.5
+# @nb.njit
+# def f(x, y): return (3*x**2*y - y**3) / (x**2 + y**2)**1.5 + x**2 + y**2
+# @nb.njit
+# def dfdx(x, y):  return 2*x + (3*x*y*(3*y**2-x**2)) / (x**2 + y**2)**2.5
+# @nb.njit
+# def dfdy(x, y):  return 2*y + (3*x*x*(x**2-3*y**2)) / (x**2 + y**2)**2.5
 
+# @nb.njit
 # def f(x, y): return y**2 / (x**2 + y**2)
+# @nb.njit
 # def dfdx(x, y): return - 2*y**2*x / (x**2 + y**2)**2
+# @nb.njit
 # def dfdy(x, y): return 2*y*x**2 / (x**2 + y**2)**2
 
-# def f(x, y): return x**2 + y**2
-# def dfdx(x, y): return 2*x
-# def dfdy(x, y): return 2*y
+@nb.njit
+def f(x, y): return x**2 + y**2
+@nb.njit
+def dfdx(x, y): return 2*x
+@nb.njit
+def dfdy(x, y): return 2*y
 
+# @nb.njit
 # def f(x, y): return x + y
+# @nb.njit
 # def dfdx(x, y): return 1
+# @nb.njit
 # def dfdy(x, y): return 1
-
+@nb.njit
 def f_(pair): return f(pair[0], pair[1])
+@nb.njit
 def dfdx_(pair): return dfdx(pair[0], pair[1])
+@nb.njit
 def dfdy_(pair): return dfdy(pair[0], pair[1])
 
-rot_mat_cc = np.array([[0, -1], [1, 0]]) # counter-clockwise rotation matrix
-rot_mat_c = np.array([[0, 1], [-1, 0]]) # clockwise rotation matrix
+rot_mat_cc = np.array([[0, -1], [1, 0]], dtype=np.float64) # counter-clockwise rotation matrix
+rot_mat_c = np.array([[0, 1], [-1, 0]], dtype=np.float64) # clockwise rotation matrix
 
+@nb.njit(parallel=True)
 def gg(f: callable, centroids, areas, verts):
     ni, nj = centroids.shape[:2]
-    grads = np.zeros((ni, nj, 2))
+    grads = np.zeros((ni, nj, 2), dtype=np.float64)
     # Green-Gauss method
-    for j in range(1, nj-1):
-        for i in range(1, ni-1):
+    for j in nb.prange(1, nj-1):
+        for i in nb.prange(1, ni-1):
             f0 = f_(centroids[i, j])
             grads[i, j] += 0.5 * (f_(centroids[i, j-1]) + f0) * rot_mat_c @ (verts[i+1, j] - verts[i, j])
             grads[i, j] += 0.5 * (f_(centroids[i+1, j]) + f0) * rot_mat_c @ (verts[i+1, j+1] - verts[i+1, j])
@@ -108,12 +136,8 @@ def gg(f: callable, centroids, areas, verts):
 
     return grads
 
-def mgg(f: callable, centroids, areas, verts, tol=1e-8, max_iter=10):
-    ni, nj = centroids.shape[:2]
-    grads = np.zeros((ni, nj, 2))
-    new_grads = np.zeros((ni, nj, 2))
-
-    def mgg_face_contribution(v0, v1, c0, c1, f0, f1, g0, g1):
+@nb.njit
+def mgg_face_contribution(v0, v1, c0, c1, f0, f1, g0, g1):
         x_f = 0.5 * (v1 + v0)
         delta_s = np.linalg.norm(v1 - v0)
         normal = rot_mat_c @ (v1 - v0) / delta_s
@@ -123,18 +147,25 @@ def mgg_face_contribution(v0, v1, c0, c1, f0, f1, g0, g1):
         grad_n = alpha * (f1 - f0) / delta_r + 0.5 * np.dot(g0 + g1, normal - alpha * r_f)
         return grad_n * (x_f - c0) * delta_s
 
+@nb.njit(parallel=True)
+def mgg(f: callable, centroids, areas, verts, tol=1e-8, max_iter=10):
+    ni, nj = centroids.shape[:2]
+    grads = np.zeros((ni, nj, 2), dtype=np.float64)
+    # new_grads = np.zeros((ni, nj, 2))
+    new_grads = gg(f, centroids, areas, verts) # initialize with Green-Gauss
+
     delta = 1
     iteration = 0
     while delta > tol and iteration < max_iter:
         delta = 0
-        for j in range(1, nj-1):
-            for i in range(1, ni-1):
+        for j in nb.prange(1, nj-1):
+            for i in nb.prange(1, ni-1):
                 f0 = f_(centroids[i, j])
                 v0 = verts[i, j]
                 v1 = verts[i+1, j]
                 v2 = verts[i+1, j+1]
                 v3 = verts[i, j+1]
-                grad = np.zeros(2)
+                grad = np.zeros(2, dtype=np.float64)
                 grad += mgg_face_contribution(v0, v1, centroids[i, j], centroids[i, j-1], f0, f_(centroids[i, j-1]), new_grads[i, j], new_grads[i, j-1])
                 grad += mgg_face_contribution(v1, v2, centroids[i, j], centroids[i+1, j], f0, f_(centroids[i+1, j]), new_grads[i, j], new_grads[i+1, j])
                 grad += mgg_face_contribution(v2, v3, centroids[i, j], centroids[i, j+1], f0, f_(centroids[i, j+1]), new_grads[i, j], new_grads[i, j+1])
@@ -151,70 +182,54 @@ def mgg_face_contribution(v0, v1, c0, c1, f0, f1, g0, g1):
 
     return grads
 
-def rbf_gradients(f: callable, centroids, areas, verts, epsilon=1.0):
-    ni, nj = centroids.shape[:2]
-    grads = np.zeros((ni, nj, 2))
-
-    def gaussian_rbf(r, eps=epsilon):
-        return np.exp(-(eps*r)**2)
-
-    def gaussian_rbf_dx(x, y, x0, y0, eps=epsilon):
-        r = np.sqrt((x-x0)**2 + (y-y0)**2)
-        return -2*eps**2*(x-x0)*gaussian_rbf(r, eps)
-
-    def gaussian_rbf_dy(x, y, x0, y0, eps=epsilon):
-        r = np.sqrt((x-x0)**2 + (y-y0)**2)
-        return -2*eps**2*(y-y0)*gaussian_rbf(r, eps)
-
-    # For each interior point
-    for j in range(1, nj-1):
-        for i in range(1, ni-1):
-            # Collect stencil points (including point itself)
-            stencil_i = []
-            stencil_j = []
-            stencil_f = []
-
-            # Add neighbors in a wider stencil
-            for di in range(-2, 3):
-                for dj in range(-2, 3):
-                    ii = i + di
-                    jj = j + dj
-                    if 0 <= ii < ni and 0 <= jj < nj:  # check both bounds
-                        stencil_i.append(centroids[ii, jj, 0])
-                        stencil_j.append(centroids[ii, jj, 1])
-                        stencil_f.append(f_(centroids[ii, jj]))
-
-            stencil_i = np.array(stencil_i)
-            stencil_j = np.array(stencil_j)
-            stencil_f = np.array(stencil_f)
-
-            # Center point
-            x0 = centroids[i, j, 0]
-            y0 = centroids[i, j, 1]
-
-            # Build RBF interpolation matrix
-            n_points = len(stencil_i)
-            A = np.zeros((n_points, n_points))
-            for k in range(n_points):
-                for l in range(n_points):
-                    r = np.sqrt((stencil_i[k]-stencil_i[l])**2 + 
-                              (stencil_j[k]-stencil_j[l])**2)
-                    A[k, l] = gaussian_rbf(r)
-
-            # Solve for RBF coefficients
-            coeffs = np.linalg.solve(A, stencil_f)
-
-            # Evaluate derivatives at center point
-            dfdx = 0
-            dfdy = 0
-            for k in range(n_points):
-                dfdx += coeffs[k] * gaussian_rbf_dx(x0, y0, stencil_i[k], stencil_j[k])
-                dfdy += coeffs[k] * gaussian_rbf_dy(x0, y0, stencil_i[k], stencil_j[k])
-
-            grads[i, j] = [dfdx, dfdy]
+@nb.njit
+def gs_wlsq_R(c0, c: np.ndarray, sigma: np.ndarray):
+    delta = c - c0 # (4,2)
+    r11 = np.sqrt(np.sum((sigma*delta[:, 0])**2))
+    r12 = np.sum(sigma**2*delta[:, 0]*delta[:, 1]) / r11
+    r22 = np.sqrt(np.sum((sigma*delta[:, 1])**2) - r12**2)
+    return r11, r12, r22
 
+@nb.njit
+def gs_wlsq_face_contribution(c0, c1, f0, f1, r11, r12, r22, sigma_j: np.float64):
+    delta = c1 - c0
+    alpha1 = sigma_j * delta[0] / r11**2
+    alpha2 = sigma_j * (delta[1] - r12 * delta[0] / r11) / r22**2
+    w = np.array([
+        alpha1 - r12 * alpha2 / r11,
+        alpha2
+    ])
+    return w * sigma_j * (f1 - f0)
+
+@nb.njit(parallel=True)
+def gs_wlsq(f: callable, centroids, areas, verts):
+    """
+    Gram-Schmidt solution for the weighted least squares method according to Blazek
+    """
+    ni, nj = centroids.shape[:2]
+    grads = np.zeros((ni, nj, 2), dtype=np.float64)
+    for j in nb.prange(1, nj-1):
+        for i in nb.prange(1, ni-1):
+            c0 = centroids[i, j]
+            c = np.zeros((4, 2), dtype=np.float64)
+            c[0] = centroids[i, j-1]
+            c[1] = centroids[i+1, j]
+            c[2] = centroids[i, j+1]
+            c[3] = centroids[i-1, j]
+            delta= c - c0
+            sigma = 1 / np.sqrt(delta[:,0]**2+delta[:,1]**2) # Inverse distance weighting
+            # sigma = np.ones(4, dtype=np.float64) # Unweighted
+            r11, r12, r22 = gs_wlsq_R(centroids[i, j], c, sigma)
+            grad = np.zeros(2, dtype=np.float64)
+            grad += gs_wlsq_face_contribution(centroids[i, j], c[0], f_(c0), f_(c[0]), r11, r12, r22, sigma[0])
+            grad += gs_wlsq_face_contribution(centroids[i, j], c[1], f_(c0), f_(c[1]), r11, r12, r22, sigma[1])
+            grad += gs_wlsq_face_contribution(centroids[i, j], c[2], f_(c0), f_(c[2]), r11, r12, r22, sigma[2])
+            grad += gs_wlsq_face_contribution(centroids[i, j], c[3], f_(c0), f_(c[3]), r11, r12, r22, sigma[3])
+            grads[i, j] = grad
+
     return grads
 
+@nb.njit
 def calculate_l2_error(grads, analytical_grads, areas):
     """Calculate L2 error excluding boundary cells"""
     diff = np.zeros_like(grads)
@@ -263,14 +278,16 @@ def plot_results(verts, vals, grads, analytical_grads, title=""):
     plt.show()
 
 if __name__ == "__main__":
+    start = time.time()
+
     ni_vec = [8, 16, 32, 64, 128, 256, 512]
     nj_vec = [8, 16, 32, 64, 128, 256, 512]
 
     # Define methods to compare
     gradient_methods = [
         ("Green-Gauss", gg),
         ("Modified Green-Gauss", mgg),
-        ("RBF", rbf_gradients)
+        ("GS-WLSQ", gs_wlsq)
     ]
 
     delta_r = [(ni*nj)**(-1/2) for ni, nj in zip(ni_vec, nj_vec)]
@@ -283,7 +300,7 @@ def plot_results(verts, vals, grads, analytical_grads, title=""):
 
         # Calculate analytical solution
         vals = f(centroids[..., 0], centroids[..., 1])
-        analytical_grads = np.zeros((ni, nj, 2))
+        analytical_grads = np.zeros((ni, nj, 2), dtype=np.float64, order='F')
         analytical_grads[..., 0] = dfdx(centroids[..., 0], centroids[..., 1])
         analytical_grads[..., 1] = dfdy(centroids[..., 0], centroids[..., 1])
 
@@ -294,8 +311,11 @@ def plot_results(verts, vals, grads, analytical_grads, title=""):
             errors[method_name].append(l2_err)
 
             # Plot first case only
-            if ni == ni_vec[1]:
-                plot_results(verts, vals, grads, analytical_grads, method_name)
+            # if ni == ni_vec[1]:
+            #     plot_results(verts, vals, grads, analytical_grads, method_name)
+
+    end = time.time()
+    print(f"{end - start:.3f} seconds")
 
     # Convergence plot
     plt.figure(figsize=(8, 6))