From a7509755fffa38a55617b7a977f0308c336e54d1 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:09:42 +0800 Subject: [PATCH 01/18] Update numpy_backend.py --- pygmtools/numpy_backend.py | 528 ++++++++++++++++++++++++++++++++++++- 1 file changed, 527 insertions(+), 1 deletion(-) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index d5f7d5df..427d1511 100644 --- a/pygmtools/numpy_backend.py +++ b/pygmtools/numpy_backend.py @@ -328,6 +328,466 @@ def _check_and_init_gm(K, n1, n2, n1max, n2max, x0): return batch_num, n1, n2, n1max, n2max, n1n2, v0 +############################################ +# Multi-Graph Matching Solvers # +############################################ +def cao_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost): + + m, n = num_graph, num_node + param_lambda = lambda_init + + def _comp_aff_score(x, k): + return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1) + + for iter in range(max_iter): + if iter >= iter_boost: + param_lambda = np.min([param_lambda * lambda_step, lambda_max]) + # pair_con = get_batch_pc_opt(X) + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + for i in range(m): + for j in range(m): + if i >= j: + continue + aff_ori = _comp_aff_score(X[i, j], K[i, j]) / norm + con_ori = _get_single_pc_opt(X, i, j) + # con_ori = torch.sqrt(pair_con[i, j]) + if iter < iter_boost: + score_ori = aff_ori + else: + score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda + X_upt = X[i, j] + for k in range(m): + X_combo = np.matmul(X[i, k], X[k, j]) + aff_combo = _comp_aff_score(X_combo, K[i, j]) / norm + con_combo = _get_single_pc_opt(X, i, j, X_combo) + # con_combo = torch.sqrt(pair_con[i, k] * pair_con[k, j]) + if iter < iter_boost: + score_combo = aff_combo + else: + score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda + if score_combo > score_ori: + X_upt = X_combo + X[i, j] = X_upt + X[j, i] = X_upt.swapaxes(0,1) + return X + +def cao_fast_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost): + r""" + Numpy implementation of CAO solver in fast config (mode="pc") + + :param K: affinity matrix, (m, m, n*n, n*n) + :param X: initial matching, (m, m, n, n) + :param num_graph: number of graphs, int + :param num_node: number of nodes, int + :return: X, (m, m, n, n) + """ + m, n = num_graph, num_node + param_lambda = lambda_init + + def _comp_aff_score(x, k): + return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1) + + mask1 = np.arange(m).reshape(m, 1).repeat(m,axis=1) + mask2 = np.arange(m).reshape(1, m).repeat(m,axis=0) + mask = (mask1 < mask2).astype(float) + X_mask = mask.reshape(m, m, 1, 1) + + for iter in range(max_iter): + if iter >= iter_boost: + param_lambda = np.min([param_lambda * lambda_step, lambda_max]) + + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + + X1 = X.reshape(m, 1, m, n, n) + X1 = np.tile(X1,(1, m, 1, 1, 1)).reshape(-1, n, n) # X1[i,j,k] = X[i,k] + X2 = X.reshape(1, m, m, n, n) + X2 = np.tile(X2,(m, 1, 1, 1, 1)).swapaxes(1, 2).reshape(-1, n, n) # X2[i,j,k] = X[k,j] + X_combo = np.matmul(X1, X2).reshape(m, m, m, n, 
n) # X_combo[i,j,k] = X[i, k] * X[k, j] + + aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m) + pair_con = _get_batch_pc_opt(X) + con_ori = np.sqrt(pair_con) + + K_repeat = np.repeat(K.reshape(m, m, 1, n * n, n * n),m,axis=2).reshape(-1, n * n, n * n) + aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K_repeat) / norm).reshape(m, m, m) + con1 = pair_con.reshape(m, 1, m) + con1 = np.tile(con1,(1, m, 1)) # con1[i,j,k] = pair_con[i,k] + con2 = pair_con.reshape(1, m, m) + con2 = np.tile(con2,(m, 1, 1)).swapaxes(1,2) # con2[i,j,k] = pair_con[j,k] + con_combo = np.sqrt(con1 * con2) + + if iter < iter_boost: + score_ori = aff_ori + score_combo = aff_combo + else: + score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda + score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda + + idx = np.argmax(score_combo,axis=-1) + score_combo = np.max(score_combo, axis=-1) + + assert np.all(score_combo >= score_ori), np.min(score_combo - score_ori) + X_upt = X_combo[mask1, mask2, idx, :, :] + X = X_upt * X_mask + X_upt.swapaxes(0,1).swapaxes(2,3) * X_mask.swapaxes(0,1) + X * (1 - X_mask - X_mask.swapaxes(0, 1)) + assert np.all(X.swapaxes(0,1).swapaxes(2,3) == X) + return X + +def mgm_floyd_solver(K, X, num_graph, num_node, param_lambda): + m, n = num_graph, num_node + + def _comp_aff_score(x, k): + return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1) + + for k in range(m): + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + + # print("iter:{} aff:{:.4f} con:{:.4f}".format( + # k, torch.mean(pair_aff).item(), torch.mean(get_batch_pc_opt(X)).item() + # )) + + for i in range(m): + for j in range(m): + if i >= j: + continue + score_ori = _comp_aff_score(X[i, j], K[i, j]) / norm + X_combo = np.matmul(X[i, k], X[k, j]) + score_combo = _comp_aff_score(X_combo, K[i, j]) / norm + + if score_combo > score_ori: + X[i, j] = X_combo + X[j, i] = X_combo.swapaxes(0, 1) + + for k in range(m): + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + + pair_con = _get_batch_pc_opt(X) + for i in range(m): + for j in range(m): + if i >= j: + continue + aff_ori = _comp_aff_score(X[i, j], K[i, j]) / norm + con_ori = _get_single_pc_opt(X, i, j) + # con_ori = torch.sqrt(pair_con[i, j]) + score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda + + X_combo = np.matmul(X[i, k], X[k, j]) + aff_combo = _comp_aff_score(X_combo, K[i, j]) / norm + con_combo = _get_single_pc_opt(X, i, j, X_combo) + # con_combo = torch.sqrt(pair_con[i, k] * pair_con[k, j]) + score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda + + if score_combo > score_ori: + X[i, j] = X_combo + X[j, i] = X_combo.swapaxes(0,1) + return X + +def mgm_floyd_fast_solver(K, X, num_graph, num_node, param_lambda): + m, n = num_graph, num_node + + def _comp_aff_score(x, k): + return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1) + + mask1 = np.arange(m).reshape(m, 1).repeat(m,axis=1) + mask2 = np.arange(m).reshape(1, m).repeat(m,axis=0) + mask = (mask1 < mask2).astype(float) + X_mask = mask.reshape(m, m, 1, 1) + + for k in range(m): + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + 
pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + + # print("iter:{} aff:{:.4f} con:{:.4f}".format( + # k, torch.mean(pair_aff).item(), torch.mean(get_batch_pc_opt(X)).item() + # )) + + X1 = X[:, k].reshape(m, 1, n, n) + X1 = np.tile(X1,(1, m, 1, 1)).reshape(-1, n, n) # X1[i, j] = X[i, k] + X2 = X[k, :].reshape(1, m, n, n) + X2 = np.tile(X2,(m, 1, 1, 1)).reshape(-1, n, n) # X2[i, j] = X[k, j] + X_combo = np.matmul(X1, X2).reshape(m, m, n, n) + + aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m) + aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m) + + score_ori = aff_ori + score_combo = aff_combo + + upt = (score_ori < score_combo).astype(float) + upt = (upt * mask).reshape(m, m, 1, 1) + X = X * (1.0 - upt) + X_combo * upt + X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask) + + for k in range(m): + pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m) + pair_aff = pair_aff - np.eye(m) * pair_aff + norm = np.max(pair_aff) + + pair_con = _get_batch_pc_opt(X) + + X1 = X[:, k].reshape(m, 1, n, n) + X1 = np.tile(X1,(1, m, 1, 1)).reshape(-1, n, n) # X1[i, j] = X[i, k] + X2 = X[k, :].reshape(1, m, n, n) + X2 = np.tile(X2,(m, 1, 1, 1)).reshape(-1, n, n) # X2[i, j] = X[k, j] + X_combo = np.matmul(X1, X2).reshape(m, m, n, n) + + aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m) + aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m) + + con_ori = np.sqrt(pair_con) + con1 = pair_con[:, k].reshape(m, 1).repeat(m,axis=1) + con2 = pair_con[k, :].reshape(1, m).repeat(m,axis=0) + con_combo = np.sqrt(con1 * con2) + + score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda + score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda + + upt = (score_ori < score_combo).astype(float) + upt = (upt * mask).reshape(m, m, 1, 1) + X = X * (1.0 - upt) + X_combo * upt + X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask) + return X + +def _get_single_pc_opt(X, i, j, Xij=None): + """ + CAO/Floyd helper function (compute consistency) + :param X: (m, m, n, n) all the matching results + :param i: index + :param j: index + :param Xij: (optional) the matching to be evaluated in place of X[i, j] + :return: the consistency of X_ij + """ + m, _, n, _ = X.shape + if Xij is None: + Xij = X[i, j] + X1 = X[i, :].reshape(-1, n, n) + X2 = X[:, j].reshape(-1, n, n) + X_combo = np.matmul(X1, X2) + pair_con = 1 - np.sum(np.abs(Xij - X_combo)) / (2 * n * m) + return pair_con + +def _get_batch_pc_opt(X): + """ + CAO/Floyd-fast helper function (compute consistency in batch) + :param X: (m, m, n, n) all the matching results + :return: (m, m) the consistency of X + """ + m = X.shape[0] + n = X.shape[2] + X1 = X.reshape(m, 1, m, n, n) + X1 = np.tile(X1,(1, m, 1, 1, 1)).reshape(-1, n, n) # X1[i, j, k] = X[i, k] + X2 = X.reshape(1, m, m, n, n) + X2 = np.tile(X2,(m, 1, 1, 1, 1)).swapaxes(1,2).reshape(-1, n, n) # X2[i, j, k] = X[k, j] + X_combo = np.matmul(X1, X2).reshape(m, m, m, n, n) + X_ori = X.reshape(m, m, 1, n, n) + X_ori = np.tile(X_ori,(1, 1, m, 1, 1)) + pair_con = 1 - np.sum(np.abs(X_combo - X_ori), axis=(2, 3, 4)) / (2 * n * m) + return pair_con + +def gamgm( + A, W, ns, n_univ, U0, + init_tau, min_tau, sk_gamma, + sk_iter, max_iter, quad_weight, + converge_thresh, outlier_thresh, bb_smooth, + verbose, + cluster_M=None, projector='sinkhorn', hung_iter=True # these arguments are reserved for clustering 
+): + """ + Numpy implementation of Graduated Assignment for Multi-Graph Matching (with compatibility for 2GM and clustering) + """ + num_graphs = A.shape[0] + if ns is None: + ns = np.full((num_graphs,), A.shape[1], dtype='i4') + n_indices = np.cumsum(ns, axis=0) + + # build a super adjacency matrix A + supA = np.zeros((n_indices[-1], n_indices[-1])) + for i in range(num_graphs): + start_n = n_indices[i] - ns[i] + end_n = n_indices[i] + supA[start_n:end_n, start_n:end_n] = A[i, :ns[i], :ns[i]] + + # handle the type of n_univ + if type(n_univ) is np.ndarray: + n_univ = n_univ.item() + + # randomly init U + if U0 is None: + U0 = np.full((n_indices[-1], n_univ), 1 / n_univ) + U0 += np.random.rand(n_indices[-1], n_univ) / 1000 + + # init cluster_M if not given + if cluster_M is None: + cluster_M = np.ones((num_graphs, num_graphs)) + + # reshape W into supW + supW = np.zeros((n_indices[-1], n_indices[-1])) + for i, j in itertools.product(range(num_graphs), repeat=2): + start_x = n_indices[i] - ns[i] + end_x = n_indices[i] + start_y = n_indices[j] - ns[j] + end_y = n_indices[j] + supW[start_x:end_x, start_y:end_y] = W[i, j, :ns[i], :ns[j]] + + U = gamgm_real( + supA, supW, ns, n_indices, n_univ, num_graphs, U0, + init_tau, min_tau, sk_gamma, + sk_iter, max_iter, quad_weight, + converge_thresh, outlier_thresh, + verbose, + cluster_M, projector, hung_iter + ) + + result = pygmtools.utils.MultiMatchingResult(True, 'numpy') + + for i in range(num_graphs): + start_n = n_indices[i] - ns[i] + end_n = n_indices[i] + result[i] = U[start_n:end_n] + + return result + +def gamgm_real( + supA, supW, ns, n_indices, n_univ, num_graphs, U0, + init_tau, min_tau, sk_gamma, + sk_iter, max_iter, quad_weight, + converge_thresh, outlier_thresh, + verbose, + cluster_M, projector, hung_iter # these arguments are reserved for clustering + ): + """ + The real forward function of GAMGM + """ + U = U0 + sinkhorn_tau = init_tau + iter_flag = True + + while iter_flag: + for i in range(max_iter): + # compact matrix form update of V + UUt = np.matmul(U, U.T) + lastUUt = UUt + cluster_weight = np.repeat(cluster_M, ns.astype('i4'), axis=0) + cluster_weight = np.repeat(cluster_weight, ns.astype('i4'), axis=1) + quad = np.matmul(np.matmul(np.matmul(supA, UUt * cluster_weight), supA), U) * quad_weight * 2 + unary = np.matmul(supW * cluster_weight, U) + if verbose: + if projector == 'sinkhorn': + print_str = f'tau={sinkhorn_tau:.3e}' + else: + print_str = 'hungarian' + print(print_str + f' #iter={i}/{max_iter} ' + f'quad score: {(quad * U).sum():.3e}, unary score: {(unary * U).sum():.3e}') + V = (quad + unary) / num_graphs + + U_list = [] + if projector == 'hungarian': + n_start = 0 + for n_end in n_indices: + U_list.append(pygmtools.hungarian(V[n_start:n_end, :n_univ], backend='numpy')) + n_start = n_end + elif projector == 'sinkhorn': + if np.all(ns == ns[0]): + if ns[0] <= n_univ: + U_list.append( + sinkhorn( + V.reshape(num_graphs, -1, n_univ), + max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True + ).reshape(-1, n_univ)) + else: + U_list.append( + sinkhorn( + V.reshape(num_graphs, -1, n_univ).swapaxes(1, 2), + max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True + ).swapaxes(1, 2).reshape(-1, n_univ)) + else: + V_list = [] + n1 = [] + n_start = 0 + for n_end in n_indices: + V_list.append(V[n_start:n_end, :n_univ]) + n1.append(n_end - n_start) + n_start = n_end + V_batch = build_batch(V_list) + n1 = np.ndarray(n1) + U = sinkhorn(V_batch, n1, + max_iter=sk_iter, tau=sinkhorn_tau, 
batched_operation=True, dummy_row=True) + n_start = 0 + for idx, n_end in enumerate(n_indices): + U_list.append(U[idx, :n_end - n_start, :]) + n_start = n_end + else: + raise NameError('Unknown projector name: {}'.format(projector)) + + U = np.concatenate(U_list, axis=0) + if num_graphs == 2: + U[:ns[0], :] = np.eye(ns[0], n_univ) + + # calculate gap to discrete + if projector == 'sinkhorn' and verbose: + U_list_hung = [] + n_start = 0 + for n_end in n_indices: + U_list_hung.append(pygmtools.hungarian(V[n_start:n_end, :n_univ], backend='numpy')) + n_start = n_end + U_hung = np.concatenate(U_list_hung, axis=0) + diff = np.linalg.norm(np.matmul(U, U.T) - lastUUt) + print(f'tau={sinkhorn_tau:.3e} #iter={i}/{max_iter} ' + f'gap to discrete: {np.mean(np.abs(U - U_hung)):.3e}, iter diff: {diff:.3e}') + + if projector == 'hungarian' and outlier_thresh > 0: + U_hung = U + UUt = np.matmul(U_hung, U_hung.T) + cluster_weight = np.repeat(cluster_M, ns.astype('i4'), axis=0) + cluster_weight = np.repeat(cluster_weight, ns.astype('i4'), axis=1) + quad = np.linalg.multi_dot([supA, UUt * cluster_weight, supA, U_hung]) * quad_weight * 2 + unary = np.matmul(supW * cluster_weight, U_hung) + max_vals = (unary + quad).max(axis=1) + U = U * (unary + quad > outlier_thresh) + if verbose: + print(f'hungarian #iter={i}/{max_iter} ' + f'unary+quad score thresh={outlier_thresh:.3f}, #>thresh={np.sum(max_vals > outlier_thresh)}/{max_vals.shape[0]}' + f' min:{max_vals.min():.4f}, mean:{max_vals.mean():.4f}, median:{np.median(max_vals):.4f}, max:{max_vals.max():.4f}') + + if np.linalg.norm(np.matmul(U, U.T) - lastUUt) < converge_thresh: + break + + if verbose: print('-' * 20) + + if i == max_iter - 1: # not converged + if hung_iter: + pass + else: + U_list = [pygmtools.hungarian(_, backend='numpy') for _ in U_list] + U = np.concatenate(U_list, axis=0) + break + + # projection control + if projector == 'hungarian': + break + elif sinkhorn_tau > min_tau: + sinkhorn_tau *= sk_gamma + else: + if hung_iter: + projector = 'hungarian' + else: + U_list = [pygmtools.hungarian(_, backend='numpy') for _ in U_list] + U = np.concatenate(U_list, axis=0) + break + + return U + +############################################ +# Neural Network Solvers # +############################################ + ############################################# # Utils Functions # ############################################# @@ -337,7 +797,7 @@ def inner_prod_aff_fn(feat1, feat2): """ numpy implementation of inner product affinity function """ - return np.matmul(feat1, feat2.transpose((0, 2, 1))) + return np.matmul(feat1, feat2.swapaxes(1,2)) def gaussian_aff_fn(feat1, feat2, sigma): @@ -390,6 +850,15 @@ def dense_to_sparse(dense_adj): edge_weight = build_batch([dense_adj[b][(conn[b, :, 0], conn[b, :, 1])] for b in range(batch_size)]) return conn, np.expand_dims(edge_weight, axis=-1), nedges +def compute_affinity_score(X, K): + """ + Numpy implementation of computing affinity score + """ + b, n, _ = X.shape + vx = X.swapaxes(1,2).reshape(b, -1, 1) # (b, n*n, 1) + vxt = vx.swapaxes(1, 2) # (b, 1, n*n) + affinity = np.squeeze(np.squeeze(np.matmul(np.matmul(vxt, K), vx),axis=-1),axis=-1) + return affinity def to_numpy(input): """ identity function """ return input @@ -404,7 +873,64 @@ def from_numpy(input, device): """ return input +def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0): + """ + Numpy implementation of generate_isomorphic_graphs + """ + X_gt = np.zeros((graph_num, node_num, node_num)) + X_gt[0, np.arange(0, node_num, dtype='i4'), np.arange(0, node_num, 
dtype='i4')] = 1 + for i in range(graph_num): + if i > 0: + X_gt[i, np.arange(0, node_num, dtype='i4'), np.random.permutation(node_num)] = 1 + joint_X = X_gt.reshape(graph_num * node_num, node_num) + X_gt = np.matmul(joint_X, joint_X.T) + X_gt = X_gt.reshape(graph_num, node_num, graph_num, node_num).transpose(0, 2, 1, 3) + A0 = np.random.rand(node_num, node_num) + A0[np.arange(node_num),np.arange(node_num)] = 0 + As = [A0] + for i in range(graph_num): + if i > 0: + As.append(np.matmul(np.matmul(X_gt[i, 0], A0), X_gt[0, i])) + if node_feat_dim > 0: + F0 = np.random.rand(node_num, node_feat_dim) + Fs = [F0] + for i in range(graph_num): + if i > 0: + Fs.append(np.matmul(X_gt[i, 0], F0)) + return np.stack(As,axis=0), X_gt, np.stack(Fs,axis=0) + else: + return np.stack(As,axis=0), X_gt +""" +def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, n2:np.ndarray) -> np.ndarray: + + #Numpy implementation of permutation_loss + + batch_num = pred_dsmat.shape[0] + pred_dsmat = pred_dsmat.to(dtype='f') + + if not np.all((pred_dsmat >= 0) * (pred_dsmat <= 1)): + raise ValueError("pred_dsmat contains invalid numerical entries.") + if not np.all((gt_perm >= 0) * (gt_perm <= 1)): + raise ValueError("gt_perm contains invalid numerical entries.") + + if n1 is None: + n1 = np.array([pred_dsmat.shape[1] for _ in range(batch_num)]) + if n2 is None: + n2 = np.array([pred_dsmat.shape[2] for _ in range(batch_num)]) + + loss = np.array(0.) + n_sum = np.zeros_like(loss) + for b in range(batch_num): + batch_slice = [b, slice(n1[b]), slice(n2[b])] + loss += array.nn.functional.binary_cross_entropy( + pred_dsmat[batch_slice], + gt_perm[batch_slice], + reduction='sum') + n_sum += n1[b].to(n_sum.dtype).to(pred_dsmat.device) + + return loss / n_sum +""" def _aff_mat_from_node_edge_aff(node_aff: np.ndarray, edge_aff: np.ndarray, connectivity1: np.ndarray, connectivity2: np.ndarray, n1, n2, ne1, ne2): """ From 82800a1d05b2e1d28411a7a8d7880def712761c4 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:11:06 +0800 Subject: [PATCH 02/18] Update test_multi_graph_solvers.py --- tests/test_multi_graph_solvers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_multi_graph_solvers.py b/tests/test_multi_graph_solvers.py index bd8cbb5f..65d6b91a 100644 --- a/tests/test_multi_graph_solvers.py +++ b/tests/test_multi_graph_solvers.py @@ -145,7 +145,7 @@ def test_cao(): 'qap_solver': [functools.partial(pygm.ipfp, n1max=num_nodes, n2max=num_nodes), None], 'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn], 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn] - }, ['pytorch', 'paddle', 'jittor']) + }, ['pytorch', 'numpy', 'paddle', 'jittor']) for i in range(max_retries - 1): error_flag = False try: @@ -170,7 +170,7 @@ def test_mgm_floyd(): 'qap_solver': [functools.partial(pygm.ipfp, n1max=num_nodes, n2max=num_nodes), None], 'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn], 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn] - }, ['pytorch', 'paddle', 'jittor']) + }, ['pytorch', 'numpy', 'paddle', 'jittor']) for i in range(max_retries - 1): error_flag = False try: @@ -194,7 +194,7 @@ def test_gamgm(): 'sk_min_tau': [0.1, 0.05], 'param_lambda': [0.1, 0.5], 'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, 
sigma=.1), pygm.utils.inner_prod_aff_fn] - }, ['pytorch', 'paddle', 'jittor'] + }, ['pytorch', 'numpy', 'paddle', 'jittor'] ) for i in range(max_retries - 1): error_flag = False From 6a0d9b45061cd7ee58822785ace76911e60b22b5 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:28:30 +0800 Subject: [PATCH 03/18] Update numpy_backend.py --- pygmtools/numpy_backend.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index 427d1511..a34d7053 100644 --- a/pygmtools/numpy_backend.py +++ b/pygmtools/numpy_backend.py @@ -1,8 +1,10 @@ +import itertools +import functools import scipy.special import scipy.optimize import numpy as np from multiprocessing import Pool - +import pygmtools.utils ############################################# # Linear Assignment Problem Solvers # From 1afb7ac97b2c952dcefb4a5522618542016181a3 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Mon, 28 Nov 2022 12:52:50 +0800 Subject: [PATCH 04/18] Update numpy_backend.py --- pygmtools/numpy_backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index a34d7053..3ee47e33 100644 --- a/pygmtools/numpy_backend.py +++ b/pygmtools/numpy_backend.py @@ -718,7 +718,6 @@ def gamgm_real( n1.append(n_end - n_start) n_start = n_end V_batch = build_batch(V_list) - n1 = np.ndarray(n1) U = sinkhorn(V_batch, n1, max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True) n_start = 0 From 55aec4052a4167c74b1ccbc132d268ef28ff5d09 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Mon, 28 Nov 2022 12:55:18 +0800 Subject: [PATCH 05/18] Update multi_graph_solvers.py --- pygmtools/multi_graph_solvers.py | 144 +++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/pygmtools/multi_graph_solvers.py b/pygmtools/multi_graph_solvers.py index f8979683..63935b2c 100644 --- a/pygmtools/multi_graph_solvers.py +++ b/pygmtools/multi_graph_solvers.py @@ -56,6 +56,53 @@ def cao(K, x0=None, qap_solver=None, Multi-graph matching methods process all graphs at once and do not support the additional batch dimension. Please note that this behavior is different from two-graph matching solvers in :mod:`~pygmtools.classic_solvers`. + + .. dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate 10 isomorphic graphs + >>> graph_num = 10 + >>> As, X_gt = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10) + >>> As_1, As_2 = [], [] + >>> for i in range(graph_num): + ... for j in range(graph_num): + ... As_1.append(As[i]) + ... As_2.append(As[j]) + >>> As_1 = np.stack(As_1, axis=0) + >>> As_2 = np.stack(As_2, axis=0) + + # Build affinity matrix + >>> conn1, edge1, ne1 = pygm.utils.dense_to_sparse(As_1) + >>> conn2, edge2, ne2 = pygm.utils.dense_to_sparse(As_2) + >>> import functools + >>> gaussian_aff = functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.) 
# set affinity function + >>> K = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2, None, None, None, None, edge_aff_fn=gaussian_aff) + >>> K = K.reshape(graph_num, graph_num, 4*4, 4*4) + >>> K.shape + (10, 10, 16, 16) + + # Solve the multi-matching problem + >>> X = pygm.cao(K) + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + + # Use the IPFP solver for two-graph matching + >>> ipfp_func = functools.partial(pygm.ipfp, n1max=4, n2max=4) + >>> X = pygm.cao(K, qap_solver=ipfp_func) + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + + # Run the faster version of CAO algorithm + >>> X = pygm.cao(K, mode='fast') + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + .. dropdown:: Pytorch Example :: @@ -290,6 +337,53 @@ def mgm_floyd(K, x0=None, qap_solver=None, :param backend: (default: ``pygmtools.BACKEND`` variable) the backend for computation. :return: :math:`(m\times m \times n \times n)` the multi-graph matching result + .. dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate 10 isomorphic graphs + >>> graph_num = 10 + >>> As, X_gt = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10) + >>> As_1, As_2 = [], [] + >>> for i in range(graph_num): + ... for j in range(graph_num): + ... As_1.append(As[i]) + ... As_2.append(As[j]) + >>> As_1 = np.stack(As_1, axis=0) + >>> As_2 = np.stack(As_2, axis=0) + + # Build affinity matrix + >>> conn1, edge1, ne1 = pygm.utils.dense_to_sparse(As_1) + >>> conn2, edge2, ne2 = pygm.utils.dense_to_sparse(As_2) + >>> import functools + >>> gaussian_aff = functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.) # set affinity function + >>> K = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2, None, None, None, None, edge_aff_fn=gaussian_aff) + >>> K = K.reshape(graph_num, graph_num, 4*4, 4*4) + >>> K.shape + (10, 10, 16, 16) + + # Solve the multi-matching problem + >>> X = pygm.mgm_floyd(K) + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + + # Use the IPFP solver for two-graph matching + >>> ipfp_func = functools.partial(pygm.ipfp, n1max=4, n2max=4) + >>> X = pygm.mgm_floyd(K, qap_solver=ipfp_func) + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + + # Run the faster version of the Floyd algorithm + >>> X = pygm.mgm_floyd(K, mode='fast') + >>> (X * X_gt).sum() / X_gt.sum() + 1.0 + + .. dropdown:: Pytorch Example :: @@ -558,6 +652,56 @@ def gamgm(A, W, Setting ``verbose=True`` may help you tune the parameters. + .. dropdown:: Numpy Example + + :: + >>> import numpy as np + >>> import pygmtools as pygm + >>> import itertools + >>> import time + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate 10 isomorphic graphs + >>> graph_num = 10 + >>> As, X_gt, Fs = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10, node_feat_dim=20) + + # Compute node-wise similarity by inner-product and Sinkhorn + >>> W = np.matmul(np.expand_dims(Fs,axis=1), np.expand_dims(Fs.swapaxes(1, 2),axis=0)) + >>> W = pygm.sinkhorn(W.reshape(graph_num ** 2, 4, 4)).reshape(graph_num, graph_num, 4, 4) + + # Solve the multi-matching problem + >>> X = pygm.gamgm(As, W) + >>> matched = 0 + >>> for i, j in itertools.product(range(graph_num), repeat=2): + ... 
matched += (X[i,j] * X_gt[i,j]).sum() + >>> acc = matched / X_gt.sum() + >>> acc + 1.0 + + # This function supports graphs with different nodes (also known as partial matching) + # In the following we ignore the last node from the last 5 graphs + >>> ns = np.array([4, 4, 4, 4, 4, 3, 3, 3, 3, 3], dtype='i4') + >>> for i in range(graph_num): + ... As[i, ns[i]:, :] = 0 + ... As[i, :, ns[i]:] = 0 + >>> for i, j in itertools.product(range(graph_num), repeat=2): + ... X_gt[i, j, ns[i]:, :] = 0 + ... X_gt[i, j, :, ns[j]:] = 0 + ... W[i, j, ns[i]:, :] = 0 + ... W[i, j, :, ns[j]:] = 0 + + # Partial matching is challenging and the following parameters are carefully tuned + >>> X = pygm.gamgm(As, W, ns, n_univ=4, sk_init_tau=.1, sk_min_tau=0.01, param_lambda=0.3) + + # Check the partial matching result + >>> matched = 0 + >>> for i, j in itertools.product(range(graph_num), repeat=2): + ... matched += (X[i,j] * X_gt[i, j, :ns[i], :ns[j]]).sum() + >>> matched / X_gt.sum() + 1.0 + + .. dropdown:: Pytorch Example :: From 21248f19e868e1fd7d2e45126eac3492f309cfa7 Mon Sep 17 00:00:00 2001 From: Runzhong Wang <18309862+rogerwwww@users.noreply.github.com> Date: Mon, 28 Nov 2022 13:06:26 +0800 Subject: [PATCH 06/18] fix double blank lines between functions --- pygmtools/numpy_backend.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py index 3ee47e33..e48d0a19 100644 --- a/pygmtools/numpy_backend.py +++ b/pygmtools/numpy_backend.py @@ -375,6 +375,7 @@ def _comp_aff_score(x, k): X[j, i] = X_upt.swapaxes(0,1) return X + def cao_fast_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost): r""" Numpy implementation of CAO solver in fast config (mode="pc") @@ -438,6 +439,7 @@ def _comp_aff_score(x, k): assert np.all(X.swapaxes(0,1).swapaxes(2,3) == X) return X + def mgm_floyd_solver(K, X, num_graph, num_node, param_lambda): m, n = num_graph, num_node @@ -491,6 +493,7 @@ def _comp_aff_score(x, k): X[j, i] = X_combo.swapaxes(0,1) return X + def mgm_floyd_fast_solver(K, X, num_graph, num_node, param_lambda): m, n = num_graph, num_node @@ -558,6 +561,7 @@ def _comp_aff_score(x, k): X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask) return X + def _get_single_pc_opt(X, i, j, Xij=None): """ CAO/Floyd helper function (compute consistency) @@ -575,6 +579,7 @@ def _get_single_pc_opt(X, i, j, Xij=None): pair_con = 1 - np.sum(np.abs(Xij - X_combo)) / (2 * n * m) return pair_con + def _get_batch_pc_opt(X): """ CAO/Floyd-fast helper function (compute consistency in batch) @@ -656,6 +661,7 @@ def gamgm( return result + def gamgm_real( supA, supW, ns, n_indices, n_univ, num_graphs, U0, init_tau, min_tau, sk_gamma, @@ -785,10 +791,12 @@ def gamgm_real( return U + ############################################ # Neural Network Solvers # ############################################ + ############################################# # Utils Functions # ############################################# @@ -851,6 +859,7 @@ def dense_to_sparse(dense_adj): edge_weight = build_batch([dense_adj[b][(conn[b, :, 0], conn[b, :, 1])] for b in range(batch_size)]) return conn, np.expand_dims(edge_weight, axis=-1), nedges + def compute_affinity_score(X, K): """ Numpy implementation of computing affinity score @@ -861,6 +870,7 @@ def compute_affinity_score(X, K): affinity = np.squeeze(np.squeeze(np.matmul(np.matmul(vxt, K), vx),axis=-1),axis=-1) return affinity + def to_numpy(input): """ identity function @@ -874,6 +884,7 @@ def 
from_numpy(input, device): """ return input + def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0): """ Numpy implementation of generate_isomorphic_graphs @@ -901,6 +912,8 @@ def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0): return np.stack(As,axis=0), X_gt, np.stack(Fs,axis=0) else: return np.stack(As,axis=0), X_gt + + """ def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, n2:np.ndarray) -> np.ndarray: @@ -932,6 +945,8 @@ def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, return loss / n_sum """ + + def _aff_mat_from_node_edge_aff(node_aff: np.ndarray, edge_aff: np.ndarray, connectivity1: np.ndarray, connectivity2: np.ndarray, n1, n2, ne1, ne2): """ @@ -1020,6 +1035,7 @@ def _transpose(input: np.ndarray, dim1, dim2): """ return np.swapaxes(input, dim1, dim2) + def _mm(input1: np.ndarray, input2: np.ndarray): """ numpy implementation of _mm From edcf6e4868fe5617e9db9e6356d034e0016d3846 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Wed, 7 Dec 2022 16:09:45 +0800 Subject: [PATCH 07/18] Update multi_graph_solvers.py --- pygmtools/multi_graph_solvers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygmtools/multi_graph_solvers.py b/pygmtools/multi_graph_solvers.py index 63935b2c..3bde768e 100644 --- a/pygmtools/multi_graph_solvers.py +++ b/pygmtools/multi_graph_solvers.py @@ -655,6 +655,7 @@ def gamgm(A, W, .. dropdown:: Numpy Example :: + >>> import numpy as np >>> import pygmtools as pygm >>> import itertools From 7039df3d628fe4825dc0a0eb8c0bdd2347f8a6c8 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Thu, 8 Dec 2022 17:32:27 +0800 Subject: [PATCH 08/18] Add numpy's doc in neural_solvers.py --- pygmtools/neural_solvers.py | 197 +++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 3 deletions(-) diff --git a/pygmtools/neural_solvers.py b/pygmtools/neural_solvers.py index f3b28196..e7c16271 100644 --- a/pygmtools/neural_solvers.py +++ b/pygmtools/neural_solvers.py @@ -8,7 +8,6 @@ from pygmtools.utils import NOT_IMPLEMENTED_MSG, _check_shape, _get_shape, _unsqueeze, _squeeze, _check_data_type from pygmtools.classic_solvers import __check_gm_arguments - def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, in_channel=1024, hidden_channel=2048, out_channel=2048, num_layers=2, sk_max_iter=20, sk_tau=0.05, network=None, return_network=False, pretrain='voc', @@ -19,8 +18,8 @@ def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, The graph matching module is composed of several intra-graph embedding layers, a cross-graph embedding layer, and a Sinkhorn matching layer. Only the second last layer has a cross-graph update layer. - See the following pipeline for an example, with application to visual graph matching (layers in the gray box are - implemented by pygmtools): + See the following pipeline for an example, with application to visual graph matching (layers in the gray box + + Affinity Metric + Sinkhorn are implemented by pygmtools): .. image:: ../../images/pca_gm.png @@ -67,6 +66,53 @@ def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, .. note:: This function also supports non-batched input, by ignoring all batch dimensions in the input tensors. + .. 
dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate a batch of isomorphic graphs + >>> batch_size = 10 + >>> X_gt = np.zeros((batch_size, 4, 4)) + >>> X_gt[:, np.arange(0, 4, dtype='i4'), np.random.permutation(4)] = 1 + >>> A1 = 1. * (np.random.rand(batch_size, 4, 4) > 0.5) + >>> for i in np.arange(4): # discard self-loop edges + ... for j in np.arange(batch_size): + ... A1[j][i][i] = 0 + >>> A2 = np.matmul(np.matmul(X_gt.swapaxes(1, 2), A1), X_gt) + >>> feat1 = np.random.rand(batch_size, 4, 1024) - 0.5 + >>> feat2 = np.matmul(X_gt.swapaxes(1, 2), feat1) + >>> n1 = n2 = np.array([4] * batch_size) + + # Match by PCA-GM (load pretrained model) + >>> X, net = pygm.pca_gm(feat1, feat2, A1, A2, n1, n2, return_network=True) + Downloading to ~/.cache/pygmtools/pca_gm_voc_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # Pass the net object to avoid rebuilding the model again + >>> X = pygm.pca_gm(feat1, feat2, A1, A2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may also load other pretrained weights + >>> X, net = pygm.pca_gm(feat1, feat2, A1, A2, n1, n2, return_network=True, pretrain='willow') + Downloading to ~/.cache/pygmtools/pca_gm_willow_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may configure your own model and integrate the model into a deep learning pipeline. For example: + >>> net = pygm.utils.get_network(pygm.pca_gm, in_channel=1024, hidden_channel=2048, out_channel=512, num_layers=3, pretrain=False) + # feat1/feat2 may be outputs by other neural networks + >>> X = pygm.pca_gm(feat1, feat2, A1, A2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + .. dropdown:: PyTorch Example :: @@ -274,6 +320,53 @@ def ipca_gm(feat1, feat2, A1, A2, n1=None, n2=None, .. note:: This function also supports non-batched input, by ignoring all batch dimensions in the input tensors. + .. dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate a batch of isomorphic graphs + >>> batch_size = 10 + >>> X_gt = np.zeros((batch_size, 4, 4)) + >>> X_gt[:, np.arange(0, 4, dtype='i4'), np.random.permutation(4)] = 1 + >>> A1 = 1. * (np.random.rand(batch_size, 4, 4) > 0.5) + >>> for i in np.arange(4): # discard self-loop edges + ... for j in np.arange(batch_size): + ... A1[j][i][i] = 0 + >>> A2 = np.matmul(np.matmul(X_gt.swapaxes(1, 2), A1), X_gt) + >>> feat1 = np.random.rand(batch_size, 4, 1024) - 0.5 + >>> feat2 = np.matmul(X_gt.swapaxes(1, 2), feat1) + >>> n1 = n2 = np.array([4] * batch_size) + + # Match by IPCA-GM (load pretrained model) + >>> X, net = pygm.ipca_gm(feat1, feat2, A1, A2, n1, n2, return_network=True) + Downloading to ~/.cache/pygmtools/ipca_gm_voc_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # Pass the net object to avoid rebuilding the model again + >>> X = pygm.ipca_gm(feat1, feat2, A1, A2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may also load other pretrained weights + >>> X, net = pygm.ipca_gm(feat1, feat2, A1, A2, n1, n2, return_network=True, pretrain='willow') + Downloading to ~/.cache/pygmtools/ipca_gm_willow_numpy.npy... 
+ >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may configure your own model and integrate the model into a deep learning pipeline. For example: + >>> net = pygm.utils.get_network(pygm.ipca_gm, in_channel=1024, hidden_channel=2048, out_channel=512, num_layers=3, cross_iter=10, pretrain=False) + # feat1/feat2 may be outputs by other neural networks + >>> X = pygm.ipca_gm(feat1, feat2, A1, A2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + .. dropdown:: PyTorch Example :: @@ -489,6 +582,55 @@ def cie(feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1=None, n2=None .. note:: This function also supports non-batched input, by ignoring all batch dimensions in the input tensors. + .. dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate a batch of isomorphic graphs + >>> batch_size = 10 + >>> X_gt = np.zeros((batch_size, 4, 4)) + >>> X_gt[:, np.arange(0, 4, dtype='i4'), np.random.permutation(4)] = 1 + >>> A1 = 1. * (np.random.rand(batch_size, 4, 4) > 0.5) + >>> for i in np.arange(4): # discard self-loop edges + ... for j in np.arange(batch_size): + ... A1[j][i][i] = 0 + >>> e_feat1 = np.expand_dims(np.random.rand(batch_size, 4, 4) * A1,axis=-1) # shape: (10, 4, 4, 1) + >>> A2 = np.matmul(np.matmul(X_gt.swapaxes(1, 2), A1), X_gt) + >>> e_feat2 = np.expand_dims(np.matmul(np.matmul(X_gt.swapaxes(1, 2),np.squeeze(e_feat1,axis=-1)), X_gt),axis=-1) + >>> feat1 = np.random.rand(batch_size, 4, 1024) - 0.5 + >>> feat2 = np.matmul(X_gt.swapaxes(1, 2), feat1) + >>> n1 = n2 = np.array([4] * batch_size) + + # Match by CIE (load pretrained model) + >>> X, net = pygm.cie(feat1, feat2, A1, A2, e_feat1, e_feat2, n1, n2, return_network=True) + Downloading to ~/.cache/pygmtools/cie_voc_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # Pass the net object to avoid rebuilding the model again + >>> X = pygm.cie(feat1, feat2, A1, A2, e_feat1, e_feat2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may also load other pretrained weights + >>> X, net = pygm.cie(feat1, feat2, A1, A2, e_feat1, e_feat2, n1, n2, return_network=True, pretrain='willow') + Downloading to ~/.cache/pygmtools/cie_willow_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may configure your own model and integrate the model into a deep learning pipeline. For example: + >>> net = pygm.utils.get_network(pygm.cie, in_node_channel=1024, in_edge_channel=1, hidden_channel=2048, out_channel=512, num_layers=3, pretrain=False) + # feat1/feat2/e_feat1/e_feat2 may be outputs by other neural networks + >>> X = pygm.cie(feat1, feat2, A1, A2, e_feat1, e_feat2, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + .. dropdown:: PyTorch Example :: @@ -710,6 +852,55 @@ def ngm(K, n1=None, n2=None, n1max=None, n2max=None, x0=None, .. note:: This function also supports non-batched input, by ignoring all batch dimensions in the input tensors. + .. 
dropdown:: Numpy Example + + :: + + >>> import numpy as np + >>> import pygmtools as pygm + >>> pygm.BACKEND = 'numpy' + >>> np.random.seed(1) + + # Generate a batch of isomorphic graphs + >>> batch_size = 10 + >>> X_gt = np.zeros((batch_size, 4, 4)) + >>> X_gt[:, np.arange(0, 4, dtype='i4'), np.random.permutation(4)] = 1 + >>> A1 = np.random.rand(batch_size, 4, 4) + >>> A2 = np.matmul(np.matmul(X_gt.swapaxes(1, 2), A1), X_gt) + >>> n1 = n2 = np.array([4] * batch_size) + + # Build affinity matrix + >>> conn1, edge1, ne1 = pygm.utils.dense_to_sparse(A1) + >>> conn2, edge2, ne2 = pygm.utils.dense_to_sparse(A2) + >>> import functools + >>> gaussian_aff = functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.) # set affinity function + >>> K = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2, n1, None, n2, None, edge_aff_fn=gaussian_aff) + + # Solve by NGM + >>> X, net = pygm.ngm(K, n1, n2, return_network=True) + Downloading to ~/.cache/pygmtools/ngm_voc_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # Pass the net object to avoid rebuilding the model again + >>> X = pygm.ngm(K, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may also load other pretrained weights + >>> X, net = pygm.ngm(K, n1, n2, return_network=True, pretrain='willow') + Downloading to ~/.cache/pygmtools/ngm_willow_numpy.npy... + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + # You may configure your own model and integrate the model into a deep learning pipeline. For example: + >>> net = pygm.utils.get_network(pygm.ngm, gnn_channels=(32, 64, 128, 64, 32), sk_emb=8, pretrain=False) + # K may be outputs by other neural networks (constructed K from node/edge features by pygm.utils.build_aff_mat) + >>> X = pygm.ngm(K, n1, n2, network=net) + >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy + 1.0 + + .. dropdown:: PyTorch Example :: From bce7179eed58868657276c53174beea62e469b6e Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Thu, 8 Dec 2022 17:42:38 +0800 Subject: [PATCH 09/18] add numpy's doc in neural_solvers.py --- pygmtools/neural_solvers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pygmtools/neural_solvers.py b/pygmtools/neural_solvers.py index e7c16271..f6f2b5dd 100644 --- a/pygmtools/neural_solvers.py +++ b/pygmtools/neural_solvers.py @@ -8,6 +8,7 @@ from pygmtools.utils import NOT_IMPLEMENTED_MSG, _check_shape, _get_shape, _unsqueeze, _squeeze, _check_data_type from pygmtools.classic_solvers import __check_gm_arguments + def pca_gm(feat1, feat2, A1, A2, n1=None, n2=None, @@ -320,7 +321,7 @@ def ipca_gm(feat1, feat2, A1, A2, n1=None, n2=None, .. note:: This function also supports non-batched input, by ignoring all batch dimensions in the input tensors. - .. dropdown:: Numpy Example + .. dropdown:: Numpy Example :: @@ -899,7 +900,7 @@ def ngm(K, n1=None, n2=None, n1max=None, n2max=None, x0=None, >>> X = pygm.ngm(K, n1, n2, network=net) >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy 1.0 - + .. 
dropdown:: PyTorch Example From e8460e2693b3ef9a2af175d73e93b93837116899 Mon Sep 17 00:00:00 2001 From: heatingma <115260102+heatingma@users.noreply.github.com> Date: Thu, 8 Dec 2022 17:47:55 +0800 Subject: [PATCH 10/18] add numpy_modules.py --- pygmtools/numpy_modules.py | 381 +++++++++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 pygmtools/numpy_modules.py diff --git a/pygmtools/numpy_modules.py b/pygmtools/numpy_modules.py new file mode 100644 index 00000000..33a81fef --- /dev/null +++ b/pygmtools/numpy_modules.py @@ -0,0 +1,381 @@ +import numpy as np +import math + +############################################ +# Affinity Modules # +############################################ + +class WeightedInnerProdAffinity(): + def __init__(self, d): + self.d = d + + stdv = 1. / math.sqrt(self.d) + self.A = np.random.uniform(-stdv,stdv,[self.d,self.d]) + self.A += np.eye(self.d) + + def forward(self, X, Y): + assert X.shape[2] == Y.shape[2] == self.d + M = np.matmul(X, self.A) + M = np.matmul(M, Y.swapaxes(1, 2)) + return M + +############################################ +# Graph Convolution Modules # +############################################ +def relu(X): + X[X<0] = 0 + return X + +def kaiming_uniform_(array: np.ndarray, a: float = 0, mode: str = 'fan_in', nonlinearity: str = 'leaky_relu'): + """Numpy's kaiming_uniform_""" + gain = math.sqrt(2/(a*a+1)) + fan_in = array.shape[1] + fan_out = array.shape[0] + if mode == 'fan_in': + fan_mode = fan_in + if mode == 'fan_out': + fan_mode = fan_out + bound = gain * math.sqrt(3/fan_mode) + array = uniform_(array, -bound, bound) + return array + +def uniform_(array,a,b): + array = np.random.uniform(a,b,array.shape) + return array + +def normalize_abs(array,axis): + array_shape = array.shape + k = abs(array).sum(axis) + k = k.repeat(array_shape[axis],(axis-1+len(array_shape)) % len(array_shape)).reshape(array_shape) + array = np.nan_to_num(array/k) + return array + +def expand_as(array,target_arary): + ori_array_shape = array.shape + array_axis = len(ori_array_shape) + ori_target_arary_shape = target_arary.shape + target_arary_axis = len(ori_target_arary_shape) + if(array_axis != target_arary_axis): + if(target_arary_axis > array_axis): + for _ in np.arange(target_arary_axis-array_axis): + array = np.expand_dims(array,axis=0) + else: + message = "The size of the input array exceeds the target array!" 
+ message += "\ninput array's shape:" + str(ori_array_shape) + message += "\ntarget array's shape:" + str(ori_target_arary_shape) + raise ValueError(message) + array_shape = array.shape + target_arary_shape = target_arary.shape + l = target_arary_axis + for i in np.arange(target_arary_axis): + k = l-i-1 + m = array_shape[k] + n = target_arary_shape[k] + if(m == 1): + array = array.repeat(n/m,axis=k) + elif(m != n): + message = "\nThe expanded size of the array (" + str(n) + ") must match the existing size (" + str(m) + message += ") at non-singleton dimension " + str(k) + message += "\ninput array's shape:" + str(ori_array_shape) + message += "\ntarget array's shape:" + str(ori_target_arary_shape) + raise ValueError(message) + return array + +class Linear(): + """Numpy's Linear""" + __constants__ = ['in_features', 'out_features'] + in_features: int + out_features: int + weight: np.ndarray + + def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None: + self.in_features = in_features + self.out_features = out_features + self.weight = np.empty((out_features, in_features), dtype='f') + if bias: + self.bias = np.empty(out_features, dtype='f') + else: + self.bias = None + self.reset_parameters() + + def reset_parameters(self) -> None: + self.weight = kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in = self.weight.shape[1] + bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0 + self.bias = uniform_(self.bias, -bound, bound) + + def forward(self, input: np.ndarray) -> np.ndarray: + output = np.matmul(input,self.weight.swapaxes(-1,-2)) + if self.bias is not None: + output = output + self.bias + return output + + def extra_repr(self) -> str: + return 'in_features={}, out_features={}, bias={}'.format( + self.in_features, self.out_features, self.bias is not None + ) + +class Sequential(): + + def __init__(self, *args): + self._modules = {} + for idx, module in enumerate(args): + self._modules[idx] = module + + def getitem(self, idx): + return self._modules[idx] + + def setitem(self, idx, module): + if (idx >= len(self._modules)): + raise ValueError("Maximum value exceeded!") + self._modules[idx] = module + + def delitem(self, idx): + for i in range(idx, len(self._modules) - 1): + self._modules[i] = self._modules[i + 1] + del self._modules[len(self._modules) - 1] + + def len(self): + return len(self._modules) + + def append(self, module): + new_idx = int(list(self._modules.keys())[-1]) + 1 + self._modules[new_idx] = module + + def forward(self, inputs): + for module in self._modules.values(): + inputs = module.forward(inputs) + return inputs + +class ReLU(): + + def __init__(self, inplace: bool = False): + self.inplace = inplace + + def forward(self, input: np.ndarray) -> np.ndarray: + return relu(input) + + def extra_repr(self) -> str: + inplace_str = 'inplace=True' if self.inplace else '' + return inplace_str +
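+# A tiny usage sketch of these torch-style wrappers (shapes invented for illustration): +# mlp = Sequential(Linear(16, 32), ReLU(), Linear(32, 8)) +# out = mlp.forward(np.random.rand(10, 4, 16)) # -> (10, 4, 8); forward() is called explicitly since these classes define no __call__ +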
+class Gconv(): + def __init__(self, in_features: int, out_features: int): + super(Gconv, self).__init__() + self.num_inputs = in_features + self.num_outputs = out_features + self.a_fc = Linear(self.num_inputs, self.num_outputs) + self.u_fc = Linear(self.num_inputs, self.num_outputs) + + def forward(self, A: np.ndarray, x: np.ndarray, norm: bool=True) -> np.ndarray: + r""" + Forward computation of graph convolution network. + + :param A: :math:`(b\times n\times n)` {0,1} adjacency matrix. :math:`b`: batch size, :math:`n`: number of nodes + :param x: :math:`(b\times n\times d)` input node embedding. :math:`d`: feature dimension + :param norm: normalize connectivity matrix or not + :return: :math:`(b\times n\times d^\prime)` new node embedding + """ + if norm is True: + A = normalize_abs(A,axis=-2) + ax = self.a_fc.forward(x) + ux = self.u_fc.forward(x) + x = np.matmul(A,relu(ax)) + relu(ux) # has size (bs, N, num_outputs) + return x + +class ChannelIndependentConv(): + r""" + Channel Independent Embedding Convolution. + Proposed by `"Yu et al. Learning deep graph matching with channel-independent embedding and Hungarian attention. + ICLR 2020." `_ + + :param in_features: the dimension of input node features + :param out_features: the dimension of output node features + :param in_edges: the dimension of input edge features + :param out_edges: (optional) the dimension of output edge features. It needs to be the same as ``out_features`` + """ + def __init__(self, in_features: int, out_features: int, in_edges: int, out_edges: int=None): + if out_edges is None: + out_edges = out_features + self.in_features = in_features + self.out_features = out_features + self.out_edges = out_edges + self.node_fc = Linear(in_features, out_features) + self.node_sfc = Linear(in_features, out_features) + self.edge_fc = Linear(in_edges, self.out_edges) + + def forward(self, A: np.ndarray, emb_node: np.ndarray, emb_edge: np.ndarray, mode: int=1): + r""" + :param A: :math:`(b\times n\times n)` {0,1} adjacency matrix. :math:`b`: batch size, :math:`n`: number of nodes + :param emb_node: :math:`(b\times n\times d_n)` input node embedding. :math:`d_n`: node feature dimension + :param emb_edge: :math:`(b\times n\times n\times d_e)` input edge embedding. :math:`d_e`: edge feature dimension + :param mode: 1 or 2, refer to the paper for details + :return: :math:`(b\times n\times d^\prime)` new node embedding, + :math:`(b\times n\times n\times d^\prime)` new edge embedding + """ + if mode == 1: + node_x = self.node_fc.forward(emb_node) + node_sx = self.node_sfc.forward(emb_node) + edge_x = self.edge_fc.forward(emb_edge) + + A = np.expand_dims(A,axis=-1) + A = expand_as(A,edge_x) * edge_x + + node_x = np.matmul(A.swapaxes(2, 3).swapaxes(1, 2), + np.expand_dims(node_x,axis=2).swapaxes(2, 3).swapaxes(1, 2)) + node_x = np.squeeze(node_x,axis=-1).swapaxes(1, 2) + node_x = relu(node_x) + relu(node_sx) + edge_x = relu(edge_x) + + return node_x, edge_x + + elif mode == 2: + node_x = self.node_fc.forward(emb_node) + node_sx = self.node_sfc.forward(emb_node) + edge_x = self.edge_fc.forward(emb_edge) + + d_x = np.expand_dims(node_x,axis=-1) - np.expand_dims(node_x,axis=2) + d_x = np.sum(d_x ** 2, axis=3, keepdims=False) + d_x = np.exp(-d_x) + + A = np.expand_dims(A,axis=-1) + A = expand_as(A,edge_x) * edge_x + + node_x = np.matmul(A.swapaxes(2, 3).swapaxes(1, 2), + np.expand_dims(node_x,axis=2).swapaxes(2, 3).swapaxes(1, 2)) + node_x = np.squeeze(node_x,axis=-1).swapaxes(1, 2) + node_x = relu(node_x) + relu(node_sx) + edge_x = relu(edge_x) + return node_x, edge_x + + else: + raise ValueError('Unknown mode {}. Possible options: 1 or 2'.format(mode)) +
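+# A minimal usage sketch of ChannelIndependentConv (shapes assumed for illustration): +# conv = ChannelIndependentConv(in_features=1024, out_features=2048, in_edges=1) +# node_emb, edge_emb = conv.forward(A, emb_node, emb_edge, mode=1) +# # with A: (b, n, n), emb_node: (b, n, 1024), emb_edge: (b, n, n, 1) -> node_emb: (b, n, 2048), edge_emb: (b, n, n, 2048) +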
+ + :param in_features: the dimension of input node features + :param num_features: the dimension of output node features + """ + def __init__(self, in_features, num_features): + self.gconv = Gconv(in_features, num_features) + + def forward(self, g1, *args): + # embx are tensors of size (bs, N, num_features) + emb1 = self.gconv.forward(*g1) + if len(args) == 0: + return emb1 + else: + returns = [emb1] + for g in args: + returns.append(self.gconv.forward(*g)) + return returns + +class Siamese_ChannelIndependentConv(): + r""" + Siamese Channel Independent Conv neural network for processing arbitrary number of graphs. + + :param in_features: the dimension of input node features + :param num_features: the dimension of output node features + :param in_edges: the dimension of input edge features + :param out_edges: (optional) the dimension of output edge features. It needs to be the same as ``num_features`` + """ + def __init__(self, in_features, num_features, in_edges, out_edges=None): + self.in_feature = in_features + self.gconv = ChannelIndependentConv(in_features, num_features, in_edges, out_edges) + + def forward(self, g1, *args): + r""" + Forward computation of Siamese Channel Independent Conv. + + :param g1: The first graph, which is a tuple of (:math:`(b\times n\times n)` {0,1} adjacency matrix, + :math:`(b\times n\times d_n)` input node embedding, :math:`(b\times n\times n\times d_e)` input edge embedding, + mode (``1`` or ``2``)) + :param args: Other graphs + :return: A list of tensors composed of new node embeddings :math:`(b\times n\times d^\prime)`, appended with new + edge embeddings :math:`(b\times n\times n\times d^\prime)` + """ + emb1, emb_edge1 = self.gconv.forward(*g1) + embs = [emb1] + emb_edges = [emb_edge1] + for g in args: + emb2, emb_edge2 = self.gconv.forward(*g) + embs.append(emb2), emb_edges.append(emb_edge2) + return embs + emb_edges + +class NGMConvLayer(): + def __init__(self, in_node_features, in_edge_features, out_node_features, out_edge_features, + sk_channel=0, edge_emb=False): + self.in_nfeat = in_node_features + self.in_efeat = in_edge_features + self.out_efeat = out_edge_features + self.sk_channel = sk_channel + assert out_node_features == out_edge_features + self.sk_channel + if self.sk_channel > 0: + self.out_nfeat = out_node_features - self.sk_channel + self.classifier = Linear(self.out_nfeat, self.sk_channel) + else: + self.out_nfeat = out_node_features + self.classifier = None + + if edge_emb: + self.e_func = Sequential( + Linear(self.in_efeat + self.in_nfeat, self.out_efeat), + ReLU(), + Linear(self.out_efeat, self.out_efeat), + ReLU() + ) + else: + self.e_func = None + + self.n_func = Sequential( + Linear(self.in_nfeat, self.out_nfeat), + #nn.Linear(self.in_nfeat, self.out_nfeat // self.out_efeat), + ReLU(), + Linear(self.out_nfeat, self.out_nfeat), + #nn.Linear(self.out_nfeat // self.out_efeat, self.out_nfeat // self.out_efeat), + ReLU(), + ) + + self.n_self_func = Sequential( + Linear(self.in_nfeat, self.out_nfeat), + ReLU(), + Linear(self.out_nfeat, self.out_nfeat), + ReLU() + ) + + def forward(self, A, W, x, n1=None, n2=None, norm=True, sk_func=None): + """ + :param A: adjacent matrix in 0/1 (b x n x n) + :param W: edge feature tensor (b x n x n x feat_dim) + :param x: node feature tensor (b x n x feat_dim) + """ + if self.e_func is not None: + W1 = np.expand_dims(A,axis=-1) * np.expand_dims(x,axis=1) + W2 = np.concatenate((W, W1), axis=-1) + W_new = self.e_func(W2) + else: + W_new = W + + if norm is True: + A = normalize_abs(A,axis=2) + + x1 = 
+class NGMConvLayer():
+    def __init__(self, in_node_features, in_edge_features, out_node_features, out_edge_features,
+                 sk_channel=0, edge_emb=False):
+        self.in_nfeat = in_node_features
+        self.in_efeat = in_edge_features
+        self.out_efeat = out_edge_features
+        self.sk_channel = sk_channel
+        assert out_node_features == out_edge_features + self.sk_channel
+        if self.sk_channel > 0:
+            self.out_nfeat = out_node_features - self.sk_channel
+            self.classifier = Linear(self.out_nfeat, self.sk_channel)
+        else:
+            self.out_nfeat = out_node_features
+            self.classifier = None
+
+        if edge_emb:
+            self.e_func = Sequential(
+                Linear(self.in_efeat + self.in_nfeat, self.out_efeat),
+                ReLU(),
+                Linear(self.out_efeat, self.out_efeat),
+                ReLU()
+            )
+        else:
+            self.e_func = None
+
+        self.n_func = Sequential(
+            Linear(self.in_nfeat, self.out_nfeat),
+            ReLU(),
+            Linear(self.out_nfeat, self.out_nfeat),
+            ReLU(),
+        )
+
+        self.n_self_func = Sequential(
+            Linear(self.in_nfeat, self.out_nfeat),
+            ReLU(),
+            Linear(self.out_nfeat, self.out_nfeat),
+            ReLU()
+        )
+
+    def forward(self, A, W, x, n1=None, n2=None, norm=True, sk_func=None):
+        """
+        :param A: adjacency matrix in {0,1} (b x n x n)
+        :param W: edge feature tensor (b x n x n x feat_dim)
+        :param x: node feature tensor (b x n x feat_dim)
+        """
+        if self.e_func is not None:
+            W1 = np.expand_dims(A,axis=-1) * np.expand_dims(x,axis=1)
+            W2 = np.concatenate((W, W1), axis=-1)
+            W_new = self.e_func.forward(W2)
+        else:
+            W_new = W
+
+        if norm is True:
+            A = normalize_abs(A,axis=2)
+
+        x1 = self.n_func.forward(x)
+        tmp1 = (np.expand_dims(A,axis=-1) * W_new).transpose((0, 3, 1, 2))
+        tmp2 = np.expand_dims(x1,axis=2).transpose((0, 3, 1, 2))
+        x2 = np.squeeze(np.matmul(tmp1,tmp2),axis=-1).swapaxes(1, 2)
+        x2 += self.n_self_func.forward(x)
+
+        if self.classifier is not None:
+            assert n1.max() * n2.max() == x.shape[1]
+            assert sk_func is not None
+            x3 = self.classifier.forward(x2)
+            n1_rep = n1.repeat(self.sk_channel, axis=0)
+            n2_rep = n2.repeat(self.sk_channel, axis=0)
+            x4 = x3.transpose((0,2,1)).reshape(x.shape[0] * self.sk_channel, n2.max(), n1.max()).swapaxes(1, 2)
+            x5 = np.ascontiguousarray(sk_func(x4, n1_rep, n2_rep, dummy_row=True).swapaxes(2, 1))
+
+            x6 = x5.reshape(x.shape[0], self.sk_channel, n1.max() * n2.max()).transpose((0, 2, 1))
+            x_new = np.concatenate((x2, x6), axis=-1)
+        else:
+            x_new = x2
+
+        return W_new, x_new
\ No newline at end of file

From 82ffdbd6cde25aa3677fcbec5d258d340ffe9ecf Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 18:08:13 +0800
Subject: [PATCH 11/18] Update numpy_backend.py

---
 pygmtools/numpy_backend.py | 406 +++++++++++++++++++++++++++++++++----
 1 file changed, 372 insertions(+), 34 deletions(-)

diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py
index e48d0a19..298d61cb 100644
--- a/pygmtools/numpy_backend.py
+++ b/pygmtools/numpy_backend.py
@@ -796,7 +796,379 @@ def gamgm_real(
 ############################################
 #         Neural Network Solvers           #
 ############################################
 
+from pygmtools.numpy_modules import *
+
+def add_module(self, name: str, module) -> None:
+    self._modules[name] = module
+
+class PCA_GM_Net():
+    """
+    Numpy implementation of PCA-GM and IPCA-GM network
+    """
+    def __init__(self, in_channel, hidden_channel, out_channel, num_layers, cross_iter_num=-1):
+        self.gnn_layer = num_layers
+        self.dict = {}
+        for i in range(self.gnn_layer):
+            if i == 0:
+                gnn_layer = Siamese_Gconv(in_channel, hidden_channel)
+            elif 0 < i < self.gnn_layer - 1:
+                gnn_layer = Siamese_Gconv(hidden_channel, hidden_channel)
+            else:
+                gnn_layer = Siamese_Gconv(hidden_channel, out_channel)
+                self.dict['affinity_{}'.format(i)] = WeightedInnerProdAffinity(out_channel)
+            self.dict['gnn_layer_{}'.format(i)] = gnn_layer
+            if i == self.gnn_layer - 2:  # only the second last layer will have cross-graph module
+                self.dict['cross_graph_{}'.format(i)] = Linear(hidden_channel * 2, hidden_channel)
+                if cross_iter_num <= 0:
+                    self.dict['affinity_{}'.format(i)] = WeightedInnerProdAffinity(hidden_channel)
+
+    def forward(self, feat1, feat2, A1, A2, n1, n2, cross_iter_num, sk_max_iter, sk_tau):
+        _sinkhorn_func = functools.partial(sinkhorn,
+                                           dummy_row=False, max_iter=sk_max_iter, tau=sk_tau, batched_operation=False)
+        emb1, emb2 = feat1, feat2
+        if cross_iter_num <= 0:
+            # Vanilla PCA-GM
+            for i in range(self.gnn_layer):
+                gnn_layer = self.dict['gnn_layer_{}'.format(i)]
+                emb1, emb2 = gnn_layer.forward([A1, emb1], [A2, emb2])
+                if i == self.gnn_layer - 2:
+                    affinity = self.dict['affinity_{}'.format(i)]
+                    s = affinity.forward(emb1, emb2)
+                    s = _sinkhorn_func(s, n1, n2)
+
+                    cross_graph = self.dict['cross_graph_{}'.format(i)]
+                    new_emb1 = cross_graph.forward(np.concatenate((emb1, np.matmul(s, emb2)), axis=-1))
+                    new_emb2 = cross_graph.forward(np.concatenate((emb2, np.matmul(s.swapaxes(1, 2), emb1)), axis=-1))
+                    emb1 = new_emb1
+                    emb2 = new_emb2
+
+            affinity = self.dict['affinity_{}'.format(self.gnn_layer - 1)]
+            s = affinity.forward(emb1, emb2)
+            s = _sinkhorn_func(s, n1, n2)
+
+        else:
+            # IPCA-GM
+            for i in range(self.gnn_layer - 1):
+                gnn_layer = self.dict['gnn_layer_{}'.format(i)]
+                emb1, emb2 = gnn_layer.forward([A1, emb1], [A2, emb2])
+
+            emb1_0, emb2_0 = emb1, emb2
+            s = np.zeros((emb1.shape[0], emb1.shape[1], emb2.shape[1]))
+
+            for x in range(cross_iter_num):
+                # cross-graph convolution in second last layer
+                i = self.gnn_layer - 2
+                cross_graph = self.dict['cross_graph_{}'.format(i)]
+                emb1 = cross_graph.forward(np.concatenate((emb1_0, np.matmul(s, emb2_0)), axis=-1))
+                emb2 = cross_graph.forward(np.concatenate((emb2_0, np.matmul(s.swapaxes(1, 2), emb1_0)), axis=-1))
+
+                # last layer
+                i = self.gnn_layer - 1
+                gnn_layer = self.dict['gnn_layer_{}'.format(i)]
+                emb1, emb2 = gnn_layer.forward([A1, emb1], [A2, emb2])
+                affinity = self.dict['affinity_{}'.format(i)]
+                s = affinity.forward(emb1, emb2)
+                s = _sinkhorn_func(s, n1, n2)
+
+        return s
+
+
+pca_gm_pretrain_path = {
+    'voc':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1En_9f5Zi5rSsS-JTIce7B1BV6ijGEAPd',
+           'd85f97498157d723793b8fc1501841ce'),
+    'willow':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1LAnK6ASYu0CO1fEe6WpvMbt5vskuvwLo',
+              'c32f7c8a7a6978619b8fdbb6ad5b505f'),
+    'voc-all':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1c_aw4wxEBuY7JFC4Rt8rlcise777n189',
+               '0e2725b3ac51f87f0303bbcfaae5df80')
+}
+
+def pca_gm(feat1, feat2, A1, A2, n1, n2,
+           in_channel, hidden_channel, out_channel, num_layers, sk_max_iter, sk_tau,
+           network, pretrain):
+    """
+    Numpy implementation of PCA-GM
+    """
+    if feat1 is None:
+        forward_pass = False
+    else:
+        forward_pass = True
+    if network is None:
+        network = PCA_GM_Net(in_channel, hidden_channel, out_channel, num_layers)
+        if pretrain:
+            if pretrain in pca_gm_pretrain_path.keys():
+                url, md5 = pca_gm_pretrain_path[pretrain]
+                filename = pygmtools.utils.download(f'pca_gm_{pretrain}_numpy.npy', url, md5)
+                pca_gm_numpy_dict = np.load(filename,allow_pickle=True)
+                for i in range(network.gnn_layer):
+                    gnn_layer = network.dict['gnn_layer_{}'.format(i)]
+                    gnn_layer.gconv.a_fc.weight = pca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.weight'.format(i)]
+                    gnn_layer.gconv.a_fc.bias = pca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.bias'.format(i)]
+                    gnn_layer.gconv.u_fc.weight = pca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.weight'.format(i)]
+                    gnn_layer.gconv.u_fc.bias = pca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.bias'.format(i)]
+                    if i == network.gnn_layer - 2:
+                        affinity = network.dict['affinity_{}'.format(i)]
+                        affinity.A = pca_gm_numpy_dict.item()['affinity_{}.A'.format(i)]
+                        cross_graph = network.dict['cross_graph_{}'.format(i)]
+                        cross_graph.weight = pca_gm_numpy_dict.item()['cross_graph_{}.weight'.format(i)]
+                        cross_graph.bias = pca_gm_numpy_dict.item()['cross_graph_{}.bias'.format(i)]
+                affinity = network.dict['affinity_{}'.format(network.gnn_layer - 1)]
+                affinity.A = pca_gm_numpy_dict.item()['affinity_{}.A'.format(network.gnn_layer - 1)]
+            else:
+                raise ValueError(f'Unknown pretrain tag. Available tags: {pca_gm_pretrain_path.keys()}')
+    if forward_pass:
+        batch_size = feat1.shape[0]
+        if n1 is None:
+            n1 = np.array([feat1.shape[1]] * batch_size)
+        if n2 is None:
+            n2 = np.array([feat2.shape[1]] * batch_size)
+        result = network.forward(feat1, feat2, A1, A2, n1, n2, -1, sk_max_iter, sk_tau)
+    else:
+        result = None
+    return result, network
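# Illustrative aside (editor's sketch, not part of the patch): the intended entry point for
# the function above is the public dispatcher, which routes here when the numpy backend is
# selected. Hedged usage sketch (arbitrary shapes; 1024 matches the default in_channel;
# pretrained weights are fetched on first use):
import numpy as np
import pygmtools as pygm

pygm.BACKEND = 'numpy'
feat1_demo = np.random.rand(1, 10, 1024)   # (batch, nodes, feature dim)
feat2_demo = np.random.rand(1, 10, 1024)
adj_demo = np.ones((1, 10, 10))            # {0,1} adjacency
X_demo = pygm.pca_gm(feat1_demo, feat2_demo, adj_demo, adj_demo, pretrain='voc')
# X_demo is a (1, 10, 10) doubly-stochastic matching matrix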
+ipca_gm_pretrain_path = {
+    'voc':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=13g9iBjXZ804bKo6p8wMQe8yNUZBwVGJj',
+           '4479a25558780a4b4c9891b4386659cd'),
+    'willow':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1vq0FqjPhiSR80cu9jk0qMljkC4gSFvQA',
+              'ada1df350d45cc877f08e12919993345')
+}
+
+def ipca_gm(feat1, feat2, A1, A2, n1, n2,
+            in_channel, hidden_channel, out_channel, num_layers, cross_iter, sk_max_iter, sk_tau,
+            network, pretrain):
+    """
+    Numpy implementation of IPCA-GM
+    """
+    if feat1 is None:
+        forward_pass = False
+    else:
+        forward_pass = True
+    if network is None:
+        network = PCA_GM_Net(in_channel, hidden_channel, out_channel, num_layers, cross_iter)
+        if pretrain:
+            if pretrain in ipca_gm_pretrain_path.keys():
+                url, md5 = ipca_gm_pretrain_path[pretrain]
+                filename = pygmtools.utils.download(f'ipca_gm_{pretrain}_numpy.npy', url, md5)
+                ipca_gm_numpy_dict = np.load(filename,allow_pickle=True)
+                for i in range(network.gnn_layer-1):
+                    gnn_layer = network.dict['gnn_layer_{}'.format(i)]
+                    gnn_layer.gconv.a_fc.weight = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.weight'.format(i)]
+                    gnn_layer.gconv.a_fc.bias = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.bias'.format(i)]
+                    gnn_layer.gconv.u_fc.weight = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.weight'.format(i)]
+                    gnn_layer.gconv.u_fc.bias = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.bias'.format(i)]
+
+                for x in range(cross_iter):
+                    i = network.gnn_layer - 2
+                    cross_graph = network.dict['cross_graph_{}'.format(i)]
+                    cross_graph.weight = ipca_gm_numpy_dict.item()['cross_graph_{}.weight'.format(i)]
+                    cross_graph.bias = ipca_gm_numpy_dict.item()['cross_graph_{}.bias'.format(i)]
+
+                i = network.gnn_layer - 1
+                gnn_layer = network.dict['gnn_layer_{}'.format(i)]
+                gnn_layer.gconv.a_fc.weight = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.weight'.format(i)]
+                gnn_layer.gconv.a_fc.bias = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.a_fc.bias'.format(i)]
+                gnn_layer.gconv.u_fc.weight = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.weight'.format(i)]
+                gnn_layer.gconv.u_fc.bias = ipca_gm_numpy_dict.item()['gnn_layer_{}.gconv.u_fc.bias'.format(i)]
+
+                affinity = network.dict['affinity_{}'.format(i)]
+                affinity.A = ipca_gm_numpy_dict.item()['affinity_{}.A'.format(i)]
+            else:
+                raise ValueError(f'Unknown pretrain tag. Available tags: {ipca_gm_pretrain_path.keys()}')
+    if forward_pass:
+        batch_size = feat1.shape[0]
+        if n1 is None:
+            n1 = np.array([feat1.shape[1]] * batch_size)
+        if n2 is None:
+            n2 = np.array([feat2.shape[1]] * batch_size)
+        result = network.forward(feat1, feat2, A1, A2, n1, n2, cross_iter, sk_max_iter, sk_tau)
+    else:
+        result = None
+    return result, network
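# Illustrative aside (editor's sketch, not part of the patch): the checkpoints loaded above
# are plain numpy pickles, i.e. a flat {parameter_name: ndarray} dict stored with np.save
# and recovered via .item(). Round-trip sketch (the file name and key are hypothetical):
import numpy as np

params = {'gnn_layer_0.gconv.a_fc.weight': np.zeros((1024, 2048))}
np.save('demo_weights.npy', params)  # the dict is wrapped in a 0-d object array
restored = np.load('demo_weights.npy', allow_pickle=True).item()
assert restored['gnn_layer_0.gconv.a_fc.weight'].shape == (1024, 2048)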
+
+class CIE_Net():
+    """
+    Numpy implementation of CIE graph matching network
+    """
+    def __init__(self, in_node_channel, in_edge_channel, hidden_channel, out_channel, num_layers):
+        self.gnn_layer = num_layers
+        self.dict = {}
+        for i in range(self.gnn_layer):
+            if i == 0:
+                gnn_layer = Siamese_ChannelIndependentConv(in_node_channel, hidden_channel, in_edge_channel)
+            elif 0 < i < self.gnn_layer - 1:
+                gnn_layer = Siamese_ChannelIndependentConv(hidden_channel, hidden_channel, hidden_channel)
+            else:
+                gnn_layer = Siamese_ChannelIndependentConv(hidden_channel, out_channel, hidden_channel)
+                self.dict['affinity_{}'.format(i)] = WeightedInnerProdAffinity(out_channel)
+            self.dict['gnn_layer_{}'.format(i)] = gnn_layer
+            if i == self.gnn_layer - 2:  # only the second last layer will have cross-graph module
+                self.dict['cross_graph_{}'.format(i)] = Linear(hidden_channel * 2, hidden_channel)
+                self.dict['affinity_{}'.format(i)] = WeightedInnerProdAffinity(hidden_channel)
+
+    def forward(self, feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1, n2, sk_max_iter, sk_tau):
+        _sinkhorn_func = functools.partial(sinkhorn,
+                                           dummy_row=False, max_iter=sk_max_iter, tau=sk_tau, batched_operation=False)
+        emb1, emb2 = feat_node1, feat_node2
+        emb_edge1, emb_edge2 = feat_edge1, feat_edge2
+
+        for i in range(self.gnn_layer):
+            gnn_layer = self.dict['gnn_layer_{}'.format(i)]
+            # during forward process, the network structure will not change
+            emb1, emb2, emb_edge1, emb_edge2 = gnn_layer.forward([A1, emb1, emb_edge1], [A2, emb2, emb_edge2])
+
+            if i == self.gnn_layer - 2:
+                affinity = self.dict['affinity_{}'.format(i)]
+                s = affinity.forward(emb1, emb2)
+                s = _sinkhorn_func(s, n1, n2)
+
+                cross_graph = self.dict['cross_graph_{}'.format(i)]
+                new_emb1 = cross_graph.forward(np.concatenate((emb1, np.matmul(s, emb2)), axis=-1))
+                new_emb2 = cross_graph.forward(np.concatenate((emb2, np.matmul(s.swapaxes(1, 2), emb1)), axis=-1))
+                emb1 = new_emb1
+                emb2 = new_emb2
+
+        affinity = self.dict['affinity_{}'.format(self.gnn_layer - 1)]
+        s = affinity.forward(emb1, emb2)
+        s = _sinkhorn_func(s, n1, n2)
+        return s
+
+cie_pretrain_path = {
+    'voc':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1rP9sJY1fh493LLMWw-7RaeFAMHlbSs2D',
+           '9cbd55fa77d124b95052378643715bae'),
+    'willow':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1cMiXrSQjXZ9lDxeB6194z1-luyslVTR8',
+              'bd36e1bf314503c1f1482794e1648b18')
+}
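# Illustrative aside (editor's sketch, not part of the patch): CIE_Net above additionally
# consumes per-edge features. A hedged call sketch through the public API (arbitrary
# shapes; the public cie solver's default in_edge_channel is 1):
import numpy as np
import pygmtools as pygm

pygm.BACKEND = 'numpy'
nf1, nf2 = np.random.rand(1, 10, 1024), np.random.rand(1, 10, 1024)
adj = np.ones((1, 10, 10))
ef1, ef2 = np.random.rand(1, 10, 10, 1), np.random.rand(1, 10, 10, 1)  # edge features
X_cie = pygm.cie(nf1, nf2, adj, adj, ef1, ef2, pretrain='voc')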
+
+def cie(feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1, n2,
+        in_node_channel, in_edge_channel, hidden_channel, out_channel, num_layers, sk_max_iter, sk_tau,
+        network, pretrain):
+    """
+    Numpy implementation of CIE
+    """
+    if feat_node1 is None:
+        forward_pass = False
+    else:
+        forward_pass = True
+    if network is None:
+        network = CIE_Net(in_node_channel, in_edge_channel, hidden_channel, out_channel, num_layers)
+        if pretrain:
+            if pretrain in cie_pretrain_path.keys():
+                url, md5 = cie_pretrain_path[pretrain]
+                filename = pygmtools.utils.download(f'cie_{pretrain}_numpy.npy', url, md5)
+                cie_numpy_dict = np.load(filename,allow_pickle=True)
+                for i in range(network.gnn_layer):
+                    gnn_layer = network.dict['gnn_layer_{}'.format(i)]
+                    gnn_layer.gconv.node_fc.weight = cie_numpy_dict.item()['gnn_layer_{}.gconv.node_fc.weight'.format(i)]
+                    gnn_layer.gconv.node_fc.bias = cie_numpy_dict.item()['gnn_layer_{}.gconv.node_fc.bias'.format(i)]
+                    gnn_layer.gconv.node_sfc.weight = cie_numpy_dict.item()['gnn_layer_{}.gconv.node_sfc.weight'.format(i)]
+                    gnn_layer.gconv.node_sfc.bias = cie_numpy_dict.item()['gnn_layer_{}.gconv.node_sfc.bias'.format(i)]
+                    gnn_layer.gconv.edge_fc.weight = cie_numpy_dict.item()['gnn_layer_{}.gconv.edge_fc.weight'.format(i)]
+                    gnn_layer.gconv.edge_fc.bias = cie_numpy_dict.item()['gnn_layer_{}.gconv.edge_fc.bias'.format(i)]
+                    if i == network.gnn_layer - 2:
+                        affinity = network.dict['affinity_{}'.format(i)]
+                        affinity.A = cie_numpy_dict.item()['affinity_{}.A'.format(i)]
+                        cross_graph = network.dict['cross_graph_{}'.format(i)]
+                        cross_graph.weight = cie_numpy_dict.item()['cross_graph_{}.weight'.format(i)]
+                        cross_graph.bias = cie_numpy_dict.item()['cross_graph_{}.bias'.format(i)]
+                affinity = network.dict['affinity_{}'.format(network.gnn_layer - 1)]
+                affinity.A = cie_numpy_dict.item()['affinity_{}.A'.format(network.gnn_layer - 1)]
+            else:
+                raise ValueError(f'Unknown pretrain tag. Available tags: {cie_pretrain_path.keys()}')
+    if forward_pass:
+        batch_size = feat_node1.shape[0]
+        if n1 is None:
+            n1 = np.array([feat_node1.shape[1]] * batch_size)
+        if n2 is None:
+            n2 = np.array([feat_node2.shape[1]] * batch_size)
+        result = network.forward(feat_node1, feat_node2, A1, A2, feat_edge1, feat_edge2, n1, n2, sk_max_iter, sk_tau)
+    else:
+        result = None
+
+    return result, network
+
+
+class NGM_Net():
+    """
+    Numpy implementation of NGM network
+    """
+    def __init__(self, gnn_channels, sk_emb):
+        self.gnn_layer = len(gnn_channels)
+        self.dict = {}
+        for i in range(self.gnn_layer):
+            if i == 0:
+                gnn_layer = NGMConvLayer(1, 1,
+                                         gnn_channels[i] + sk_emb, gnn_channels[i],
+                                         sk_channel=sk_emb, edge_emb=False)
+            else:
+                gnn_layer = NGMConvLayer(gnn_channels[i - 1] + sk_emb, gnn_channels[i - 1],
+                                         gnn_channels[i] + sk_emb, gnn_channels[i],
+                                         sk_channel=sk_emb, edge_emb=False)
+            self.dict['gnn_layer_{}'.format(i)] = gnn_layer
+        self.classifier = Linear(gnn_channels[-1] + sk_emb, 1)
+
+    def forward(self, K, n1, n2, n1max, n2max, v0, sk_max_iter, sk_tau):
+        _sinkhorn_func = functools.partial(sinkhorn,
+                                           dummy_row=False, max_iter=sk_max_iter, tau=sk_tau, batched_operation=False)
+        emb = v0
+        A = (K != 0)
+        emb_K = np.expand_dims(K,axis=-1)
+
+        # NGM qap solver
+        for i in range(self.gnn_layer):
+            gnn_layer = self.dict['gnn_layer_{}'.format(i)]
+            emb_K, emb = gnn_layer.forward(A, emb_K, emb, n1, n2, sk_func=_sinkhorn_func)
+        v = self.classifier.forward(emb)
+
+        s = v.reshape(v.shape[0], n2max, -1).swapaxes(1, 2)
+
+        return _sinkhorn_func(s, n1, n2, dummy_row=True)
+
+ngm_pretrain_path = {
+    'voc':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1LY93fLCjH5vDcWsjZxGPmXmrYMF8HZIR',
+           '19cd48afab71b3277d2062624934702c'),
+    'willow':('https://drive.google.com/u/0/uc?export=download&confirm=Z-AR&id=1iD8FHqahRsVV_H6o3ByB6nwBHU8sEgnt',
+              '31968e30c399845f34d80733d0118b8b')
+}
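# Illustrative aside (editor's sketch, not part of the patch): NGM_Net treats the affinity
# matrix K itself as a graph (the association graph) and runs NGMConvLayer on it. A hedged
# end-to-end sketch with the public helpers, matching a graph against a node-reversed
# isomorphic copy of itself:
import numpy as np
import pygmtools as pygm

pygm.BACKEND = 'numpy'
A1_ngm = np.random.rand(1, 8, 8)
A1_ngm = (A1_ngm + A1_ngm.swapaxes(1, 2)) / 2         # symmetric edge weights
A2_ngm = np.ascontiguousarray(A1_ngm[:, ::-1, ::-1])  # reverse the node order
conn1, edge1, _ = pygm.utils.dense_to_sparse(A1_ngm)
conn2, edge2, _ = pygm.utils.dense_to_sparse(A2_ngm)
K_demo = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2)
X_ngm = pygm.ngm(K_demo, np.array([8]), np.array([8]), pretrain='voc')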
+
+def ngm(K, n1, n2, n1max, n2max, x0, gnn_channels, sk_emb, sk_max_iter, sk_tau, network, return_network, pretrain):
+    """
+    Numpy implementation of NGM
+    """
+    if K is None:
+        forward_pass = False
+    else:
+        forward_pass = True
+    if network is None:
+        network = NGM_Net(gnn_channels, sk_emb)
+        if pretrain:
+            if pretrain in ngm_pretrain_path.keys():
+                url, md5 = ngm_pretrain_path[pretrain]
+                filename = pygmtools.utils.download(f'ngm_{pretrain}_numpy.npy', url, md5)
+                ngm_numpy_dict = np.load(filename,allow_pickle=True)
+                for i in range(network.gnn_layer):
+                    gnn_layer = network.dict['gnn_layer_{}'.format(i)]
+                    gnn_layer.classifier.weight = ngm_numpy_dict.item()['gnn_layer_{}.classifier.weight'.format(i)]
+                    gnn_layer.classifier.bias = ngm_numpy_dict.item()['gnn_layer_{}.classifier.bias'.format(i)]
+                    gnn_layer.n_func.getitem(0).weight = ngm_numpy_dict.item()['gnn_layer_{}.n_func.0.weight'.format(i)]
+                    gnn_layer.n_func.getitem(0).bias = ngm_numpy_dict.item()['gnn_layer_{}.n_func.0.bias'.format(i)]
+                    gnn_layer.n_func.getitem(2).weight = ngm_numpy_dict.item()['gnn_layer_{}.n_func.2.weight'.format(i)]
+                    gnn_layer.n_func.getitem(2).bias = ngm_numpy_dict.item()['gnn_layer_{}.n_func.2.bias'.format(i)]
+                    gnn_layer.n_self_func.getitem(0).weight = ngm_numpy_dict.item()['gnn_layer_{}.n_self_func.0.weight'.format(i)]
+                    gnn_layer.n_self_func.getitem(0).bias = ngm_numpy_dict.item()['gnn_layer_{}.n_self_func.0.bias'.format(i)]
+                    gnn_layer.n_self_func.getitem(2).weight = ngm_numpy_dict.item()['gnn_layer_{}.n_self_func.2.weight'.format(i)]
+                    gnn_layer.n_self_func.getitem(2).bias = ngm_numpy_dict.item()['gnn_layer_{}.n_self_func.2.bias'.format(i)]
+                network.classifier.weight = ngm_numpy_dict.item()['classifier.weight']
+                network.classifier.bias = ngm_numpy_dict.item()['classifier.bias']
+            else:
+                raise ValueError(f'Unknown pretrain tag. Available tags: {ngm_pretrain_path.keys()}')
+    if forward_pass:
+        batch_num, n1, n2, n1max, n2max, n1n2, v0 = _check_and_init_gm(K, n1, n2, n1max, n2max, x0)
+        v0 = v0 / np.mean(v0)
+        result = network.forward(K, n1, n2, n1max, n2max, v0, sk_max_iter, sk_tau)
+    else:
+        result = None
+    return result, network
 
 #############################################
 #              Utils Functions              #
 #############################################
 
@@ -913,40 +1285,6 @@ def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0):
     else:
         return np.stack(As,axis=0), X_gt
 
-
-"""
-def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, n2:np.ndarray) -> np.ndarray:
-
-    #Numpy implementation of permutation_loss
-
-    batch_num = pred_dsmat.shape[0]
-
-    pred_dsmat = pred_dsmat.to(dtype='f')
-
-    if not np.all((pred_dsmat >= 0) * (pred_dsmat <= 1)):
-        raise ValueError("pred_dsmat contains invalid numerical entries.")
-    if not np.all((gt_perm >= 0) * (gt_perm <= 1)):
-        raise ValueError("gt_perm contains invalid numerical entries.")
-
-    if n1 is None:
-        n1 = np.array([pred_dsmat.shape[1] for _ in range(batch_num)])
-    if n2 is None:
-        n2 = np.array([pred_dsmat.shape[2] for _ in range(batch_num)])
-
-    loss = np.array(0.)
-    n_sum = np.zeros_like(loss)
-    for b in range(batch_num):
-        batch_slice = [b, slice(n1[b]), slice(n2[b])]
-        loss += array.nn.functional.binary_cross_entropy(
-            pred_dsmat[batch_slice],
-            gt_perm[batch_slice],
-            reduction='sum')
-        n_sum += n1[b].to(n_sum.dtype).to(pred_dsmat.device)
-
-    return loss / n_sum
-"""
-
-
 def _aff_mat_from_node_edge_aff(node_aff: np.ndarray, edge_aff: np.ndarray, connectivity1: np.ndarray, connectivity2: np.ndarray,
                                 n1, n2, ne1, ne2):
     """
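The block removed above was a stale half-port: note the torch leftovers (``.to(dtype=...)``, ``array.nn.functional``, ``.device``) that could never run under numpy. For reference, a working numpy equivalent of this binary-cross-entropy permutation loss would look roughly like the following editor's sketch (not part of the patch):

    import numpy as np

    def permutation_loss(pred_dsmat, gt_perm, n1, n2, eps=1e-12):
        loss, n_sum = 0.0, 0.0
        for b in range(pred_dsmat.shape[0]):
            p = pred_dsmat[b, :n1[b], :n2[b]].astype(float)
            g = gt_perm[b, :n1[b], :n2[b]].astype(float)
            # summed elementwise BCE, normalized by the number of source nodes
            loss += -np.sum(g * np.log(p + eps) + (1 - g) * np.log(1 - p + eps))
            n_sum += n1[b]
        return loss / n_sum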
From 3a8b785846ba3d733f6778d7962beea7b3665680 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 18:09:05 +0800
Subject: [PATCH 12/18] change download and add get_md5

---
 pygmtools/utils.py | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/pygmtools/utils.py b/pygmtools/utils.py
index d76f9c59..ded6d5c5 100644
--- a/pygmtools/utils.py
+++ b/pygmtools/utils.py
@@ -13,9 +13,9 @@
 import shutil
 from tqdm.auto import tqdm
 import inspect
-
+import wget
 import pygmtools
-
+import pdb
 NOT_IMPLEMENTED_MSG = \
     'The backend function for {} is not implemented. ' \
     'If you are a user, please use other backends as workarounds.' \
@@ -1080,12 +1080,10 @@ def _mm(input1, input2, backend=None):
     )
     return fn(*args)
 
-
 def download(filename, url, md5=None, retries=5):
     r"""
     Check if content exists. If not, download the content to ``<user cache path>/pygmtools/``. ``<user cache path>`` depends on your system. For example, on Debian, it should be ``$HOME/.cache``.
-
     :param filename: the destination file name
     :param url: the url
     :param md5: (optional) the md5sum to verify the content. It should match the result of ``md5sum file`` on Linux.
@@ -1100,27 +1098,32 @@ def download(filename, url, md5=None, retries=5):
         os.makedirs(dirs)
     filename = os.path.join(dirs, filename)
     if not os.path.exists(filename):
-        print(f'Downloading to {filename}...')
-        down_res = requests.get(url, stream=True)
-        file_size = int(down_res.headers.get('Content-Length', 0))
-        with tqdm.wrapattr(down_res.raw, "read", total=file_size) as content:
-            with open(filename, 'wb') as file:
-                shutil.copyfileobj(content, file)
-
+        try:
+            print(f'\nDownloading to {filename}...')
+            down_res = requests.get(url, stream=True)
+            file_size = int(down_res.headers.get('Content-Length', 0))
+            with tqdm.wrapattr(down_res.raw, "read", total=file_size) as content:
+                with open(filename, 'wb') as file:
+                    shutil.copyfileobj(content, file)
+        except:
+            wget.download(url,out=filename)
     if md5 is not None:
-        hash_md5 = hashlib.md5()
-        chunk = 8192
-        with open(filename, 'rb') as file_to_check:
-            while True:
-                buffer = file_to_check.read(chunk)
-                if not buffer:
-                    break
-                hash_md5.update(buffer)
-        md5_returned = hash_md5.hexdigest()
+        md5_returned = get_md5(filename)
         if md5 != md5_returned:
             print('Warning: MD5 check failed for the downloaded content. Retrying...')
             os.remove(filename)
             time.sleep(1)
             return download(filename, url, md5, retries - 1)
-
     return filename
+
+def get_md5(filename):
+    hash_md5 = hashlib.md5()
+    chunk = 8192
+    with open(filename, 'rb') as file_to_check:
+        while True:
+            buffer = file_to_check.read(chunk)
+            if not buffer:
+                break
+            hash_md5.update(buffer)
+    md5_returned = hash_md5.hexdigest()
+    return md5_returned

From 966359d82327496ef2c61749d5ec745839a12cd0 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 18:10:33 +0800
Subject: [PATCH 13/18] add 'numpy'

---
 tests/test_neural_solvers.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_neural_solvers.py b/tests/test_neural_solvers.py
index dbb6fe41..74d83f1f 100644
--- a/tests/test_neural_solvers.py
+++ b/tests/test_neural_solvers.py
@@ -101,18 +101,18 @@ def _test_neural_solver_on_isomorphic_graphs(graph_num_nodes, node_feat_dim, sol
 def test_pca_gm():
     _test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.pca_gm, 'individual-graphs', {
         'pretrain': ['voc', 'willow', 'voc-all'],
-    }, ['pytorch', 'jittor'])
+    }, ['pytorch', 'numpy', 'jittor'])
 
 def test_ipca_gm():
     _test_neural_solver_on_isomorphic_graphs(list(range(10, 30, 2)), 1024, pygm.ipca_gm, 'individual-graphs', {
         'pretrain': ['voc', 'willow'],
-    }, ['pytorch', 'jittor'])
+    }, ['pytorch', 'numpy', 'jittor'])
 
 def test_cie():
     args = (
         list(range(10, 30, 2)), 1024, pygm.cie, 'individual-graphs-edge', {
             'pretrain': ['voc', 'willow'],
-        }, ['pytorch', 'jittor']
+        }, ['pytorch', 'numpy', 'jittor']
     )
     max_retries = 5
     for i in range(max_retries - 1):
@@ -132,7 +132,7 @@ def test_ngm():
         'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn],
         'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn],
         'pretrain': ['voc', 'willow'],
-    }, ['pytorch', 'jittor'])
+    }, ['pytorch', 'numpy', 'jittor'])
 
 
 if __name__ == '__main__':

From cc8615671e5831752c6ed0d73d25e9900ecccfb0 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 18:14:35 +0800
Subject: [PATCH 14/18] delete 'pdb'

---
 pygmtools/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygmtools/utils.py b/pygmtools/utils.py
index ded6d5c5..25fb832b 100644
--- a/pygmtools/utils.py
+++ b/pygmtools/utils.py
@@ -15,7 +15,7 @@
 import inspect
 import wget
 import pygmtools
-import pdb
+
 NOT_IMPLEMENTED_MSG = \
     'The backend function for {} is not implemented. ' \
     'If you are a user, please use other backends as workarounds.' \
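The ``get_md5`` helper added in patch 12 hashes the file in fixed 8 KiB chunks, so even large checkpoint files are verified in constant memory. A stdlib-only equivalent, for reference:

    import hashlib

    def file_md5(path, chunk=8192):
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for block in iter(lambda: f.read(chunk), b''):  # read until EOF
                h.update(block)
        return h.hexdigest()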
From df42a21d74741f5f3bd02d7d7f09b069fa86e54b Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 20:02:59 +0800
Subject: [PATCH 15/18] Update requirements.txt

---
 tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/requirements.txt b/tests/requirements.txt
index a921300d..435b7496 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -10,3 +10,4 @@ tqdm
 jittor
 appdirs>=1.4.4
 tensorflow
+wget

From 45b8725b2700670e533bf2a7468ed2cc1e2887b4 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Thu, 8 Dec 2022 20:04:31 +0800
Subject: [PATCH 16/18] add wget

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index af1fa5df..ff6a210d 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ def get_property(prop, project):
 VERSION = get_property('__version__', NAME)
 
 REQUIRED = [
-    'requests>=2.25.1', 'scipy>=1.4.1', 'Pillow>=7.2.0', 'numpy>=1.18.5', 'easydict>=1.7', 'appdirs>=1.4.4', 'tqdm>=4.64.1'
+    'requests>=2.25.1', 'scipy>=1.4.1', 'Pillow>=7.2.0', 'numpy>=1.18.5', 'easydict>=1.7', 'appdirs>=1.4.4', 'tqdm>=4.64.1', 'wget>=3.2'
 ]
 
 EXTRAS = {}
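The next patch reworks the download fallback so the transport alternates with the retry counter: odd retry counts use ``requests`` (streamed, with a tqdm progress bar), even ones fall back to the ``wget`` package. A condensed editor's sketch of that control flow, simplified and without the MD5 step:

    import shutil
    import requests
    import wget

    def fetch(url, out, retries=5):
        if retries < 0:
            raise RuntimeError('out of retries')
        try:
            if retries % 2 == 1:
                with requests.get(url, stream=True) as r, open(out, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)  # streamed download via requests
            else:
                wget.download(url, out=out)       # fallback transport
        except Exception:
            return fetch(url, out, retries - 1)
        return out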
From 2b6f1a60c53ffad5ae8645a51de087cecabac153 Mon Sep 17 00:00:00 2001
From: Runzhong Wang <18309862+rogerwwww@users.noreply.github.com>
Date: Thu, 8 Dec 2022 23:53:53 +0800
Subject: [PATCH 17/18] update downloading logic

---
 pygmtools/utils.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/pygmtools/utils.py b/pygmtools/utils.py
index 25fb832b..c6c937d6 100644
--- a/pygmtools/utils.py
+++ b/pygmtools/utils.py
@@ -1098,17 +1098,21 @@ def download(filename, url, md5=None, retries=5):
         os.makedirs(dirs)
     filename = os.path.join(dirs, filename)
     if not os.path.exists(filename):
-        try:
-            print(f'\nDownloading to {filename}...')
-            down_res = requests.get(url, stream=True)
-            file_size = int(down_res.headers.get('Content-Length', 0))
-            with tqdm.wrapattr(down_res.raw, "read", total=file_size) as content:
-                with open(filename, 'wb') as file:
-                    shutil.copyfileobj(content, file)
-        except:
+        print(f'\nDownloading to {filename}...')
+        if retries % 2 == 1:
+            try:
+                down_res = requests.get(url, stream=True)
+                file_size = int(down_res.headers.get('Content-Length', 0))
+                with tqdm.wrapattr(down_res.raw, "read", total=file_size) as content:
+                    with open(filename, 'wb') as file:
+                        shutil.copyfileobj(content, file)
+            except requests.exceptions.ConnectionError as err:
+                print('Warning: Network error. Retrying...\n', err)
+                return download(filename, url, md5, retries - 1)
+        else:
+            wget.download(url,out=filename)
     if md5 is not None:
-        md5_returned = get_md5(filename)
+        md5_returned = _get_md5(filename)
         if md5 != md5_returned:
             print('Warning: MD5 check failed for the downloaded content. Retrying...')
             os.remove(filename)
@@ -1116,7 +1120,7 @@ def download(filename, url, md5=None, retries=5):
         return download(filename, url, md5, retries - 1)
     return filename
 
-def get_md5(filename):
+def _get_md5(filename):
     hash_md5 = hashlib.md5()
     chunk = 8192
     with open(filename, 'rb') as file_to_check:

From 6ab73d72495395603b368a1df2ded903404e9800 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Fri, 9 Dec 2022 00:15:07 +0800
Subject: [PATCH 18/18] Update neural_solvers.py

---
 pygmtools/neural_solvers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygmtools/neural_solvers.py b/pygmtools/neural_solvers.py
index f6f2b5dd..b815d743 100644
--- a/pygmtools/neural_solvers.py
+++ b/pygmtools/neural_solvers.py
@@ -889,7 +889,7 @@ def ngm(K, n1=None, n2=None, n1max=None, n2max=None, x0=None,
             1.0
 
             # You may also load other pretrained weights
-            >>> X, net = pygm.cie(feat1, feat2, A1, A2, e_feat1, e_feat2, n1, n2, return_network=True, pretrain='willow')
+            >>> X, net = pygm.ngm(K, n1, n2, return_network=True, pretrain='willow')
            Downloading to ~/.cache/pygmtools/ngm_willow_numpy.npy...
            >>> (pygm.hungarian(X) * X_gt).sum() / X_gt.sum() # accuracy
            1.0
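As a closing note, the accuracy metric used in these doctests counts correctly matched nodes: ``pygm.hungarian`` discretizes the doubly-stochastic output into a permutation matrix, which is then compared entrywise against the ground truth. A standalone numpy illustration:

    import numpy as np

    X_gt = np.eye(4)                        # ground-truth permutation
    pred = np.eye(4)[[1, 0, 2, 3]]          # prediction with two rows swapped
    acc = (pred * X_gt).sum() / X_gt.sum()  # -> 0.5, two of four nodes matched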