From e12ea7f037f2c03d35839b924501dfefb1123d2f Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 24 Nov 2023 00:59:03 +0100 Subject: [PATCH 1/6] add citation for srgw-kl --- README.md | 4 +++- ot/gromov/_semirelaxed.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57b845edb..84b3cf0ee 100644 --- a/README.md +++ b/README.md @@ -340,4 +340,6 @@ distances between Gaussian distributions](https://hal.science/hal-03197398v2/fil [60] Feydy, J., Roussillon, P., Trouvé, A., & Gori, P. (2019). [Fast and scalable optimal transport for brain tractograms](https://arxiv.org/pdf/2107.02010.pdf). In Medical Image Computing and Computer Assisted Intervention–MICCAI 2019: 22nd International Conference, Shenzhen, China, October 13–17, 2019, Proceedings, Part III 22 (pp. 636-644). Springer International Publishing. -[61] Charlier, B., Feydy, J., Glaunes, J. A., Collin, F. D., & Durif, G. (2021). [Kernel operations on the gpu, with autodiff, without memory overflows](https://www.jmlr.org/papers/volume22/20-275/20-275.pdf). The Journal of Machine Learning Research, 22(1), 3457-3462. \ No newline at end of file +[61] Charlier, B., Feydy, J., Glaunes, J. A., Collin, F. D., & Durif, G. (2021). [Kernel operations on the gpu, with autodiff, without memory overflows](https://www.jmlr.org/papers/volume22/20-275/20-275.pdf). The Journal of Machine Learning Research, 22(1), 3457-3462. + +[62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty (2023). [Interpolating between Clustering and Dimensionality Reduction with Gromov-Wasserstein](https://arxiv.org/pdf/2310.03398.pdf). NeurIPS 2023 Workshop Optimal Transport and Machine Learning. diff --git a/ot/gromov/_semirelaxed.py b/ot/gromov/_semirelaxed.py index cbfe64ea8..d064dc669 100644 --- a/ot/gromov/_semirelaxed.py +++ b/ot/gromov/_semirelaxed.py @@ -90,6 +90,10 @@ def semirelaxed_gromov_wasserstein(C1, C2, p=None, loss_fun='square_loss', symme .. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty. "Semi-relaxed Gromov-Wasserstein divergence and applications on graphs" International Conference on Learning Representations (ICLR), 2022. + .. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty. + "Interpolating between Clustering and Dimensionality Reduction with + Gromov-Wasserstein". NeurIPS 2023 Workshop OTML. + """ arr = [C1, C2] if p is not None: @@ -220,6 +224,10 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p=None, loss_fun='square_loss', symm .. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty. "Semi-relaxed Gromov-Wasserstein divergence and applications on graphs" International Conference on Learning Representations (ICLR), 2022. + + .. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty. + "Interpolating between Clustering and Dimensionality Reduction with + Gromov-Wasserstein". NeurIPS 2023 Workshop OTML. """ # partial get_backend as the full one will be handled in gromov_wasserstein nx = get_backend(C1, C2) @@ -331,6 +339,10 @@ def semirelaxed_fused_gromov_wasserstein( .. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty. "Semi-relaxed Gromov-Wasserstein divergence and applications on graphs" International Conference on Learning Representations (ICLR), 2022. + + .. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty. + "Interpolating between Clustering and Dimensionality Reduction with + Gromov-Wasserstein". 
NeurIPS 2023 Workshop OTML. """ arr = [M, C1, C2] if p is not None: @@ -470,6 +482,10 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p=None, loss_fun='square_lo .. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty. "Semi-relaxed Gromov-Wasserstein divergence and applications on graphs" International Conference on Learning Representations (ICLR), 2022. + + .. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty. + "Interpolating between Clustering and Dimensionality Reduction with + Gromov-Wasserstein". NeurIPS 2023 Workshop OTML. """ # partial get_backend as the full one will be handled in gromov_wasserstein nx = get_backend(C1, C2) @@ -561,6 +577,10 @@ def solve_semirelaxed_gromov_linesearch(G, deltaG, cost_G, C1, C2, ones_p, .. [48] Cédric Vincent-Cuaz, Rémi Flamary, Marco Corneli, Titouan Vayer, Nicolas Courty. "Semi-relaxed Gromov-Wasserstein divergence and applications on graphs" International Conference on Learning Representations (ICLR), 2021. + + .. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty. + "Interpolating between Clustering and Dimensionality Reduction with + Gromov-Wasserstein". NeurIPS 2023 Workshop OTML. """ if nx is None: G, deltaG, C1, C2, M = list_to_array(G, deltaG, C1, C2, M) From c80499f4c0b553cf8580651e7fcc3c1fd92d8304 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 24 Nov 2023 12:31:01 +0100 Subject: [PATCH 2/6] init commit - BAPG for GW and FGW --- README.md | 4 + ot/gromov/_bregman.py | 575 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 579 insertions(+) diff --git a/README.md b/README.md index 84b3cf0ee..939cc6158 100644 --- a/README.md +++ b/README.md @@ -343,3 +343,7 @@ distances between Gaussian distributions](https://hal.science/hal-03197398v2/fil [61] Charlier, B., Feydy, J., Glaunes, J. A., Collin, F. D., & Durif, G. (2021). [Kernel operations on the gpu, with autodiff, without memory overflows](https://www.jmlr.org/papers/volume22/20-275/20-275.pdf). The Journal of Machine Learning Research, 22(1), 3457-3462. [62] H. Van Assel, C. Vincent-Cuaz, T. Vayer, R. Flamary, N. Courty (2023). [Interpolating between Clustering and Dimensionality Reduction with Gromov-Wasserstein](https://arxiv.org/pdf/2310.03398.pdf). NeurIPS 2023 Workshop Optimal Transport and Machine Learning. + +[63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. (2022). [A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in Graph Data](https://openreview.net/pdf?id=0jxPyVWmiiF). In The Eleventh International Conference on Learning Representations. + +[64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W. (2023). [Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications](https://openreview.net/pdf?id=uqkUguNu40). In Thirty-seventh Conference on Neural Information Processing Systems. diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index 3539428d5..5dcdf0165 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -343,6 +343,281 @@ def entropic_gromov_wasserstein2( return logv['gw_dist'] +def entropic_BAPG_gromov_wasserstein( + C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, + symmetric=None, G0=None, max_iter=1000, tol=1e-9, marginal_loss=False, + verbose=False, log=False): + r""" + Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + estimated using Bregman Alternated Projected Gradient method. 
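+
+    Each iteration performs a multiplicative mirror-descent (KL) step followed
+    by a rescaling onto one marginal. A minimal NumPy sketch of one iteration,
+    where ``df(T)`` denotes the gradient of the quadratic objective (this is a
+    sketch of the update rule, not the backend-agnostic implementation below):
+
+    .. code-block:: python
+
+        T = T * np.exp(-df(T) / epsilon)   # Bregman (KL) gradient step
+        T = (p / T.sum(1))[:, None] * T    # rescale rows to match p
+        T = T * np.exp(-df(T) / epsilon)
+        T = (q / T.sum(0))[None, :] * T    # rescale columns to match q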
+ + The function solves the following Gromov-Wasserstein + optimization problem [63]: + + .. math:: + \mathbf{T}^* \in \mathop{\arg\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + Where : + + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity matrices + + .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` + returned by this function does not necessarily satisfy the marginal + constraints :math:`\mathbf{T}\mathbf{1}=\mathbf{p}` and + :math:`\mathbf{T}^T\mathbf{1}=\mathbf{q}`. So the returned + Gromov-Wasserstein loss does not necessarily satisfy distance + properties and may be negative. + + Parameters + ---------- + C1 : array-like, shape (ns, ns) + Metric cost matrix in the source space + C2 : array-like, shape (nt, nt) + Metric cost matrix in the target space + p : array-like, shape (ns,), optional + Distribution in the source space. + If let to its default value None, uniform distribution is taken. + q : array-like, shape (nt,), optional + Distribution in the target space. + If let to its default value None, uniform distribution is taken. + loss_fun : string, optional (default='square_loss') + Loss function used for the solver either 'square_loss' or 'kl_loss' + epsilon : float, optional + Regularization term >0 + symmetric : bool, optional + Either C1 and C2 are to be assumed symmetric or not. + If let to its default None value, a symmetry test will be conducted. + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). + G0: array-like, shape (ns,nt), optional + If None the initial transport plan of the solver is pq^T. + Otherwise G0 will be used as initial transport of the solver. G0 is not + required to satisfy marginal constraints but we strongly recommend it + to correctly estimate the GW distance. + max_iter : int, optional + Max number of iterations + tol : float, optional + Stop threshold on error (>0) + marginal_loss: bool, optional. Default is False. + Include constant terms or not in the matching objective function. + verbose : bool, optional + Print information along iterations + log : bool, optional + Record log if True. + Returns + ------- + T : array-like, shape (`ns`, `nt`) + Optimal coupling between the two spaces + + References + ---------- + .. [63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. + "A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein + in Graph Data". International Conference on Learning Representations (ICLR), 2022. + + """ + if loss_fun not in ('square_loss', 'kl_loss'): + raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. 
Use one of: {'square_loss', 'kl_loss'}.") + + C1, C2 = list_to_array(C1, C2) + arr = [C1, C2] + if p is not None: + arr.append(list_to_array(p)) + else: + p = unif(C1.shape[0], type_as=C1) + if q is not None: + arr.append(list_to_array(q)) + else: + q = unif(C2.shape[0], type_as=C2) + + if G0 is not None: + arr.append(G0) + + nx = get_backend(*arr) + + if G0 is None: + G0 = nx.outer(p, q) + + T = G0 + constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun, nx) + + if symmetric is None: + symmetric = nx.allclose(C1, C1.T, atol=1e-10) and nx.allclose(C2, C2.T, atol=1e-10) + if not symmetric: + constCt, hC1t, hC2t = init_matrix(C1.T, C2.T, p, q, loss_fun, nx) + + if marginal_loss: + if symmetric: + def df(T): + return gwggrad(constC, hC1, hC2, T, nx) + else: + def df(T): + return 0.5 * (gwggrad(constC, hC1, hC2, T, nx) + gwggrad(constCt, hC1t, hC2t, T, nx)) + + else: + if symmetric: + def df(T): + A = - nx.dot(nx.dot(hC1, T), hC2.T) + return 2 * A + else: + def df(T): + A = - nx.dot(nx.dot(hC1, T), hC2t) + At = - nx.dot(nx.dot(hC1t, T), hC2) + return A + At + + cpt = 0 + err = 1e15 + + if log: + log = {'err': []} + + while (err > tol and cpt < max_iter): + + Tprev = T + + # rows update + T = T * nx.exp(- df(T) / epsilon) + row_scaling = p / nx.sum(T, 1) + T = nx.reshape(row_scaling, (-1, 1)) * T + + # columns update + T = T * nx.exp(- df(T) / epsilon) + column_scaling = q / nx.sum(T, 0) + T = nx.reshape(column_scaling, (1, -1)) * T + + if cpt % 10 == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations + err = nx.norm(T - Tprev) + + if log: + log['err'].append(err) + + if verbose: + if cpt % 200 == 0: + print('{:5s}|{:12s}'.format( + 'It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + + cpt += 1 + + if abs(nx.sum(T) - 1) > 1e-5: + warnings.warn("Solver failed to produce a transport plan. You might " + "want to increase the regularization parameter `epsilon`.") + if log: + log['gw_dist'] = gwloss(constC, hC1, hC2, T, nx) + + if not marginal_loss: + log['loss'] = log['gw_dist'] - nx.sum(constC * T) + + return T, log + else: + return T + + +def entropic_BAPG_gromov_wasserstein2( + C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000, + tol=1e-9, marginal_loss=False, verbose=False, log=False): + r""" + Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + estimated using Bregman Alternated Projected Gradient method. + + The function solves the following Gromov-Wasserstein + optimization problem [63]: + + .. math:: + \mathbf{GW} = \mathop{\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + Where : + + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity matrices + + .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` + returned by this function does not necessarily satisfy the marginal + constraints :math:`\mathbf{T}\mathbf{1}=\mathbf{p}` and + :math:`\mathbf{T}^T\mathbf{1}=\mathbf{q}`. 
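+        Indeed, each iteration alternately rescales the rows and then the
+        columns of :math:`\mathbf{T}`, so in general only the marginal
+        enforced last is met at termination.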
So the returned + Gromov-Wasserstein loss does not necessarily satisfy distance + properties and may be negative. + + + Parameters + ---------- + C1 : array-like, shape (ns, ns) + Metric cost matrix in the source space + C2 : array-like, shape (nt, nt) + Metric cost matrix in the target space + p : array-like, shape (ns,), optional + Distribution in the source space. + If let to its default value None, uniform distribution is taken. + q : array-like, shape (nt,), optional + Distribution in the target space. + If let to its default value None, uniform distribution is taken. + loss_fun : string, optional (default='square_loss') + Loss function used for the solver either 'square_loss' or 'kl_loss' + epsilon : float, optional + Regularization term >0 + symmetric : bool, optional + Either C1 and C2 are to be assumed symmetric or not. + If let to its default None value, a symmetry test will be conducted. + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). + G0: array-like, shape (ns,nt), optional + If None the initial transport plan of the solver is pq^T. + Otherwise G0 will be used as initial transport of the solver. G0 is not + required to satisfy marginal constraints but we strongly recommand it + to correcly estimate the GW distance. + max_iter : int, optional + Max number of iterations + tol : float, optional + Stop threshold on error (>0) + marginal_loss: bool, optional. Default is False. + Include constant terms or not in the matching objective function. + verbose : bool, optional + Print information along iterations + log : bool, optional + Record log if True. + + Returns + ------- + gw_dist : float + Gromov-Wasserstein distance + + References + ---------- + .. [63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. + "A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein + in Graph Data". International Conference on Learning Representations (ICLR), 2023. + + """ + + T, logv = entropic_BAPG_gromov_wasserstein( + C1, C2, p, q, loss_fun, epsilon, symmetric, G0, max_iter, + tol, marginal_loss, verbose, log=True) + + logv['T'] = T + + if log: + return logv['gw_dist'], logv + else: + return logv['gw_dist'] + + def entropic_gromov_barycenters( N, Cs, ps=None, p=None, lambdas=None, loss_fun='square_loss', epsilon=0.1, symmetric=True, max_iter=1000, tol=1e-9, @@ -877,6 +1152,306 @@ def entropic_fused_gromov_wasserstein2( return logv['fgw_dist'] +def entropic_BAPG_fused_gromov_wasserstein( + M, C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, + symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9, + marginal_loss=False, verbose=False, log=False): + r""" + Returns the Fused Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`, + estimated using Bregman Alternated Projected Gradient method. + + The function solves the following Fused Gromov-Wasserstein + optimization problem [63, 64]: + + .. math:: + \mathbf{T}^* \in\mathop{\arg\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + + s.t. 
\ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + Where : + + - :math:`\mathbf{M}`: metric cost matrix between features across domains + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity and feature matrices + - :math:`\alpha`: trade-off parameter + + .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` + returned by this function does not necessarily satisfy the marginal + constraints :math:`\mathbf{T}\mathbf{1}=\mathbf{p}` and + :math:`\mathbf{T}^T\mathbf{1}=\mathbf{q}`. So the returned Fused + Gromov-Wasserstein loss does not necessarily satisfy distance + properties and may be negative. + + Parameters + ---------- + M : array-like, shape (ns, nt) + Metric cost matrix between features across domains + C1 : array-like, shape (ns, ns) + Metric cost matrix in the source space + C2 : array-like, shape (nt, nt) + Metric cost matrix in the target space + p : array-like, shape (ns,), optional + Distribution in the source space. + If let to its default value None, uniform distribution is taken. + q : array-like, shape (nt,), optional + Distribution in the target space. + If let to its default value None, uniform distribution is taken. + loss_fun : string, optional (default='square_loss') + Loss function used for the solver either 'square_loss' or 'kl_loss' + epsilon : float, optional + Regularization term >0 + symmetric : bool, optional + Either C1 and C2 are to be assumed symmetric or not. + If let to its default None value, a symmetry test will be conducted. + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). + alpha : float, optional + Trade-off parameter (0 < alpha < 1) + G0: array-like, shape (ns,nt), optional + If None the initial transport plan of the solver is pq^T. + Otherwise G0 will be used as initial transport of the solver. G0 is not + required to satisfy marginal constraints but we strongly recommend it + to correctly estimate the GW distance. + max_iter : int, optional + Max number of iterations + tol : float, optional + Stop threshold on error (>0) + marginal_loss: bool, optional. Default is False. + Include constant terms or not in the matching objective function. + verbose : bool, optional + Print information along iterations + log : bool, optional + Record log if True. + Returns + ------- + T : array-like, shape (`ns`, `nt`) + Optimal coupling between the two joint spaces + + References + ---------- + .. [63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. + "A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein + in Graph Data". International Conference on Learning Representations (ICLR), 2023. + + .. [64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W. + "Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications". + In Thirty-seventh Conference on Neural Information Processing Systems. + """ + if loss_fun not in ('square_loss', 'kl_loss'): + raise ValueError(f"Unknown `loss_fun='{loss_fun}'`. 
Use one of: {'square_loss', 'kl_loss'}.") + + M, C1, C2 = list_to_array(M, C1, C2) + arr = [M, C1, C2] + if p is not None: + arr.append(list_to_array(p)) + else: + p = unif(C1.shape[0], type_as=C1) + if q is not None: + arr.append(list_to_array(q)) + else: + q = unif(C2.shape[0], type_as=C2) + + if G0 is not None: + arr.append(G0) + + nx = get_backend(*arr) + + if G0 is None: + G0 = nx.outer(p, q) + + T = G0 + constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun, nx) + if symmetric is None: + symmetric = nx.allclose(C1, C1.T, atol=1e-10) and nx.allclose(C2, C2.T, atol=1e-10) + if not symmetric: + constCt, hC1t, hC2t = init_matrix(C1.T, C2.T, p, q, loss_fun, nx) + + # Define gradients + if marginal_loss: + if symmetric: + def df(T): + return alpha * gwggrad(constC, hC1, hC2, T, nx) + (1 - alpha) * M + else: + def df(T): + return (alpha * 0.5) * (gwggrad(constC, hC1, hC2, T, nx) + gwggrad(constCt, hC1t, hC2t, T, nx)) + (1 - alpha) * M + + else: + if symmetric: + def df(T): + A = - nx.dot(nx.dot(hC1, T), hC2.T) + return 2 * alpha * A + (1 - alpha) * M + else: + def df(T): + A = - nx.dot(nx.dot(hC1, T), hC2t) + At = - nx.dot(nx.dot(hC1t, T), hC2) + return alpha * (A + At) + (1 - alpha) * M + cpt = 0 + err = 1e15 + + if log: + log = {'err': []} + + while (err > tol and cpt < max_iter): + + Tprev = T + + # rows update + T = T * nx.exp(- df(T) / epsilon) + row_scaling = p / nx.sum(T, 1) + T = nx.reshape(row_scaling, (-1, 1)) * T + + # columns update + T = T * nx.exp(- df(T) / epsilon) + column_scaling = q / nx.sum(T, 0) + T = nx.reshape(column_scaling, (1, -1)) * T + + if cpt % 10 == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations + err = nx.norm(T - Tprev) + + if log: + log['err'].append(err) + + if verbose: + if cpt % 200 == 0: + print('{:5s}|{:12s}'.format( + 'It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + + cpt += 1 + + if abs(nx.sum(T) - 1) > 1e-5: + warnings.warn("Solver failed to produce a transport plan. You might " + "want to increase the regularization parameter `epsilon`.") + if log: + log['fgw_dist'] = (1 - alpha) * nx.sum(M * T) + alpha * gwloss(constC, hC1, hC2, T, nx) + + if not marginal_loss: + log['loss'] = log['fgw_dist'] - alpha * nx.sum(constC * T) + + return T, log + else: + return T + + +def entropic_BAPG_fused_gromov_wasserstein2( + M, C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, + symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9, + marginal_loss=False, verbose=False, log=False): + r""" + Returns the Fused Gromov-Wasserstein loss between :math:`(\mathbf{C_1}, \mathbf{Y_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{Y_2}, \mathbf{q})` + with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`, + estimated using Bregman Alternated Projected Gradient method. + + The function solves the following Fused Gromov-Wasserstein + optimization problem [63, 64]: + + .. math:: + \mathbf{FGW} = \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F + + \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + + s.t. 
\ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + Where : + + - :math:`\mathbf{M}`: metric cost matrix between features across domains + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - `L`: loss function to account for the misfit between the similarity and feature matrices + - :math:`\alpha`: trade-off parameter + + .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` + returned by this function does not necessarily satisfy the marginal + constraints :math:`\mathbf{T}\mathbf{1}=\mathbf{p}` and + :math:`\mathbf{T}^T\mathbf{1}=\mathbf{q}`. So the returned Fused + Gromov-Wasserstein loss does not necessarily satisfy distance + properties and may be negative. + + Parameters + ---------- + M : array-like, shape (ns, nt) + Metric cost matrix between features across domains + C1 : array-like, shape (ns, ns) + Metric cost matrix in the source space + C2 : array-like, shape (nt, nt) + Metric cost matrix in the target space + p : array-like, shape (ns,), optional + Distribution in the source space. + If let to its default value None, uniform distribution is taken. + q : array-like, shape (nt,), optional + Distribution in the target space. + If let to its default value None, uniform distribution is taken. + loss_fun : string, optional (default='square_loss') + Loss function used for the solver either 'square_loss' or 'kl_loss' + epsilon : float, optional + Regularization term >0 + symmetric : bool, optional + Either C1 and C2 are to be assumed symmetric or not. + If let to its default None value, a symmetry test will be conducted. + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). + alpha : float, optional + Trade-off parameter (0 < alpha < 1) + G0: array-like, shape (ns,nt), optional + If None the initial transport plan of the solver is pq^T. + Otherwise G0 will be used as initial transport of the solver. G0 is not + required to satisfy marginal constraints but we strongly recommend it + to correctly estimate the GW distance. + max_iter : int, optional + Max number of iterations + tol : float, optional + Stop threshold on error (>0) + marginal_loss: bool, optional. Default is False. + Include constant terms or not in the matching objective function. + verbose : bool, optional + Print information along iterations + log : bool, optional + Record log if True. + Returns + ------- + T : array-like, shape (`ns`, `nt`) + Optimal coupling between the two joint spaces + + References + ---------- + .. [63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. + "A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein + in Graph Data". International Conference on Learning Representations (ICLR), 2023. + + .. [64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W. + "Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications". + In Thirty-seventh Conference on Neural Information Processing Systems. 
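+
+    Examples
+    --------
+    A minimal usage sketch on random symmetric structure matrices and a
+    random feature cost matrix; the marginals default to uniform when `p`
+    and `q` are left to None:
+
+    >>> import numpy as np
+    >>> import ot
+    >>> rng = np.random.RandomState(0)
+    >>> C1 = rng.rand(5, 5); C1 = (C1 + C1.T) / 2
+    >>> C2 = rng.rand(4, 4); C2 = (C2 + C2.T) / 2
+    >>> M = rng.rand(5, 4)
+    >>> fgw_dist = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2(
+    ...     M, C1, C2, alpha=0.5, epsilon=1.)  # doctest: +SKIP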
+ """ + nx = get_backend(M, C1, C2) + + T, logv = entropic_BAPG_fused_gromov_wasserstein( + M, C1, C2, p, q, loss_fun, epsilon, symmetric, alpha, G0, max_iter, + tol, marginal_loss, verbose, log=True) + + logv['T'] = T + + lin_term = nx.sum(T * M) + logv['quad_loss'] = (logv['fgw_dist'] - (1 - alpha) * lin_term) + logv['lin_loss'] = lin_term * (1 - alpha) + + if log: + return logv['fgw_dist'], logv + else: + return logv['fgw_dist'] + + def entropic_fused_gromov_barycenters( N, Ys, Cs, ps=None, p=None, lambdas=None, loss_fun='square_loss', epsilon=0.1, symmetric=True, alpha=0.5, max_iter=1000, tol=1e-9, From 6381c6369d0340b5c5a37a34d58c4407c19c7217 Mon Sep 17 00:00:00 2001 From: clvincen Date: Fri, 24 Nov 2023 18:19:00 +0100 Subject: [PATCH 3/6] add tests --- RELEASES.md | 1 + ot/gromov/__init__.py | 8 +- ot/gromov/_bregman.py | 10 +- test/test_gromov.py | 220 ++++++++++++++++++++++++++++++++++++++---- 4 files changed, 216 insertions(+), 23 deletions(-) diff --git a/RELEASES.md b/RELEASES.md index 349c56214..11429ec26 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -20,6 +20,7 @@ + Wrapper for `geomloss`` solver on empirical samples (PR #571) + Add `stop_criterion` feature to (un)regularized (f)gw barycenter solvers (PR #578) + Add `fixed_structure` and `fixed_features` to entropic fgw barycenter solver (PR #578) ++ Add new entropic BAPG solvers for GW and FGW (PR #581) #### Closed issues - Fix line search evaluating cost outside of the interpolation range (Issue #502, PR #504) diff --git a/ot/gromov/__init__.py b/ot/gromov/__init__.py index e39d906cf..63223e961 100644 --- a/ot/gromov/__init__.py +++ b/ot/gromov/__init__.py @@ -20,9 +20,13 @@ from ._bregman import (entropic_gromov_wasserstein, entropic_gromov_wasserstein2, + entropic_BAPG_gromov_wasserstein, + entropic_BAPG_gromov_wasserstein2, entropic_gromov_barycenters, entropic_fused_gromov_wasserstein, entropic_fused_gromov_wasserstein2, + entropic_BAPG_fused_gromov_wasserstein, + entropic_BAPG_fused_gromov_wasserstein2, entropic_fused_gromov_barycenters) from ._estimators import (GW_distance_estimation, pointwise_gromov_wasserstein, @@ -49,8 +53,10 @@ 'gromov_wasserstein', 'gromov_wasserstein2', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2', 'solve_gromov_linesearch', 'gromov_barycenters', 'fgw_barycenters', 'entropic_gromov_wasserstein', 'entropic_gromov_wasserstein2', + 'entropic_BAPG_gromov_wasserstein', 'entropic_BAPG_gromov_wasserstein2', 'entropic_gromov_barycenters', 'entropic_fused_gromov_wasserstein', - 'entropic_fused_gromov_wasserstein2', 'entropic_fused_gromov_barycenters', + 'entropic_fused_gromov_wasserstein2', 'entropic_BAPG_fused_gromov_wasserstein', + 'entropic_BAPG_fused_gromov_wasserstein2', 'entropic_fused_gromov_barycenters', 'GW_distance_estimation', 'pointwise_gromov_wasserstein', 'sampled_gromov_wasserstein', 'semirelaxed_gromov_wasserstein', 'semirelaxed_gromov_wasserstein2', 'semirelaxed_fused_gromov_wasserstein', 'semirelaxed_fused_gromov_wasserstein2', diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index 5dcdf0165..bb3ba5627 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -509,9 +509,10 @@ def df(T): cpt += 1 - if abs(nx.sum(T) - 1) > 1e-5: + if nx.any(nx.isnan(T)): warnings.warn("Solver failed to produce a transport plan. 
You might " - "want to increase the regularization parameter `epsilon`.") + "want to increase the regularization parameter `epsilon`.", + UserWarning) if log: log['gw_dist'] = gwloss(constC, hC1, hC2, T, nx) @@ -1328,9 +1329,10 @@ def df(T): cpt += 1 - if abs(nx.sum(T) - 1) > 1e-5: + if nx.any(nx.isnan(T)): warnings.warn("Solver failed to produce a transport plan. You might " - "want to increase the regularization parameter `epsilon`.") + "want to increase the regularization parameter `epsilon`.", + UserWarning) if log: log['fgw_dist'] = (1 - alpha) * nx.sum(M * T) + alpha * gwloss(constC, hC1, hC2, T, nx) diff --git a/test/test_gromov.py b/test/test_gromov.py index 3158f9dc9..c156154ed 100644 --- a/test/test_gromov.py +++ b/test/test_gromov.py @@ -570,20 +570,108 @@ def test_entropic_gromov_dtype_device(nx): C1b, C2b, pb, qb = nx.from_numpy(C1, C2, p, q, type_as=tp) - for solver in ['PGD', 'PPA']: - Gb = ot.gromov.entropic_gromov_wasserstein( - C1b, C2b, pb, qb, 'square_loss', epsilon=1e-1, max_iter=5, - solver=solver, verbose=True - ) - gw_valb = ot.gromov.entropic_gromov_wasserstein2( - C1b, C2b, pb, qb, 'square_loss', epsilon=1e-1, max_iter=5, - solver=solver, verbose=True - ) + for solver in ['PGD', 'PPA', 'BAPG']: + if solver == 'BAPG': + Gb = ot.gromov.entropic_BAPG_gromov_wasserstein( + C1b, C2b, pb, qb, max_iter=2, verbose=True) + gw_valb = ot.gromov.entropic_BAPG_gromov_wasserstein2( + C1b, C2b, pb, qb, max_iter=2, verbose=True) + else: + Gb = ot.gromov.entropic_gromov_wasserstein( + C1b, C2b, pb, qb, max_iter=2, solver=solver, verbose=True) + gw_valb = ot.gromov.entropic_gromov_wasserstein2( + C1b, C2b, pb, qb, max_iter=2, solver=solver, verbose=True) nx.assert_same_dtype_device(C1b, Gb) nx.assert_same_dtype_device(C1b, gw_valb) +def test_entropic_BAPG_gromov(nx): + n_samples = 10 # nb samples + + mu_s = np.array([0, 0]) + cov_s = np.array([[1, 0], [0, 1]]) + + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42) + + xt = xs[::-1].copy() + + p = ot.unif(n_samples) + q = ot.unif(n_samples) + G0 = p[:, None] * q[None, :] + C1 = ot.dist(xs, xs) + C2 = ot.dist(xt, xt) + + C1 /= C1.max() + C2 /= C2.max() + + C1b, C2b, pb, qb, G0b = nx.from_numpy(C1, C2, p, q, G0) + + # complete test with marginal loss = True + marginal_loss = True + with pytest.raises(ValueError): + loss_fun = 'weird_loss_fun' + G, log = ot.gromov.entropic_BAPG_gromov_wasserstein( + C1, C2, None, q, loss_fun, symmetric=None, G0=G0, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, + verbose=True, log=True) + + G, log = ot.gromov.entropic_BAPG_gromov_wasserstein( + C1, C2, None, q, 'square_loss', symmetric=None, G0=G0, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, + verbose=True, log=True) + Gb = nx.to_numpy(ot.gromov.entropic_BAPG_gromov_wasserstein( + C1b, C2b, pb, None, 'square_loss', symmetric=True, G0=None, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True, + log=False + )) + + # check constraints + np.testing.assert_allclose(G, Gb, atol=1e-06) + np.testing.assert_allclose( + p, Gb.sum(1), atol=1e-02) # cf convergence gromov + np.testing.assert_allclose( + q, Gb.sum(0), atol=1e-02) # cf convergence gromov + + with pytest.warns(UserWarning): + + gw = ot.gromov.entropic_BAPG_gromov_wasserstein2( + C1, C2, p, q, 'kl_loss', symmetric=False, G0=None, + max_iter=10, epsilon=1e-2, marginal_loss=marginal_loss, log=False) + + gw, log = ot.gromov.entropic_BAPG_gromov_wasserstein2( + C1, C2, p, q, 'kl_loss', symmetric=False, G0=None, + max_iter=10, 
epsilon=1., marginal_loss=marginal_loss, log=True) + gwb, logb = ot.gromov.entropic_BAPG_gromov_wasserstein2( + C1b, C2b, pb, qb, 'kl_loss', symmetric=None, G0=G0b, + max_iter=10, epsilon=1., marginal_loss=marginal_loss, log=True) + gwb = nx.to_numpy(gwb) + + G = log['T'] + Gb = nx.to_numpy(logb['T']) + + np.testing.assert_allclose(gw, gwb, atol=1e-06) + np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1) + + # check constraints + np.testing.assert_allclose(G, Gb, atol=1e-06) + np.testing.assert_allclose( + p, Gb.sum(1), atol=1e-02) # cf convergence gromov + np.testing.assert_allclose( + q, Gb.sum(0), atol=1e-02) # cf convergence gromov + + marginal_loss = False + G, log = ot.gromov.entropic_BAPG_gromov_wasserstein( + C1, C2, None, q, 'square_loss', symmetric=None, G0=G0, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, + verbose=True, log=True) + Gb = nx.to_numpy(ot.gromov.entropic_BAPG_gromov_wasserstein( + C1b, C2b, pb, None, 'square_loss', symmetric=False, G0=None, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True, + log=False + )) + + @pytest.skip_backend("tf", reason="test very slow with tf backend") def test_entropic_fgw(nx): n_samples = 5 # nb samples @@ -722,6 +810,99 @@ def test_entropic_proximal_fgw(nx): q, Gb.sum(0), atol=1e-04) # cf convergence gromov +def test_entropic_BAPG_fgw(nx): + n_samples = 5 # nb samples + + mu_s = np.array([0, 0]) + cov_s = np.array([[1, 0], [0, 1]]) + + xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42) + + xt = xs[::-1].copy() + + rng = np.random.RandomState(42) + ys = rng.randn(xs.shape[0], 2) + yt = ys[::-1].copy() + + p = ot.unif(n_samples) + q = ot.unif(n_samples) + G0 = p[:, None] * q[None, :] + + C1 = ot.dist(xs, xs) + C2 = ot.dist(xt, xt) + + C1 /= C1.max() + C2 /= C2.max() + + M = ot.dist(ys, yt) + + Mb, C1b, C2b, pb, qb, G0b = nx.from_numpy(M, C1, C2, p, q, G0) + + with pytest.raises(ValueError): + loss_fun = 'weird_loss_fun' + G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + M, C1, C2, p, q, loss_fun=loss_fun, max_iter=1, log=True) + + # complete test with marginal loss = True + marginal_loss = True + + G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + M, C1, C2, p, q, 'square_loss', symmetric=None, G0=G0, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, log=True) + Gb = nx.to_numpy(ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + Mb, C1b, C2b, pb, qb, 'square_loss', symmetric=True, G0=None, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True)) + + # check constraints + np.testing.assert_allclose(G, Gb, atol=1e-06) + np.testing.assert_allclose( + p, Gb.sum(1), atol=1e-02) # cf convergence gromov + np.testing.assert_allclose( + q, Gb.sum(0), atol=1e-02) # cf convergence gromov + + with pytest.warns(UserWarning): + + fgw = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2( + M, C1, C2, p, q, 'kl_loss', symmetric=False, G0=None, + max_iter=10, epsilon=1e-3, marginal_loss=marginal_loss, log=False) + + fgw, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2( + M, C1, C2, p, None, 'kl_loss', symmetric=True, G0=None, + max_iter=5, epsilon=1, marginal_loss=marginal_loss, log=True) + fgwb, logb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2( + Mb, C1b, C2b, None, qb, 'kl_loss', symmetric=None, G0=G0b, + max_iter=5, epsilon=1, marginal_loss=marginal_loss, log=True) + fgwb = nx.to_numpy(fgwb) + + G = log['T'] + Gb = nx.to_numpy(logb['T']) + + np.testing.assert_allclose(fgw, fgwb, atol=1e-06) + np.testing.assert_allclose(fgw, 
0, atol=1e-1, rtol=1e-1) + + # check constraints + np.testing.assert_allclose(G, Gb, atol=1e-06) + np.testing.assert_allclose( + p, Gb.sum(1), atol=1e-02) # cf convergence gromov + np.testing.assert_allclose( + q, Gb.sum(0), atol=1e-02) # cf convergence gromov + + # Tests with marginal_loss = False + marginal_loss = False + G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + M, C1, C2, p, q, 'square_loss', symmetric=False, G0=G0, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, log=True) + Gb = nx.to_numpy(ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + Mb, C1b, C2b, pb, qb, 'square_loss', symmetric=None, G0=None, + epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True)) + # check constraints + np.testing.assert_allclose(G, Gb, atol=1e-06) + np.testing.assert_allclose( + p, Gb.sum(1), atol=1e-02) # cf convergence gromov + np.testing.assert_allclose( + q, Gb.sum(0), atol=1e-02) # cf convergence gromov + + def test_asymmetric_entropic_fgw(nx): n_samples = 5 # nb samples rng = np.random.RandomState(0) @@ -797,15 +978,18 @@ def test_entropic_fgw_dtype_device(nx): Mb, C1b, C2b, pb, qb = nx.from_numpy(M, C1, C2, p, q, type_as=tp) - for solver in ['PGD', 'PPA']: - Gb = ot.gromov.entropic_fused_gromov_wasserstein( - Mb, C1b, C2b, pb, qb, 'square_loss', epsilon=0.1, max_iter=5, - solver=solver, verbose=True - ) - fgw_valb = ot.gromov.entropic_fused_gromov_wasserstein2( - Mb, C1b, C2b, pb, qb, 'square_loss', epsilon=0.1, max_iter=5, - solver=solver, verbose=True - ) + for solver in ['PGD', 'PPA', 'BAPG']: + if solver == 'BAPG': + Gb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein( + Mb, C1b, C2b, pb, qb, max_iter=2) + fgw_valb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2( + Mb, C1b, C2b, pb, qb, max_iter=2) + + else: + Gb = ot.gromov.entropic_fused_gromov_wasserstein( + Mb, C1b, C2b, pb, qb, max_iter=2, solver=solver) + fgw_valb = ot.gromov.entropic_fused_gromov_wasserstein2( + Mb, C1b, C2b, pb, qb, max_iter=2, solver=solver) nx.assert_same_dtype_device(C1b, Gb) nx.assert_same_dtype_device(C1b, fgw_valb) From a0344ba9f3d79e1d1b3ea7e94381ef061cf8bb46 Mon Sep 17 00:00:00 2001 From: clvincen Date: Sun, 26 Nov 2023 14:00:03 +0100 Subject: [PATCH 4/6] update example with fgw solvers comparison --- examples/gromov/plot_fgw_solvers.py | 133 +++++++++++++++++++--------- 1 file changed, 89 insertions(+), 44 deletions(-) diff --git a/examples/gromov/plot_fgw_solvers.py b/examples/gromov/plot_fgw_solvers.py index 5f8a885c9..4a66b5858 100644 --- a/examples/gromov/plot_fgw_solvers.py +++ b/examples/gromov/plot_fgw_solvers.py @@ -5,8 +5,9 @@ ============================== This example illustrates the computation of FGW for attributed graphs -using 3 different solvers to estimate the distance based on Conditional -Gradient [24] or Sinkhorn projections [12, 51]. +using 4 different solvers to estimate the distance based on Conditional +Gradient [24], Sinkhorn projections [12, 51] and alternated Bregman +projections [63, 64]. We generate two graphs following Stochastic Block Models further endowed with node features and compute their FGW matchings. @@ -23,6 +24,16 @@ [51] Xu, H., Luo, D., Zha, H., & Duke, L. C. (2019). "Gromov-wasserstein learning for graph matching and node embedding". In International Conference on Machine Learning (ICML), 2019. + +[63] Li, J., Tang, J., Kong, L., Liu, H., Li, J., So, A. M. C., & Blanchet, J. +"A Convergent Single-Loop Algorithm for Relaxation of Gromov-Wasserstein in +Graph Data". 
International Conference on Learning Representations (ICLR), 2023. + +[64] Ma, X., Chu, X., Wang, Y., Lin, Y., Zhao, J., Ma, L., & Zhu, W. +"Fused Gromov-Wasserstein Graph Mixup for Graph-level Classifications". +In Thirty-seventh Conference on Neural Information Processing Systems +(NeurIPS), 2023. + """ # Author: Cédric Vincent-Cuaz @@ -33,9 +44,12 @@ import numpy as np import matplotlib.pylab as pl -from ot.gromov import fused_gromov_wasserstein, entropic_fused_gromov_wasserstein +from ot.gromov import (fused_gromov_wasserstein, + entropic_fused_gromov_wasserstein, + entropic_BAPG_fused_gromov_wasserstein) import networkx from networkx.generators.community import stochastic_block_model as sbm +from time import time ############################################################################# # @@ -85,34 +99,59 @@ # Conditional Gradient algorithm -fgw0, log0 = fused_gromov_wasserstein( - M, C2, C3, h2, h3, 'square_loss', alpha=alpha, verbose=True, log=True) +print('Conditional Gradient \n') +start_cg = time() +T_cg, log_cg = fused_gromov_wasserstein( + M, C2, C3, h2, h3, 'square_loss', alpha=alpha, tol_rel=1e-9, + verbose=True, log=True) +end_cg = time() +time_cg = 1000 * (end_cg - start_cg) # Proximal Point algorithm with Kullback-Leibler as proximal operator -fgw, log = entropic_fused_gromov_wasserstein( +print('Proximal Point Algorithm \n') +start_ppa = time() +T_ppa, log_ppa = entropic_fused_gromov_wasserstein( M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1., solver='PPA', - log=True, verbose=True, warmstart=False, numItermax=10) + tol=1e-9, log=True, verbose=True, warmstart=False, numItermax=10) +end_ppa = time() +time_ppa = 1000 * (end_ppa - start_ppa) # Projected Gradient algorithm with entropic regularization -fgwe, loge = entropic_fused_gromov_wasserstein( +print('Projected Gradient Descent \n') +start_pgd = time() +T_pgd, log_pgd = entropic_fused_gromov_wasserstein( M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=0.01, solver='PGD', - log=True, verbose=True, warmstart=False, numItermax=10) - -print('Fused Gromov-Wasserstein distance estimated with Conditional Gradient solver: ' + str(log0['fgw_dist'])) -print('Fused Gromov-Wasserstein distance estimated with Proximal Point solver: ' + str(log['fgw_dist'])) -print('Entropic Fused Gromov-Wasserstein distance estimated with Projected Gradient solver: ' + str(loge['fgw_dist'])) + tol=1e-9, log=True, verbose=True, warmstart=False, numItermax=10) +end_pgd = time() +time_pgd = 1000 * (end_pgd - start_pgd) + +# Alternated Bregman Projected Gradient algorithm with Kullback-Leibler as proximal operator +print('Bregman Alternated Projected Gradient \n') +start_bapg = time() +T_bapg, log_bapg = entropic_BAPG_fused_gromov_wasserstein( + M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1., + tol=1e-9, marginal_loss=True, verbose=True, log=True) +end_bapg = time() +time_bapg = 1000 * (end_bapg - start_bapg) + +print('Fused Gromov-Wasserstein distance estimated with Conditional Gradient solver: ' + str(log_cg['fgw_dist'])) +print('Fused Gromov-Wasserstein distance estimated with Proximal Point solver: ' + str(log_ppa['fgw_dist'])) +print('Entropic Fused Gromov-Wasserstein distance estimated with Projected Gradient solver: ' + str(log_pgd['fgw_dist'])) +print('Fused Gromov-Wasserstein distance estimated with Projected Gradient solver: ' + str(log_bapg['fgw_dist'])) # compute OT sparsity level -fgw0_sparsity = 100 * (fgw0 == 0.).astype(np.float64).sum() / (N2 * N3) -fgw_sparsity = 100 * (fgw == 0.).astype(np.float64).sum() 
/ (N2 * N3) -fgwe_sparsity = 100 * (fgwe == 0.).astype(np.float64).sum() / (N2 * N3) +T_cg_sparsity = 100 * (T_cg == 0.).astype(np.float64).sum() / (N2 * N3) +T_ppa_sparsity = 100 * (T_ppa == 0.).astype(np.float64).sum() / (N2 * N3) +T_pgd_sparsity = 100 * (T_pgd == 0.).astype(np.float64).sum() / (N2 * N3) +T_bapg_sparsity = 100 * (T_bapg == 0.).astype(np.float64).sum() / (N2 * N3) -# Methods using Sinkhorn projections tend to produce feasibility errors on the +# Methods using Sinkhorn/Bregman projections tend to produce feasibility errors on the # marginal constraints -err0 = np.linalg.norm(fgw0.sum(1) - h2) + np.linalg.norm(fgw0.sum(0) - h3) -err = np.linalg.norm(fgw.sum(1) - h2) + np.linalg.norm(fgw.sum(0) - h3) -erre = np.linalg.norm(fgwe.sum(1) - h2) + np.linalg.norm(fgwe.sum(0) - h3) +err_cg = np.linalg.norm(T_cg.sum(1) - h2) + np.linalg.norm(T_cg.sum(0) - h3) +err_ppa = np.linalg.norm(T_ppa.sum(1) - h2) + np.linalg.norm(T_ppa.sum(0) - h3) +err_pgd = np.linalg.norm(T_pgd.sum(1) - h2) + np.linalg.norm(T_pgd.sum(0) - h3) +err_bapg = np.linalg.norm(T_bapg.sum(1) - h2) + np.linalg.norm(T_bapg.sum(0) - h3) ############################################################################# # @@ -242,46 +281,52 @@ def draw_transp_colored_GW(G1, C1, G2, C2, part_G1, p1, p2, T, seed_G2 = 0 seed_G3 = 4 -pl.figure(2, figsize=(12, 3.5)) +pl.figure(2, figsize=(15, 3.5)) pl.clf() -pl.subplot(131) +pl.subplot(141) pl.axis('off') -pl.axis -pl.title('(CG algo) FGW=%s \n \n OT sparsity = %s \n feasibility error = %s' % ( - np.round(log0['fgw_dist'], 3), str(np.round(fgw0_sparsity, 2)) + ' %', - np.round(err0, 4)), fontsize=fontsize) -p0, q0 = fgw0.sum(1), fgw0.sum(0) # check marginals +pl.title('(CG) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % ( + np.round(log_cg['fgw_dist'], 3), str(np.round(T_cg_sparsity, 2)) + ' %', + np.round(err_cg, 4), str(np.round(time_cg, 2)) + ' ms'), fontsize=fontsize) pos1, pos2 = draw_transp_colored_GW( - weightedG2, C2, weightedG3, C3, part_G2, p1=p0, p2=q0, T=fgw0, - shiftx=1.5, node_size=node_size, seed_G1=seed_G2, seed_G2=seed_G3) + weightedG2, C2, weightedG3, C3, part_G2, p1=T_cg.sum(1), p2=T_cg.sum(0), + T=T_cg, shiftx=1.5, node_size=node_size, seed_G1=seed_G2, seed_G2=seed_G3) -pl.subplot(132) +pl.subplot(142) pl.axis('off') -p, q = fgw.sum(1), fgw.sum(0) # check marginals - -pl.title('(PP algo) FGW=%s\n \n OT sparsity = %s \n feasibility error = %s' % ( - np.round(log['fgw_dist'], 3), str(np.round(fgw_sparsity, 2)) + ' %', - np.round(err, 4)), fontsize=fontsize) +pl.title('(PPA) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % ( + np.round(log_ppa['fgw_dist'], 3), str(np.round(T_ppa_sparsity, 2)) + ' %', + np.round(err_ppa, 4), str(np.round(time_ppa, 2)) + ' ms'), fontsize=fontsize) pos1, pos2 = draw_transp_colored_GW( - weightedG2, C2, weightedG3, C3, part_G2, p1=p, p2=q, T=fgw, - pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0) + weightedG2, C2, weightedG3, C3, part_G2, p1=T_ppa.sum(1), p2=T_ppa.sum(0), + T=T_ppa, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0) -pl.subplot(133) +pl.subplot(143) pl.axis('off') -pe, qe = fgwe.sum(1), fgwe.sum(0) # check marginals +pl.title('(PGD) Entropic FGW=%s\n \n OT sparsity = %s \n marg. 
error = %s \n runtime = %s' % ( + np.round(log_pgd['fgw_dist'], 3), str(np.round(T_pgd_sparsity, 2)) + ' %', + np.round(err_pgd, 4), str(np.round(time_pgd, 2)) + ' ms'), fontsize=fontsize) + +pos1, pos2 = draw_transp_colored_GW( + weightedG2, C2, weightedG3, C3, part_G2, p1=T_pgd.sum(1), p2=T_pgd.sum(0), + T=T_pgd, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0) + + +pl.subplot(144) +pl.axis('off') -pl.title('Entropic FGW=%s\n \n OT sparsity = %s \n feasibility error = %s' % ( - np.round(loge['fgw_dist'], 3), str(np.round(fgwe_sparsity, 2)) + ' %', - np.round(erre, 4)), fontsize=fontsize) +pl.title('(BAPG) FGW=%s\n \n OT sparsity = %s \n marg. error = %s \n runtime = %s' % ( + np.round(log_bapg['fgw_dist'], 3), str(np.round(T_bapg_sparsity, 2)) + ' %', + np.round(err_bapg, 4), str(np.round(time_bapg, 2)) + ' ms'), fontsize=fontsize) pos1, pos2 = draw_transp_colored_GW( - weightedG2, C2, weightedG3, C3, part_G2, p1=pe, p2=qe, T=fgwe, - pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0) + weightedG2, C2, weightedG3, C3, part_G2, p1=T_bapg.sum(1), p2=T_bapg.sum(0), + T=T_bapg, pos1=pos1, pos2=pos2, shiftx=0., node_size=node_size, seed_G1=0, seed_G2=0) pl.tight_layout() From 345f0943d3f7f7a1bf38e272d1cb60fbc569c5d4 Mon Sep 17 00:00:00 2001 From: clvincen Date: Mon, 27 Nov 2023 15:22:40 +0100 Subject: [PATCH 5/6] change BAPG names + improve doc --- examples/gromov/plot_fgw_solvers.py | 4 +- ot/gromov/__init__.py | 14 ++--- ot/gromov/_bregman.py | 98 +++++++++++++++++++++++------ test/test_gromov.py | 44 ++++++------- 4 files changed, 109 insertions(+), 51 deletions(-) diff --git a/examples/gromov/plot_fgw_solvers.py b/examples/gromov/plot_fgw_solvers.py index 4a66b5858..75c12cca0 100644 --- a/examples/gromov/plot_fgw_solvers.py +++ b/examples/gromov/plot_fgw_solvers.py @@ -46,7 +46,7 @@ import matplotlib.pylab as pl from ot.gromov import (fused_gromov_wasserstein, entropic_fused_gromov_wasserstein, - entropic_BAPG_fused_gromov_wasserstein) + BAPG_fused_gromov_wasserstein) import networkx from networkx.generators.community import stochastic_block_model as sbm from time import time @@ -128,7 +128,7 @@ # Alternated Bregman Projected Gradient algorithm with Kullback-Leibler as proximal operator print('Bregman Alternated Projected Gradient \n') start_bapg = time() -T_bapg, log_bapg = entropic_BAPG_fused_gromov_wasserstein( +T_bapg, log_bapg = BAPG_fused_gromov_wasserstein( M, C2, C3, h2, h3, 'square_loss', alpha=alpha, epsilon=1., tol=1e-9, marginal_loss=True, verbose=True, log=True) end_bapg = time() diff --git a/ot/gromov/__init__.py b/ot/gromov/__init__.py index 63223e961..4d77fc57a 100644 --- a/ot/gromov/__init__.py +++ b/ot/gromov/__init__.py @@ -20,13 +20,13 @@ from ._bregman import (entropic_gromov_wasserstein, entropic_gromov_wasserstein2, - entropic_BAPG_gromov_wasserstein, - entropic_BAPG_gromov_wasserstein2, + BAPG_gromov_wasserstein, + BAPG_gromov_wasserstein2, entropic_gromov_barycenters, entropic_fused_gromov_wasserstein, entropic_fused_gromov_wasserstein2, - entropic_BAPG_fused_gromov_wasserstein, - entropic_BAPG_fused_gromov_wasserstein2, + BAPG_fused_gromov_wasserstein, + BAPG_fused_gromov_wasserstein2, entropic_fused_gromov_barycenters) from ._estimators import (GW_distance_estimation, pointwise_gromov_wasserstein, @@ -53,10 +53,10 @@ 'gromov_wasserstein', 'gromov_wasserstein2', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2', 'solve_gromov_linesearch', 'gromov_barycenters', 'fgw_barycenters', 
'entropic_gromov_wasserstein', 'entropic_gromov_wasserstein2', - 'entropic_BAPG_gromov_wasserstein', 'entropic_BAPG_gromov_wasserstein2', + 'BAPG_gromov_wasserstein', 'BAPG_gromov_wasserstein2', 'entropic_gromov_barycenters', 'entropic_fused_gromov_wasserstein', - 'entropic_fused_gromov_wasserstein2', 'entropic_BAPG_fused_gromov_wasserstein', - 'entropic_BAPG_fused_gromov_wasserstein2', 'entropic_fused_gromov_barycenters', + 'entropic_fused_gromov_wasserstein2', 'BAPG_fused_gromov_wasserstein', + 'BAPG_fused_gromov_wasserstein2', 'entropic_fused_gromov_barycenters', 'GW_distance_estimation', 'pointwise_gromov_wasserstein', 'sampled_gromov_wasserstein', 'semirelaxed_gromov_wasserstein', 'semirelaxed_gromov_wasserstein2', 'semirelaxed_fused_gromov_wasserstein', 'semirelaxed_fused_gromov_wasserstein2', diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index bb3ba5627..df4ba0ae3 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -343,7 +343,7 @@ def entropic_gromov_wasserstein2( return logv['gw_dist'] -def entropic_BAPG_gromov_wasserstein( +def BAPG_gromov_wasserstein( C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000, tol=1e-9, marginal_loss=False, verbose=False, log=False): @@ -351,12 +351,25 @@ def entropic_BAPG_gromov_wasserstein( Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` estimated using Bregman Alternated Projected Gradient method. - The function solves the following Gromov-Wasserstein - optimization problem [63]: + If `marginal_loss=True`, the function solves the following Gromov-Wasserstein + optimization problem : .. math:: \mathbf{T}^* \in \mathop{\arg\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + + Else, the function solves an equivalent problem [63], where constant terms only + depending on the marginals :math:`\mathbf{p}`: and :math:`\mathbf{q}`: are + discarded while assuming that L decomposes as in Proposition 1 in [12]: + + .. math:: + \mathbf{T}^* \in\mathop{\arg\min}_\mathbf{T} \quad - \langle h_1(\mathbf{C}_1) \mathbf{T} h_2(\mathbf{C_2})^\top , \mathbf{T} \rangle_F + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} \mathbf{T}^T \mathbf{1} &= \mathbf{q} @@ -369,6 +382,7 @@ def entropic_BAPG_gromov_wasserstein( - :math:`\mathbf{p}`: distribution in the source space - :math:`\mathbf{q}`: distribution in the target space - `L`: loss function to account for the misfit between the similarity matrices + satisfying :math:`L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)` .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` returned by this function does not necessarily satisfy the marginal @@ -407,7 +421,7 @@ def entropic_BAPG_gromov_wasserstein( tol : float, optional Stop threshold on error (>0) marginal_loss: bool, optional. Default is False. - Include constant terms or not in the matching objective function. + Include constant marginal terms or not in the objective function. 
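+        If False, the solver uses the equivalent formulation above in which
+        these constant terms are discarded.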
verbose : bool, optional Print information along iterations log : bool, optional @@ -524,19 +538,33 @@ def df(T): return T -def entropic_BAPG_gromov_wasserstein2( +def BAPG_gromov_wasserstein2( C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, G0=None, max_iter=1000, tol=1e-9, marginal_loss=False, verbose=False, log=False): r""" Returns the Gromov-Wasserstein loss :math:`\mathbf{GW}` between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` estimated using Bregman Alternated Projected Gradient method. - The function solves the following Gromov-Wasserstein - optimization problem [63]: + If `marginal_loss=True`, the function solves the following Gromov-Wasserstein + optimization problem : + .. math:: \mathbf{GW} = \mathop{\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + + Else, the function solves an equivalent problem [63, 64], where constant terms only + depending on the marginals :math:`\mathbf{p}`: and :math:`\mathbf{q}`: are + discarded while assuming that L decomposes as in Proposition 1 in [12]: + + .. math:: + \mathop{\min}_\mathbf{T} \quad - \langle h_1(\mathbf{C}_1) \mathbf{T} h_2(\mathbf{C_2})^\top , \mathbf{T} \rangle_F + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} \mathbf{T}^T \mathbf{1} &= \mathbf{q} @@ -549,6 +577,7 @@ def entropic_BAPG_gromov_wasserstein2( - :math:`\mathbf{p}`: distribution in the source space - :math:`\mathbf{q}`: distribution in the target space - `L`: loss function to account for the misfit between the similarity matrices + satisfying :math:`L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)` .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}` returned by this function does not necessarily satisfy the marginal @@ -588,7 +617,7 @@ def entropic_BAPG_gromov_wasserstein2( tol : float, optional Stop threshold on error (>0) marginal_loss: bool, optional. Default is False. - Include constant terms or not in the matching objective function. + Include constant marginal terms or not in the objective function. verbose : bool, optional Print information along iterations log : bool, optional @@ -607,7 +636,7 @@ def entropic_BAPG_gromov_wasserstein2( """ - T, logv = entropic_BAPG_gromov_wasserstein( + T, logv = BAPG_gromov_wasserstein( C1, C2, p, q, loss_fun, epsilon, symmetric, G0, max_iter, tol, marginal_loss, verbose, log=True) @@ -1153,7 +1182,7 @@ def entropic_fused_gromov_wasserstein2( return logv['fgw_dist'] -def entropic_BAPG_fused_gromov_wasserstein( +def BAPG_fused_gromov_wasserstein( M, C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1, symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9, marginal_loss=False, verbose=False, log=False): @@ -1162,8 +1191,8 @@ def entropic_BAPG_fused_gromov_wasserstein( with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`, estimated using Bregman Alternated Projected Gradient method. - The function solves the following Fused Gromov-Wasserstein - optimization problem [63, 64]: + If `marginal_loss=True`, the function solves the following Fused Gromov-Wasserstein + optimization problem : .. 
@@ -1153,7 +1182,7 @@ def entropic_fused_gromov_wasserstein2(
     return logv['fgw_dist']


-def entropic_BAPG_fused_gromov_wasserstein(
+def BAPG_fused_gromov_wasserstein(
         M, C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1,
         symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9,
         marginal_loss=False, verbose=False, log=False):
@@ -1162,8 +1191,8 @@ def entropic_BAPG_fused_gromov_wasserstein(
     with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices
     :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`,
     estimated using Bregman Alternated Projected Gradient method.

-    The function solves the following Fused Gromov-Wasserstein
-    optimization problem [63, 64]:
+    If `marginal_loss=True`, the function solves the following Fused Gromov-Wasserstein
+    optimization problem:

     .. math::
         \mathbf{T}^* \in \mathop{\arg\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
@@ -1174,14 +1203,29 @@ def entropic_BAPG_fused_gromov_wasserstein(

         \mathbf{T}^T \mathbf{1} &= \mathbf{q}

         \mathbf{T} &\geq 0
+
+    Otherwise, the function solves an equivalent problem [63, 64] in which the constant
+    terms that depend only on the marginals :math:`\mathbf{p}` and :math:`\mathbf{q}`
+    are discarded, assuming that `L` decomposes as in Proposition 1 of [12]:
+
+    .. math::
+        \mathbf{T}^* \in \mathop{\arg\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F -
+        \alpha \langle h_1(\mathbf{C_1}) \mathbf{T} h_2(\mathbf{C_2})^\top, \mathbf{T} \rangle_F
+
+        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+             \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+             \mathbf{T} &\geq 0
+
     Where :

-    - :math:`\mathbf{M}`: metric cost matrix between features across domains
+    - :math:`\mathbf{M}`: pairwise relation matrix between features across domains
     - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
     - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
     - :math:`\mathbf{p}`: distribution in the source space
     - :math:`\mathbf{q}`: distribution in the target space
     - `L`: loss function to account for the misfit between the similarity and feature matrices
+      satisfying :math:`L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)`
     - :math:`\alpha`: trade-off parameter

     .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}`
         returned by this function does not necessarily satisfy the marginal
@@ -1194,7 +1238,7 @@ def entropic_BAPG_fused_gromov_wasserstein(
     Parameters
     ----------
     M : array-like, shape (ns, nt)
-        Metric cost matrix between features across domains
+        Pairwise relation matrix between features across domains
     C1 : array-like, shape (ns, ns)
         Metric cost matrix in the source space
     C2 : array-like, shape (nt, nt)
         Metric cost matrix in the target space
@@ -1225,7 +1269,7 @@ def entropic_BAPG_fused_gromov_wasserstein(
     tol : float, optional
         Stop threshold on error (>0)
     marginal_loss: bool, optional. Default is False.
-        Include constant terms or not in the matching objective function.
+        Include constant marginal terms or not in the objective function.
     verbose : bool, optional
         Print information along iterations
     log : bool, optional
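Mirroring the GW sketch earlier, a minimal fused call might look as follows. Again a sketch under the same assumptions, with the cross-graph matrix `M` built from toy node features and `alpha` chosen arbitrarily:

```python
import numpy as np
import ot

rng = np.random.RandomState(42)
xs, xt = rng.randn(10, 2), rng.randn(8, 2)   # node embeddings (structure)
ys, yt = rng.randn(10, 3), rng.randn(8, 3)   # node features
C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)    # intra-graph structure matrices
M = ot.dist(ys, yt)                          # feature relation matrix across graphs

# alpha trades off the structure term (alpha) against the feature term (1 - alpha);
# p and q are left as None, which in POT conventions means uniform marginals.
T, log = ot.gromov.BAPG_fused_gromov_wasserstein(
    M, C1, C2, loss_fun='square_loss', epsilon=1e-1, alpha=0.5,
    max_iter=100, marginal_loss=False, log=True)
```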
@@ -1344,7 +1388,7 @@ def df(T):
     return T


-def entropic_BAPG_fused_gromov_wasserstein2(
+def BAPG_fused_gromov_wasserstein2(
         M, C1, C2, p=None, q=None, loss_fun='square_loss', epsilon=0.1,
         symmetric=None, alpha=0.5, G0=None, max_iter=1000, tol=1e-9,
         marginal_loss=False, verbose=False, log=False):
@@ -1353,8 +1397,8 @@ def entropic_BAPG_fused_gromov_wasserstein2(
     with pairwise distance matrix :math:`\mathbf{M}` between node feature matrices
     :math:`\mathbf{Y_1}` and :math:`\mathbf{Y_2}`,
     estimated using Bregman Alternated Projected Gradient method.

-    The function solves the following Fused Gromov-Wasserstein
-    optimization problem [63, 64]:
+    If `marginal_loss=True`, the function solves the following Fused Gromov-Wasserstein
+    optimization problem:

     .. math::
         \mathbf{FGW} = \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F +
@@ -1365,6 +1409,19 @@ def entropic_BAPG_fused_gromov_wasserstein2(

         \mathbf{T}^T \mathbf{1} &= \mathbf{q}

         \mathbf{T} &\geq 0
+
+    Otherwise, the function solves an equivalent problem [63, 64] in which the constant
+    terms that depend only on the marginals :math:`\mathbf{p}` and :math:`\mathbf{q}`
+    are discarded, assuming that `L` decomposes as in Proposition 1 of [12]:
+
+    .. math::
+        \mathop{\min}_\mathbf{T} \quad (1 - \alpha) \langle \mathbf{T}, \mathbf{M} \rangle_F -
+        \alpha \langle h_1(\mathbf{C_1}) \mathbf{T} h_2(\mathbf{C_2})^\top, \mathbf{T} \rangle_F
+
+        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+             \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+             \mathbf{T} &\geq 0

     Where :

     - :math:`\mathbf{M}`: metric cost matrix between features across domains
@@ -1373,6 +1430,7 @@ def entropic_BAPG_fused_gromov_wasserstein2(
     - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
     - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
     - :math:`\mathbf{p}`: distribution in the source space
     - :math:`\mathbf{q}`: distribution in the target space
     - `L`: loss function to account for the misfit between the similarity and feature matrices
+      satisfying :math:`L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)`
     - :math:`\alpha`: trade-off parameter

     .. note:: By algorithmic design the optimal coupling :math:`\mathbf{T}`
         returned by this function does not necessarily satisfy the marginal
@@ -1416,7 +1474,7 @@ def entropic_BAPG_fused_gromov_wasserstein2(
     tol : float, optional
         Stop threshold on error (>0)
     marginal_loss: bool, optional. Default is False.
-        Include constant terms or not in the matching objective function.
+        Include constant marginal terms or not in the objective function.
     verbose : bool, optional
         Print information along iterations
     log : bool, optional
@@ -1438,7 +1496,7 @@ def entropic_BAPG_fused_gromov_wasserstein2(
     """
     nx = get_backend(M, C1, C2)

-    T, logv = entropic_BAPG_fused_gromov_wasserstein(
+    T, logv = BAPG_fused_gromov_wasserstein(
         M, C1, C2, p, q, loss_fun, epsilon, symmetric, alpha, G0,
         max_iter, tol, marginal_loss, verbose, log=True)
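The note repeated in these docstrings, that the returned coupling need not satisfy the marginal constraints exactly, is worth verifying in practice. A small sanity check, reusing `T`, `p` and `q` from the sketches above:

```python
import numpy as np

# By algorithmic design T may violate the marginal constraints slightly
# (see the docstring notes above); quantify the residual violation.
row_err = np.abs(T.sum(axis=1) - p).max()
col_err = np.abs(T.sum(axis=0) - q).max()
print(f"max marginal violation: rows {row_err:.2e}, cols {col_err:.2e}")
```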
diff --git a/test/test_gromov.py b/test/test_gromov.py
index c156154ed..83d65306b 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -572,9 +572,9 @@ def test_entropic_gromov_dtype_device(nx):

         for solver in ['PGD', 'PPA', 'BAPG']:
             if solver == 'BAPG':
-                Gb = ot.gromov.entropic_BAPG_gromov_wasserstein(
+                Gb = ot.gromov.BAPG_gromov_wasserstein(
                     C1b, C2b, pb, qb, max_iter=2, verbose=True)
-                gw_valb = ot.gromov.entropic_BAPG_gromov_wasserstein2(
+                gw_valb = ot.gromov.BAPG_gromov_wasserstein2(
                     C1b, C2b, pb, qb, max_iter=2, verbose=True)
             else:
                 Gb = ot.gromov.entropic_gromov_wasserstein(
@@ -586,7 +586,7 @@ def test_entropic_gromov_dtype_device(nx):
         nx.assert_same_dtype_device(C1b, gw_valb)


-def test_entropic_BAPG_gromov(nx):
+def test_BAPG_gromov(nx):
     n_samples = 10  # nb samples

     mu_s = np.array([0, 0])
@@ -611,16 +611,16 @@
     marginal_loss = True
     with pytest.raises(ValueError):
         loss_fun = 'weird_loss_fun'
-        G, log = ot.gromov.entropic_BAPG_gromov_wasserstein(
+        G, log = ot.gromov.BAPG_gromov_wasserstein(
            C1, C2, None, q, loss_fun, symmetric=None, G0=G0,
            epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss,
            verbose=True, log=True)

-    G, log = ot.gromov.entropic_BAPG_gromov_wasserstein(
+    G, log = ot.gromov.BAPG_gromov_wasserstein(
        C1, C2, None, q, 'square_loss', symmetric=None, G0=G0,
        epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss,
        verbose=True, log=True)
-    Gb = nx.to_numpy(ot.gromov.entropic_BAPG_gromov_wasserstein(
+    Gb = nx.to_numpy(ot.gromov.BAPG_gromov_wasserstein(
         C1b, C2b, pb, None, 'square_loss', symmetric=True, G0=None,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True,
         log=False
     ))
@@ -635,14 +635,14 @@

     with pytest.warns(UserWarning):
-        gw = ot.gromov.entropic_BAPG_gromov_wasserstein2(
+        gw = ot.gromov.BAPG_gromov_wasserstein2(
             C1, C2, p, q, 'kl_loss', symmetric=False, G0=None,
             max_iter=10, epsilon=1e-2, marginal_loss=marginal_loss, log=False)

-    gw, log = ot.gromov.entropic_BAPG_gromov_wasserstein2(
+    gw, log = ot.gromov.BAPG_gromov_wasserstein2(
         C1, C2, p, q, 'kl_loss', symmetric=False, G0=None,
         max_iter=10, epsilon=1., marginal_loss=marginal_loss, log=True)
-    gwb, logb = ot.gromov.entropic_BAPG_gromov_wasserstein2(
+    gwb, logb = ot.gromov.BAPG_gromov_wasserstein2(
         C1b, C2b, pb, qb, 'kl_loss', symmetric=None, G0=G0b,
         max_iter=10, epsilon=1., marginal_loss=marginal_loss, log=True)
     gwb = nx.to_numpy(gwb)
@@ -661,11 +661,11 @@
         q, Gb.sum(0), atol=1e-02)  # cf convergence gromov

     marginal_loss = False
-    G, log = ot.gromov.entropic_BAPG_gromov_wasserstein(
+    G, log = ot.gromov.BAPG_gromov_wasserstein(
        C1, C2, None, q, 'square_loss', symmetric=None, G0=G0,
        epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss,
        verbose=True, log=True)
-    Gb = nx.to_numpy(ot.gromov.entropic_BAPG_gromov_wasserstein(
+    Gb = nx.to_numpy(ot.gromov.BAPG_gromov_wasserstein(
         C1b, C2b, pb, None, 'square_loss', symmetric=False, G0=None,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True,
         log=False
@@ -810,7 +810,7 @@ def test_entropic_proximal_fgw(nx):
         q, Gb.sum(0), atol=1e-04)  # cf convergence gromov


-def test_entropic_BAPG_fgw(nx):
+def test_BAPG_fgw(nx):
     n_samples = 5  # nb samples

     mu_s = np.array([0, 0])
@@ -840,16 +840,16 @@
     with pytest.raises(ValueError):
         loss_fun = 'weird_loss_fun'
-        G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+        G, log = ot.gromov.BAPG_fused_gromov_wasserstein(
             M, C1, C2, p, q, loss_fun=loss_fun, max_iter=1, log=True)

     # complete test with marginal loss = True
     marginal_loss = True
-    G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+    G, log = ot.gromov.BAPG_fused_gromov_wasserstein(
         M, C1, C2, p, q, 'square_loss', symmetric=None, G0=G0,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, log=True)

-    Gb = nx.to_numpy(ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+    Gb = nx.to_numpy(ot.gromov.BAPG_fused_gromov_wasserstein(
         Mb, C1b, C2b, pb, qb, 'square_loss', symmetric=True, G0=None,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss,
         verbose=True))
@@ -862,14 +862,14 @@

     with pytest.warns(UserWarning):
-        fgw = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2(
+        fgw = ot.gromov.BAPG_fused_gromov_wasserstein2(
             M, C1, C2, p, q, 'kl_loss', symmetric=False, G0=None,
             max_iter=10, epsilon=1e-3, marginal_loss=marginal_loss, log=False)

-    fgw, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2(
+    fgw, log = ot.gromov.BAPG_fused_gromov_wasserstein2(
         M, C1, C2, p, None, 'kl_loss', symmetric=True, G0=None,
         max_iter=5, epsilon=1, marginal_loss=marginal_loss, log=True)
-    fgwb, logb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2(
+    fgwb, logb = ot.gromov.BAPG_fused_gromov_wasserstein2(
         Mb, C1b, C2b, None, qb, 'kl_loss', symmetric=None, G0=G0b,
         max_iter=5, epsilon=1, marginal_loss=marginal_loss, log=True)
     fgwb = nx.to_numpy(fgwb)
@@ -889,10 +889,10 @@

     # Tests with marginal_loss = False
     marginal_loss = False
-    G, log = ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+    G, log = ot.gromov.BAPG_fused_gromov_wasserstein(
         M, C1, C2, p, q, 'square_loss', symmetric=False, G0=G0,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, log=True)
-    Gb = nx.to_numpy(ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+    Gb = nx.to_numpy(ot.gromov.BAPG_fused_gromov_wasserstein(
         Mb, C1b, C2b, pb, qb, 'square_loss', symmetric=None, G0=None,
         epsilon=1e-1, max_iter=10, marginal_loss=marginal_loss, verbose=True))
     # check constraints
@@ -980,9 +980,9 @@ def test_entropic_fgw_dtype_device(nx):

         for solver in ['PGD', 'PPA', 'BAPG']:
             if solver == 'BAPG':
-                Gb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein(
+                Gb = ot.gromov.BAPG_fused_gromov_wasserstein(
                     Mb, C1b, C2b, pb, qb, max_iter=2)
-                fgw_valb = ot.gromov.entropic_BAPG_fused_gromov_wasserstein2(
+                fgw_valb = ot.gromov.BAPG_fused_gromov_wasserstein2(
                     Mb, C1b, C2b, pb, qb, max_iter=2)
             else:

From 655a95e87c5e60cc76d489966b15f34602a2c28f Mon Sep 17 00:00:00 2001
From: clvincen
Date: Thu, 30 Nov 2023 13:35:07 +0100
Subject: [PATCH 6/6] merge

---
 RELEASES.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASES.md b/RELEASES.md
index a6c517065..9919076f6 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -20,7 +20,7 @@
 + Wrapper for `geomloss` solver on empirical samples (PR #571)
 + Add `stop_criterion` feature to (un)regularized (f)gw barycenter solvers (PR #578)
 + Add `fixed_structure` and `fixed_features` to entropic fgw barycenter solver (PR #578)
-+ Add new entropic BAPG solvers for GW and FGW (PR #581)
++ Add new BAPG solvers with KL projections for GW and FGW (PR #581)
 + Add Bures-Wasserstein barycenter in `ot.gaussian` (PR #582)