From cba9c7b7d27b59edf49979c746e480dbce787bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Vincent-Cuaz?= Date: Thu, 30 May 2024 01:09:22 +0200 Subject: [PATCH] [WIP] quantized gromov wasserstein solver (#603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * first commit : quantized gromov wasserstein solver * start setting up tests * fix build OT for all backends - nb: concatenation procedure is less efficient for numpy and torch * dealing with edge cases * fix pep8 * updates + start setting exemple * updates + start setting exemple * updating code + exemple + test + docs * fix sklearn imports * fix * setting up new API for qGW * fix pep8 * tests * update qFGW plots * update qFGW plots * up tests * update example * merge master * complete tests --------- Co-authored-by: Rémi Flamary --- CONTRIBUTORS.md | 2 +- README.md | 5 +- RELEASES.md | 1 + .../plot_quantized_gromov_wasserstein.py | 515 ++++++++ ot/gromov/__init__.py | 16 +- ot/gromov/_quantized.py | 1147 +++++++++++++++++ test/gromov/test_quantized.py | 377 ++++++ 7 files changed, 2060 insertions(+), 3 deletions(-) create mode 100644 examples/gromov/plot_quantized_gromov_wasserstein.py create mode 100644 ot/gromov/_quantized.py create mode 100644 test/gromov/test_quantized.py diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index c185e18a7..e982cd5b6 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -41,7 +41,7 @@ The contributors to this library are: * [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein) * [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance) * [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends) -* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning, FGW, semi-relaxed FGW) +* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning, FGW, semi-relaxed FGW, quantized FGW) * [Eloi 
Tanguy](https://github.com/eloitanguy) (Generalized Wasserstein Barycenters) * [Camille Le Coz](https://www.linkedin.com/in/camille-le-coz-8593b91a1/) (EMD2 debug) * [Eduardo Fernandes Montesuma](https://eddardd.github.io/my-personal-blog/) (Free support sinkhorn barycenter) diff --git a/README.md b/README.md index f1149a008..1cd9fb59b 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ POT provides the following generic OT solvers (links to examples): * [Spherical Sliced Wasserstein](https://pythonot.github.io/auto_examples/sliced-wasserstein/plot_variance_ssw.html) [46] * [Graph Dictionary Learning solvers](https://pythonot.github.io/auto_examples/gromov/plot_gromov_wasserstein_dictionary_learning.html) [38]. * [Semi-relaxed (Fused) Gromov-Wasserstein divergences](https://pythonot.github.io/auto_examples/gromov/plot_semirelaxed_fgw.html) (exact and regularized [48]). +* [Quantized (Fused) Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_quantized_gromov_wasserstein.html) [68]. * [Efficient Discrete Multi Marginal Optimal Transport Regularization](https://pythonot.github.io/auto_examples/others/plot_demd_gradient_minimize.html) [50]. * [Several backends](https://pythonot.github.io/quickstart.html#solving-ot-with-multiple-backends) for easy use of POT with [Pytorch](https://pytorch.org/)/[jax](https://github.com/google/jax)/[Numpy](https://numpy.org/)/[Cupy](https://cupy.dev/)/[Tensorflow](https://www.tensorflow.org/) arrays. * Smooth Strongly Convex Nearest Brenier Potentials [58], with an extension to bounding potentials using [59]. @@ -358,4 +359,6 @@ distances between Gaussian distributions](https://hal.science/hal-03197398v2/fil [66] Pooladian, Aram-Alexandre, and Jonathan Niles-Weed. [Entropic estimation of optimal transport maps](https://arxiv.org/pdf/2109.12004.pdf). arXiv preprint arXiv:2109.12004 (2021). -[67] Scetbon, M., Peyré, G. & Cuturi, M. (2022). 
[Linear-Time GromovWasserstein Distances using Low Rank Couplings and Costs](https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf). In International Conference on Machine Learning (ICML), 2022. \ No newline at end of file +[67] Scetbon, M., Peyré, G. & Cuturi, M. (2022). [Linear-Time Gromov-Wasserstein Distances using Low Rank Couplings and Costs](https://proceedings.mlr.press/v162/scetbon22b/scetbon22b.pdf). In International Conference on Machine Learning (ICML), 2022. + +[68] Chowdhury, S., Miller, D., & Needham, T. (2021). [Quantized gromov-wasserstein](https://link.springer.com/chapter/10.1007/978-3-030-86523-8_49). ECML PKDD 2021. Springer International Publishing. diff --git a/RELEASES.md b/RELEASES.md index c31081451..51075c973 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -3,6 +3,7 @@ ## 0.9.4dev #### New features ++ New quantized FGW solvers `ot.gromov.quantized_fused_gromov_wasserstein`, `ot.gromov.quantized_fused_gromov_wasserstein_samples` and `ot.gromov.quantized_fused_gromov_wasserstein_partitioned` (PR #603) + `ot.gromov._gw.solve_gromov_linesearch` now has an argument to specify if the matrices are symmetric in which case the computation can be done faster (PR #607). + Continuous entropic mapping (PR #613) + New general unbalanced solvers for `ot.solve` and BFGS solver and illustrative example (PR #620) diff --git a/examples/gromov/plot_quantized_gromov_wasserstein.py b/examples/gromov/plot_quantized_gromov_wasserstein.py new file mode 100644 index 000000000..02d777c71 --- /dev/null +++ b/examples/gromov/plot_quantized_gromov_wasserstein.py @@ -0,0 +1,515 @@ +# -*- coding: utf-8 -*- +""" +=============================================== +Quantized Fused Gromov-Wasserstein examples +=============================================== + +These examples show how to use the quantized (Fused) Gromov-Wasserstein +solvers (qFGW) [68]. 
POT provides a generic solver `quantized_fused_gromov_wasserstein_partitioned` +that takes as inputs partitioned graphs potentially endowed with node features, +which have to be built by the user. On top of that, POT provides two wrappers: + i) `quantized_fused_gromov_wasserstein` operating over generic graphs, whose + partitioning is performed via `get_graph_partition` using e.g the Louvain algorithm, + and representant for each partition can be selected via `get_graph_representants` + using e.g the PageRank algorithm. + + ii) `quantized_fused_gromov_wasserstein_samples` operating over point clouds, + e.g :math:`X_1 \in R^{n_1 * d_1}` and :math:`X_2 \in R^{n_2 * d_2}` + endowed with their respective euclidean geometry, whose partitioning and + representant selection is performed jointly using e.g the K-means algorithm + via the function `get_partition_and_representants_samples`. + + +We illustrate next how to compute the qGW distance on both types of data by: + + i) Generating two graphs following Stochastic Block Models encoded as shortest + path matrices as qGW solvers tend to require dense structure to achieve a good + approximation of the GW distance (as qGW is an upper-bound of GW). In the meantime, + we illustrate an optional feature of our solvers, namely the use of auxiliary + structures e.g adjacency matrices to perform the graph partitioning. + + ii) Generating two point clouds representing curves in 2D and 3D respectively. + We augment these point clouds by considering additional features of the same + dimensionality :math:`F_1 \in R^{n_1 * d}` and :math:`F_2 \in R^{n_2 * d}`, + representing the color intensity associated to each sample of both distributions. + Then we compute the qFGW distance between these attributed point clouds. + + +[68] Chowdhury, S., Miller, D., & Needham, T. (2021). Quantized gromov-wasserstein. +ECML PKDD 2021. Springer International Publishing. 
+""" + +# Author: Cédric Vincent-Cuaz +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 2 + +import numpy as np +import matplotlib.pylab as pl +import matplotlib.pyplot as plt +import networkx +from networkx.generators.community import stochastic_block_model as sbm +from scipy.sparse.csgraph import shortest_path + +from ot.gromov import ( + quantized_fused_gromov_wasserstein_partitioned, quantized_fused_gromov_wasserstein, + get_graph_partition, get_graph_representants, format_partitioned_graph, + quantized_fused_gromov_wasserstein_samples, + get_partition_and_representants_samples) + +############################################################################# +# +# Generate graphs +# -------------------------------------------------------------------------- +# +# Create two graphs following Stochastic Block models of 2 and 3 clusters. + +N1 = 30 # 2 communities +N2 = 45 # 3 communities +p1 = [[0.8, 0.1], + [0.1, 0.7]] +p2 = [[0.8, 0.1, 0.], + [0.1, 0.75, 0.1], + [0., 0.1, 0.7]] +G1 = sbm(seed=0, sizes=[N1 // 2, N1 // 2], p=p1) +G2 = sbm(seed=0, sizes=[N2 // 3, N2 // 3, N2 // 3], p=p2) + + +C1 = networkx.to_numpy_array(G1) +C2 = networkx.to_numpy_array(G2) + +spC1 = shortest_path(C1) +spC2 = shortest_path(C2) + +h1 = np.ones(C1.shape[0]) / C1.shape[0] +h2 = np.ones(C2.shape[0]) / C2.shape[0] + +# Add weights on the edges for visualization later on +weight_intra_G1 = 5 +weight_inter_G1 = 0.5 +weight_intra_G2 = 1. 
+weight_inter_G2 = 1.5 + +weightedG1 = networkx.Graph() +part_G1 = [G1.nodes[i]['block'] for i in range(N1)] + +for node in G1.nodes(): + weightedG1.add_node(node) +for i, j in G1.edges(): + if part_G1[i] == part_G1[j]: + weightedG1.add_edge(i, j, weight=weight_intra_G1) + else: + weightedG1.add_edge(i, j, weight=weight_inter_G1) + +weightedG2 = networkx.Graph() +part_G2 = [G2.nodes[i]['block'] for i in range(N2)] + +for node in G2.nodes(): + weightedG2.add_node(node) +for i, j in G2.edges(): + if part_G2[i] == part_G2[j]: + weightedG2.add_edge(i, j, weight=weight_intra_G2) + else: + weightedG2.add_edge(i, j, weight=weight_inter_G2) + + +# setup for graph visualization + +def node_coloring(part, starting_color=0): + + # get graphs partition and their coloring + unique_colors = ['C%s' % (starting_color + i) for i in np.unique(part)] + nodes_color_part = [] + for cluster in part: + nodes_color_part.append(unique_colors[cluster]) + + return nodes_color_part + + +def draw_graph(G, C, nodes_color_part, rep_indices, node_alphas=None, pos=None, + edge_color='black', alpha_edge=0.7, node_size=None, + shiftx=0, seed=0, highlight_rep=False): + + if (pos is None): + pos = networkx.spring_layout(G, scale=1., seed=seed) + + if shiftx != 0: + for k, v in pos.items(): + v[0] = v[0] + shiftx + + width_edge = 1.5 + + if not highlight_rep: + networkx.draw_networkx_edges( + G, pos, width=width_edge, alpha=alpha_edge, edge_color=edge_color) + else: + for edge in G.edges: + if (edge[0] in rep_indices) and (edge[1] in rep_indices): + networkx.draw_networkx_edges( + G, pos, edgelist=[edge], width=width_edge, alpha=alpha_edge, + edge_color=edge_color) + else: + networkx.draw_networkx_edges( + G, pos, edgelist=[edge], width=width_edge, alpha=0.2, + edge_color=edge_color) + + for node, node_color in enumerate(nodes_color_part): + local_node_shape, local_node_size = 'o', node_size + + if highlight_rep: + if node in rep_indices: + local_node_shape, local_node_size = '*', 6 * node_size + + if 
node_alphas is None: + alpha = 0.9 + if highlight_rep: + alpha = 0.9 if node in rep_indices else 0.1 + + else: + alpha = node_alphas[node] + + networkx.draw_networkx_nodes(G, pos, nodelist=[node], alpha=alpha, + node_shape=local_node_shape, + node_size=local_node_size, + node_color=node_color) + + return pos + + +############################################################################# +# +# Compute their quantized Gromov-Wasserstein distance without using the wrapper +# --------------------------------------------------------- +# +# We detail next the steps implemented within the wrapper that preprocess graphs +# to form partitioned graphs, which are then passed as input to the generic qFGW solver. + +# 1-a) Partition C1 and C2 in 2 and 3 clusters respectively using Louvain +# algorithm from Networkx. Then encode these partitions via vectors of assignments. + +part_method = 'louvain' +rep_method = 'pagerank' + +npart_1 = 2 # 2 clusters used to describe C1 +npart_2 = 3 # 3 clusters used to describe C2 + +part1 = get_graph_partition( + C1, npart=npart_1, part_method=part_method, F=None, alpha=1.) +part2 = get_graph_partition( + C2, npart=npart_2, part_method=part_method, F=None, alpha=1.) + +# 1-b) Select representant in each partition using the Pagerank algorithm +# implementation from networkx. + +rep_indices1 = get_graph_representants(C1, part1, rep_method=rep_method) +rep_indices2 = get_graph_representants(C2, part2, rep_method=rep_method) + +# 1-c) Formate partitions such that: +# CR contains relations between representants in each space. +# list_R contains relations between samples and representants within each partition. +# list_h contains samples relative importance within each partition. + +CR1, list_R1, list_h1 = format_partitioned_graph( + spC1, h1, part1, rep_indices1, F=None, M=None, alpha=1.) + +CR2, list_R2, list_h2 = format_partitioned_graph( + spC2, h2, part2, rep_indices2, F=None, M=None, alpha=1.) 
+ +# 1-d) call to partitioned quantized gromov-wasserstein solver + +OT_global_, OTs_local_, OT_, log_ = quantized_fused_gromov_wasserstein_partitioned( + CR1, CR2, list_R1, list_R2, list_h1, list_h2, MR=None, + alpha=1., build_OT=True, log=True) + + +# Visualization of the graph pre-processing + +node_size = 40 +fontsize = 10 +seed_G1 = 0 +seed_G2 = 3 + +part1_ = part1.astype(np.int32) +part2_ = part2.astype(np.int32) + + +nodes_color_part1 = node_coloring(part1_, starting_color=0) +nodes_color_part2 = node_coloring(part2_, starting_color=np.unique(nodes_color_part1).shape[0]) + + +pl.figure(1, figsize=(6, 5)) +pl.clf() +pl.axis('off') +pl.subplot(2, 3, 1) +pl.title(r'Input graph: $\mathbf{spC_1}$', fontsize=fontsize) + +pos1 = draw_graph( + G1, C1, ['C0' for _ in part1_], rep_indices1, node_size=node_size, seed=seed_G1) + +pl.subplot(2, 3, 2) +pl.title('Partitioning', fontsize=fontsize) + +_ = draw_graph( + G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size, seed=seed_G1) + +pl.subplot(2, 3, 3) +pl.title('Representant selection', fontsize=fontsize) + +_ = draw_graph( + G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size, + seed=seed_G1, highlight_rep=True) + +pl.subplot(2, 3, 4) +pl.title(r'Input graph: $\mathbf{spC_2}$', fontsize=fontsize) + +pos2 = draw_graph( + G2, C2, ['C0' for _ in part2_], rep_indices2, node_size=node_size, seed=seed_G2) + +pl.subplot(2, 3, 5) +pl.title(r'Partitioning', fontsize=fontsize) + +_ = draw_graph( + G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size, seed=seed_G2) + +pl.subplot(2, 3, 6) +pl.title(r'Representant selection', fontsize=fontsize) + +_ = draw_graph( + G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size, + seed=seed_G2, highlight_rep=True) +pl.tight_layout() + +############################################################################# +# +# Compute the quantized Gromov-Wasserstein distance using the wrapper +# 
--------------------------------------------------------- +# +# Compute qGW(spC1, h1, spC2, h2). We also illustrate the use of auxiliary matrices +# such as the adjacency matrices `C1_aux=C1` and `C2_aux=C2` to partition the graph using +# Louvain algorithm, and the Pagerank algorithm for selecting representant within +# each partition. Notice that `C1_aux` and `C2_aux` are optional, if they are not +# specified these pre-processing algorithms will be applied to spC1 and spC2. + + +# no node features are considered on this synthetic dataset. Hence we simply +# let F1, F2 = None and set alpha = 1. +OT_global, OTs_local, OT, log = quantized_fused_gromov_wasserstein( + spC1, spC2, npart_1, npart_2, h1, h2, C1_aux=C1, C2_aux=C2, F1=None, F2=None, + alpha=1., part_method=part_method, rep_method=rep_method, log=True) + +qGW_dist = log['qFGW_dist'] + + +############################################################################# +# +# Visualization of the quantized Gromov-Wasserstein matching +# -------------------------------------------------------------- +# +# We color nodes of the graph based on the respective partition of each graph. +# On the first plot we illustrate the qGW matching between both shortest path matrices. +# While the GW matching across representants of each space is illustrated on the right. 
+ + +def draw_transp_colored_qGW( + G1, C1, G2, C2, part1, part2, rep_indices1, rep_indices2, T, + pos1=None, pos2=None, shiftx=4, switchx=False, node_size=70, + seed_G1=0, seed_G2=0, highlight_rep=False): + starting_color = 0 + # get graphs partition and their coloring + unique_colors1 = ['C%s' % (starting_color + i) for i in np.unique(part1)] + nodes_color_part1 = [] + for cluster in part1: + nodes_color_part1.append(unique_colors1[cluster]) + + starting_color = len(unique_colors1) + 1 + unique_colors2 = ['C%s' % (starting_color + i) for i in np.unique(part2)] + nodes_color_part2 = [] + for cluster in part2: + nodes_color_part2.append(unique_colors2[cluster]) + + pos1 = draw_graph( + G1, C1, nodes_color_part1, rep_indices1, pos=pos1, node_size=node_size, + shiftx=0, seed=seed_G1, highlight_rep=highlight_rep) + pos2 = draw_graph( + G2, C2, nodes_color_part2, rep_indices2, pos=pos2, node_size=node_size, + shiftx=shiftx, seed=seed_G2, highlight_rep=highlight_rep) + + if not highlight_rep: + for k1, v1 in pos1.items(): + max_Tk1 = np.max(T[k1, :]) + for k2, v2 in pos2.items(): + if (T[k1, k2] > 0): + pl.plot([pos1[k1][0], pos2[k2][0]], + [pos1[k1][1], pos2[k2][1]], + '-', lw=0.7, alpha=T[k1, k2] / max_Tk1, + color=nodes_color_part1[k1]) + + else: # OT is only between representants + for id1, node_id1 in enumerate(rep_indices1): + max_Tk1 = np.max(T[id1, :]) + for id2, node_id2 in enumerate(rep_indices2): + if (T[id1, id2] > 0): + pl.plot([pos1[node_id1][0], pos2[node_id2][0]], + [pos1[node_id1][1], pos2[node_id2][1]], + '-', lw=0.8, alpha=T[id1, id2] / max_Tk1, + color=nodes_color_part1[node_id1]) + return pos1, pos2 + + +pl.figure(2, figsize=(5, 2.5)) +pl.clf() +pl.axis('off') +pl.subplot(1, 2, 1) +pl.title(r'qGW$(\mathbf{spC_1}, \mathbf{spC_2}) =%s$' % (np.round(qGW_dist, 3)), fontsize=fontsize) + +pos1, pos2 = draw_transp_colored_qGW( + weightedG1, C1, weightedG2, C2, part1_, part2_, rep_indices1, rep_indices2, + T=OT_, shiftx=1.5, node_size=node_size, 
seed_G1=seed_G1, seed_G2=seed_G2) + +pl.tight_layout() + +pl.subplot(1, 2, 2) +pl.title(r' GW$(\mathbf{CR_1}, \mathbf{CR_2}) =%s$' % (np.round(log_['global dist'], 3)), fontsize=fontsize) + +pos1, pos2 = draw_transp_colored_qGW( + weightedG1, C1, weightedG2, C2, part1_, part2_, rep_indices1, rep_indices2, + T=OT_global, shiftx=1.5, node_size=node_size, seed_G1=seed_G1, seed_G2=seed_G2, + highlight_rep=True) + +pl.tight_layout() +pl.show() + +############################################################################# +# +# Generate attributed point clouds +# -------------------------------------------------------------------------- +# +# Create two attributed point clouds representing curves in 2D and 3D respectively, +# whose samples are further associated to various color intensities. + +n_samples = 100 + +# Generate 2D and 3D curves +theta = np.linspace(-4 * np.pi, 4 * np.pi, n_samples) +z = np.linspace(1, 2, n_samples) +r = z**2 + 1 +x = r * np.sin(theta) +y = r * np.cos(theta) + +# Source and target distribution across spaces encoded respectively via their +# squared euclidean distance matrices. + +X = np.concatenate([x.reshape(-1, 1), z.reshape(-1, 1)], axis=1) +Y = np.concatenate([x.reshape(-1, 1), y.reshape(-1, 1), z.reshape(-1, 1)], axis=1) + +# Further associated to color intensity features derived from z + +FX = (z - z.min()) / (z.max() - z.min()) +FX = np.clip(0.8 * FX + 0.2, a_min=0.2, a_max=1.) # for numerical issues +FY = FX + + +############################################################################# +# +# Visualize partitioned attributed point clouds +# -------------------------------------------------------------------------- +# +# Compute the partitioning and representant selection further used within +# qFGW wrapper, both provided by a K-means algorithm. Then visualize partitioned spaces. 
+ +part1, rep_indices1 = get_partition_and_representants_samples( + X, 4, 'kmeans', 0) +part2, rep_indices2 = get_partition_and_representants_samples( + Y, 4, 'kmeans', 0) + +upart1 = np.unique(part1) +upart2 = np.unique(part2) + +# Plot the source and target samples as distributions +s = 20 +fig = plt.figure(3, figsize=(6, 3)) + +ax1 = fig.add_subplot(1, 3, 1) +ax1.set_title("2D curve") +ax1.scatter(X[:, 0], X[:, 1], color="C0", alpha=FX, s=s) +plt.axis('off') + + +ax2 = fig.add_subplot(1, 3, 2) +ax2.set_title("Partitioning") +for i, elem in enumerate(upart1): + idx = np.argwhere(part1 == elem)[:, 0] + ax2.scatter(X[idx, 0], X[idx, 1], color="C%s" % i, alpha=FX[idx], s=s) +plt.axis('off') + +ax3 = fig.add_subplot(1, 3, 3) +ax3.set_title("Representant selection") +for i, elem in enumerate(upart1): + idx = np.argwhere(part1 == elem)[:, 0] + ax3.scatter(X[idx, 0], X[idx, 1], color="C%s" % i, alpha=FX[idx], s=10) + rep_idx = rep_indices1[i] + ax3.scatter([X[rep_idx, 0]], [X[rep_idx, 1]], color="C%s" % i, alpha=1, s=6 * s, marker='*') +plt.axis('off') +plt.tight_layout() +plt.show() + +start_color = upart1.shape[0] + 1 + +fig = plt.figure(4, figsize=(6, 5)) + +ax4 = fig.add_subplot(1, 3, 1, projection="3d") +ax4.set_title("3D curve") +ax4.scatter(Y[:, 0], Y[:, 1], Y[:, 2], c='C0', alpha=FY, s=s) +plt.axis('off') + +ax5 = fig.add_subplot(1, 3, 2, projection="3d") +ax5.set_title("Partitioning") +for i, elem in enumerate(upart2): + idx = np.argwhere(part2 == elem)[:, 0] + color = 'C%s' % (start_color + i) + ax5.scatter(Y[idx, 0], Y[idx, 1], Y[idx, 2], c=color, alpha=FY[idx], s=s) +plt.axis('off') + +ax6 = fig.add_subplot(1, 3, 3, projection="3d") +ax6.set_title("Representant selection") +for i, elem in enumerate(upart2): + idx = np.argwhere(part2 == elem)[:, 0] + color = 'C%s' % (start_color + i) + rep_idx = rep_indices2[i] + ax6.scatter(Y[idx, 0], Y[idx, 1], Y[idx, 2], c=color, alpha=FY[idx], s=s) + ax6.scatter([Y[rep_idx, 0]], [Y[rep_idx, 1]], [Y[rep_idx, 2]], c=color, 
alpha=1, s=6 * s, marker='*') +plt.axis('off') +plt.tight_layout() +plt.show() + +############################################################################# +# +# Compute the quantized Fused Gromov-Wasserstein distance between samples using the wrapper +# --------------------------------------------------------- +# +# Compute qFGW(X, FX, hX, Y, FY, HY), setting the trade-off parameter between +# structures and features `alpha=0.5`. This solver considers a squared euclidean structure +# for each distribution X and Y, and partition each of them into 4 clusters using +# the K-means algorithm before computing qFGW. + +T_global, Ts_local, T, log = quantized_fused_gromov_wasserstein_samples( + X, Y, 4, 4, p=None, q=None, F1=FX[:, None], F2=FY[:, None], alpha=0.5, + method='kmeans', log=True) + +# Plot low rank GW with different ranks +pl.figure(5, figsize=(6, 3)) +pl.subplot(1, 2, 1) +pl.title('OT between distributions') +pl.imshow(T, interpolation="nearest", aspect="auto") +pl.colorbar() +pl.axis('off') + +pl.subplot(1, 2, 2) +pl.title('OT between representants') +pl.imshow(T_global, interpolation="nearest", aspect="auto") +pl.axis('off') +pl.colorbar() + +pl.tight_layout() +pl.show() diff --git a/ot/gromov/__init__.py b/ot/gromov/__init__.py index b33dafd32..03663dab4 100644 --- a/ot/gromov/__init__.py +++ b/ot/gromov/__init__.py @@ -50,6 +50,16 @@ from ._lowrank import (_flat_product_operator, lowrank_gromov_wasserstein_samples) +from ._quantized import (quantized_fused_gromov_wasserstein_partitioned, + get_graph_partition, + get_graph_representants, + format_partitioned_graph, + quantized_fused_gromov_wasserstein, + get_partition_and_representants_samples, + format_partitioned_samples, + quantized_fused_gromov_wasserstein_samples + ) + __all__ = ['init_matrix', 'tensor_product', 'gwloss', 'gwggrad', 'update_square_loss', 'update_kl_loss', 'update_feature_matrix', 'init_matrix_semirelaxed', 'gromov_wasserstein', 'gromov_wasserstein2', 'fused_gromov_wasserstein', @@ 
-66,4 +76,8 @@ 'entropic_semirelaxed_gromov_wasserstein2', 'entropic_semirelaxed_fused_gromov_wasserstein', 'entropic_semirelaxed_fused_gromov_wasserstein2', 'gromov_wasserstein_dictionary_learning', 'gromov_wasserstein_linear_unmixing', 'fused_gromov_wasserstein_dictionary_learning', - 'fused_gromov_wasserstein_linear_unmixing', 'lowrank_gromov_wasserstein_samples'] + 'fused_gromov_wasserstein_linear_unmixing', 'lowrank_gromov_wasserstein_samples', + 'quantized_fused_gromov_wasserstein_partitioned', 'get_graph_partition', + 'get_graph_representants', 'format_partitioned_graph', + 'quantized_fused_gromov_wasserstein', 'get_partition_and_representants_samples', + 'format_partitioned_samples', 'quantized_fused_gromov_wasserstein_samples'] diff --git a/ot/gromov/_quantized.py b/ot/gromov/_quantized.py new file mode 100644 index 000000000..147f4b221 --- /dev/null +++ b/ot/gromov/_quantized.py @@ -0,0 +1,1147 @@ +""" +Quantized (Fused) Gromov-Wasserstein solvers. +""" + +# Author: Cédric Vincent-Cuaz +# +# License: MIT License + +import numpy as np +import warnings + +try: + from networkx.algorithms.community import asyn_fluidc, louvain_communities + from networkx import from_numpy_array, pagerank + networkx_import = True +except ImportError: + networkx_import = False + +try: + from sklearn.cluster import SpectralClustering, KMeans + sklearn_import = True +except ImportError: + sklearn_import = False + +import random + +from ..utils import list_to_array, unif, dist +from ..backend import get_backend +from ..lp import emd_1d +from ._gw import gromov_wasserstein, fused_gromov_wasserstein +from ._utils import init_matrix, gwloss + + +def quantized_fused_gromov_wasserstein_partitioned( + CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR=None, + alpha=1., build_OT=False, log=False, armijo=False, max_iter=1e4, + tol_rel=1e-9, tol_abs=1e-9, nx=None, **kwargs): + r""" + Returns the quantized Fused Gromov-Wasserstein transport between + :math:`(\mathbf{C_1}, \mathbf{F_1}, 
\mathbf{p})` and :math:`(\mathbf{C_2}, + \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and representants + :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}` + and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`. + The latter must be precomputed and encoded e.g for the source as: :math:`\mathbf{CR_1}` + structure matrix between representants; `list_R1` a list of relations between + representants and their associated samples; `list_p1` a list of nodes + distribution within each partition; :math:`\mathbf{FR_1}` feature matrix + of representants. + + The function estimates the following optimization problem: + + .. math:: + \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l} + L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} + + (1-\alpha) \langle \mathbf{T}, M\rangle_F + s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p} + + \mathbf{T}^T \mathbf{1} &= \mathbf{q} + + \mathbf{T} &\geq 0 + + \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)} + + using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}` + between representants joint structure and feature spaces; ii) local alignments + :math:`\mathbf{T}^{(i, j)}` between partitions :math:`\mathbf{P_{1, i}}` + and :math:`\mathbf{P_{2, j}}` seen as 1D measures. + + Where : + + - :math:`\mathbf{C_1}`: Metric cost matrix in the source space + - :math:`\mathbf{C_2}`: Metric cost matrix in the target space + - :math:`\mathbf{F_1}`: Feature matrix in the source space + - :math:`\mathbf{F_2}`: Feature matrix in the target space + - :math:`\mathbf{M}`: Pairwise similarity matrix between features + - :math:`\mathbf{p}`: distribution in the source space + - :math:`\mathbf{q}`: distribution in the target space + - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices + + .. 
note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + .. note:: All computations in the Gromov-Wasserstein conjugate gradient solver + are done with numpy to limit memory overhead. + + Parameters + ---------- + CR1 : array-like, shape (npart1, npart1) + Structure matrix between partition representants in the source space. + CR2 : array-like, shape (npart2, npart2) + Structure matrix between partition representants in the target space. + list_R1 : list of npart1 arrays, + List of relations between representants and their associated samples in the source space. + list_R2 : list of npart2 arrays, + List of relations between representants and their associated samples in the target space. + list_p1 : list of npart1 arrays, + List of node distributions within each partition of the source space. + list_p2 : list of npart2 arrays, + List of node distributions within each partition of the target space. + MR : array-like, shape (npart1, npart2), optional. (Default is None) + Metric cost matrix between features of representants across spaces. + alpha: float, optional. Default is 1. + FGW trade-off parameter in :math:`]0, 1]` between structure and features. + If `alpha = 1` features are ignored hence computing qGW. + build_OT: bool, optional. Default is False + Either to build or not the OT between non-partitioned structures. + log : bool, optional. Default is False + record log if True + armijo : bool, optional + If True the step of the line-search is found via an Armijo line search. Else closed form is used. + If there are convergence issues use False. 
+ max_iter : int, optional + Max number of iterations + tol_rel : float, optional + Stop threshold on relative error (>0) + tol_abs : float, optional + Stop threshold on absolute error (>0) + nx : backend, optional + POT backend + + **kwargs : dict + parameters can be directly passed to the ot.optim.cg solver + + Returns + ------- + T_global: array-like, shape (`npart1`, `npart2`) + Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants. + Ts_local: dict of local OT matrices. + Dictionary with keys :math:`(i, j)` corresponding to 1D OT between + :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`. + T: array-like, shape `(ns, nt)` + Coupling between the two spaces if `build_OT=True` else None. + log : dict, if `log=True`. + Convergence information and losses of inner OT problems. + + References + ---------- + .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021). + Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing. + + """ + if nx is None: + arr = [CR1, CR2, *list_R1, *list_R2, *list_p1, *list_p2] + + if MR is not None: + arr.append(MR) + + nx = get_backend(*arr) + + npart1 = len(list_R1) + npart2 = len(list_R2) + + # compute marginals for global alignment + pR1 = nx.from_numpy(list_to_array([nx.sum(p) for p in list_p1])) + pR2 = nx.from_numpy(list_to_array([nx.sum(q) for q in list_p2])) + + # compute global alignment + if alpha == 1.: + res_global = gromov_wasserstein( + CR1, CR2, pR1, pR2, loss_fun='square_loss', log=log, + armijo=armijo, max_iter=max_iter, tol_rel=tol_rel, tol_abs=tol_abs) + + if log: + T_global, dist_global = res_global[0], res_global[1]['gw_dist'] + else: + T_global = res_global + + elif (alpha < 1.) 
def get_graph_partition(C, npart, part_method='random', F=None, alpha=1.,
                        random_state=0, nx=None):
    r"""
    Partition a graph with structure matrix :math:`\mathbf{C} \in R^{n \times n}`
    into `npart` partitions either 'random', or using one of {'louvain', 'fluid'}
    algorithms from networkx, or 'spectral' clustering from scikit-learn.

    Parameters
    ----------
    C : array-like, shape (n, n)
        Structure matrix.
    npart : int,
        number of partitions/clusters smaller than the number of nodes in
        :math:`\mathbf{C}`.
    part_method : str, optional. Default is 'random'.
        Partitioning algorithm to use among {'random', 'louvain', 'fluid', 'spectral', 'GW', 'FGW'}.
        'random' for random sampling of points; 'louvain' and 'fluid' for graph
        partitioning algorithm that works well on adjacency matrix, If the
        louvain algorithm is used, `npart` is ignored; 'spectral' for spectral
        clustering; '(F)GW' is reserved for (F)GW projections and currently
        raises a ValueError (not implemented yet).
    F : array-like, shape (n, d), optional. (Default is None)
        Optional feature matrix aligned with the graph structure. In this
        function it is only used to validate `alpha`.
    alpha : float, optional. (Default is 1.)
        Trade-off parameter between feature and structure matrices. A value
        different from 1 requires `F` to be provided.
    random_state: int, optional
        Random seed for the partitioning algorithm.
    nx : backend, optional
        POT backend.

    Returns
    -------
    part : array-like, shape (n,)
        Array of partition assignment for each node, converted to the
        backend/type of `C`.

    Raises
    ------
    ValueError
        If `alpha != 1` while `F` is None, if `part_method` is 'GW' or 'FGW'
        (not implemented yet), or if `part_method` is unknown.

    References
    ----------
    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.

    """
    if nx is None:
        nx = get_backend(C)

    n = C.shape[0]

    if (alpha != 1.) and (F is None):
        raise ValueError("`alpha != 1` but node features are not provided.")

    if npart >= n:
        warnings.warn(
            "Requested number of partitions higher than the number of nodes "
            "hence we enforce each node to be a partition.",
            stacklevel=2
        )

        part = np.arange(n)

    elif npart == 1:
        part = np.zeros(n)

    elif part_method == 'random':
        # randomly partition the space
        random.seed(random_state)
        part = list_to_array(random.choices(np.arange(npart), k=n))

    elif part_method in ('louvain', 'fluid'):
        # both networkx algorithms yield an iterable of node sets
        graph = from_numpy_array(nx.to_numpy(C))
        if part_method == 'louvain':
            # louvain chooses the number of communities itself
            part_sets = louvain_communities(graph, seed=random_state)
        else:
            part_sets = asyn_fluidc(graph, npart, seed=random_state)

        part = np.zeros(n)
        for part_id, nodes in enumerate(part_sets):
            part[list(nodes)] = part_id

    elif part_method == 'spectral':
        sc = SpectralClustering(n_clusters=npart,
                                random_state=random_state,
                                affinity='precomputed').fit(nx.to_numpy(C))
        part = sc.labels_

    elif part_method in ['GW', 'FGW']:
        raise ValueError(f"`part_method == {part_method}` not implemented yet.")

    else:
        raise ValueError(
            f"""
            Unknown `part_method='{part_method}'`. Use one of:
            {'random', 'louvain', 'fluid', 'spectral', 'GW', 'FGW'}.
            """)
    return nx.from_numpy(part, type_as=C)
+ rep_method : str, optional. Default is 'pagerank'. + Selection method for representant in each partition. Can be either 'random' + i.e random sampling within each partition, or 'pagerank' to select a + node with maximal pagerank. + random_state: int, optional + Random seed for the partitioning algorithm + nx : backend, optional + POT backend + + Returns + ------- + rep_indices : list, shape (npart,) + indices for representative node of each partition sorted + according to partition identifiers. + + References + ---------- + .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021). + Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing. + + """ + if nx is None: + nx = get_backend(C, part) + + rep_indices = [] + part_ids = nx.unique(part) + n_part_ids = part_ids.shape[0] + if n_part_ids == C.shape[0]: + rep_indices = nx.arange(n_part_ids) + + elif rep_method == 'random': + random.seed(random_state) + for id_, part_id in enumerate(part_ids): + indices = nx.where(part == part_id)[0] + rep_indices.append(random.choice(indices)) + + elif rep_method == 'pagerank': + C0, part0 = C, part + C = nx.to_numpy(C0) + part = nx.to_numpy(part0) + part_ids = np.unique(part) + + for id_ in part_ids: + indices = np.where(part == id_)[0] + C_id = C[indices, :][:, indices] + graph = from_numpy_array(C_id) + pagerank_values = list(pagerank(graph).values()) + rep_idx = np.argmax(pagerank_values) + rep_indices.append(indices[rep_idx]) + + else: + raise ValueError( + f""" + Unknown `rep_method='{rep_method}'`. Use one of: + {'random', 'pagerank'}. 
+ """) + + return rep_indices + + +def format_partitioned_graph(C, p, part, rep_indices, F=None, M=None, + alpha=1., nx=None): + """ + Format an attributed graph :math:`(\mathbf{C}, \mathbf{F}, \mathbf{p})` + with structure matrix :math:`(\mathbf{C} \in R^{n \times n}`, feature matrix + :math:`(\mathbf{F} \in R^{n \times d}` and node relative importance + :math:`(\mathbf{p} \in \Sigma_n`, into a partitioned attributed graph + taking into account partitions and representants :math:`\mathcal{P} = \left{(\mathbf{P_{i}}, \mathbf{r_{i}})\right}_i`. + + Parameters + ---------- + C : array-like, shape (n, n) + Structure matrix. + p : array-like, shape (n,), + Node distribution. + part : array-like, shape (n,) + Array of partition assignment for each node. + rep_indices : list of array-like of ints, shape (npart,) + indices for representative node of each partition sorted according to + partition identifiers. + F : array-like, shape (n, d), optional. (Default is None) + Optional feature matrix aligned with the graph structure. + M : array-like, shape (n, n), optional. (Default is None) + Optional pairwise similarity matrix between features. + alpha: float, optional. Default is 1. + Trade-off parameter in :math:`]0, 1]` between structure and features. + If `alpha = 1` features are ignored. This trade-off is taken into account + into the outputted relations between nodes and representants. + nx : backend, optional + POT backend + + Returns + ------- + CR : array-like, shape (npart, npart) + Structure matrix between partition representants. + list_R : list of npart arrays, + List of relations between a representant and nodes in its partition, + for each partition. + list_p : list of npart arrays, + List of node distributions within each partition. + FR : array-like, shape (npart, d), if `F != None`. + Feature matrix of representants. + + References + ---------- + .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021). + Quantized gromov-wasserstein. ECML PKDD 2021. 
def quantized_fused_gromov_wasserstein(
        C1, C2, npart1, npart2, p=None, q=None, C1_aux=None, C2_aux=None,
        F1=None, F2=None, alpha=1., part_method='fluid',
        rep_method='random', log=False, armijo=False, max_iter=1e4,
        tol_rel=1e-9, tol_abs=1e-9, random_state=0, **kwargs):
    r"""
    Returns the quantized Fused Gromov-Wasserstein transport between
    :math:`(\mathbf{C_1}, \mathbf{F_1}, \mathbf{p})` and :math:`(\mathbf{C_2},
    \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and
    representants :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}`
    and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`.

    The function estimates the following optimization problem:

    .. math::
        \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l}
        L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
        + (1-\alpha) \langle \mathbf{T}, \mathbf{D}(\mathbf{F_1}, \mathbf{F}_2) \rangle_F

        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}

             \mathbf{T}^T \mathbf{1} &= \mathbf{q}

             \mathbf{T} &\geq 0

             \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)}

    using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}`
    between representants across joint structure and feature spaces;
    ii) local alignments :math:`\mathbf{T}^{(i, j)}` between partitions
    :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` seen as 1D measures.

    Where :

    - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
    - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
    - :math:`\mathbf{F_1}`: Feature matrix in the source space
    - :math:`\mathbf{F_2}`: Feature matrix in the target space
    - :math:`\mathbf{D}(\mathbf{F_1}, \mathbf{F_2})`: Pairwise euclidean distance matrix between features
    - :math:`\mathbf{p}`: distribution in the source space
    - :math:`\mathbf{q}`: distribution in the target space
    - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices

    .. note:: This function is backend-compatible and will work on arrays
        from all compatible backends. But the algorithm uses the C++ CPU backend
        which can lead to copy overhead on GPU arrays.
    .. note:: All computations in the conjugate gradient solver are done with
        numpy to limit memory overhead.

    Parameters
    ----------
    C1 : array-like, shape (ns, ns)
        Structure matrix in the source space.
    C2 : array-like, shape (nt, nt)
        Structure matrix in the target space.
    npart1 : int,
        number of partition in the source space.
    npart2 : int,
        number of partition in the target space.
    p : array-like, shape (ns,), optional
        Distribution in the source space.
        If let to its default value None, uniform distribution is taken.
    q : array-like, shape (nt,), optional
        Distribution in the target space.
        If let to its default value None, uniform distribution is taken.
    C1_aux : array-like, shape (ns, ns), optional. Default is None.
        Auxiliary structure matrix in the source space to perform the partitioning
        and representant selection. If None, `C1` is used.
    C2_aux : array-like, shape (nt, nt), optional. Default is None.
        Auxiliary structure matrix in the target space to perform the partitioning
        and representant selection. If None, `C2` is used.
    F1 : array-like, shape (ns, d), optional. Default is None.
        Feature matrix in the source space.
    F2 : array-like, shape (nt, d), optional. Default is None.
        Feature matrix in the target space. `F1` and `F2` must be either both
        provided or both omitted.
    alpha: float, optional. Default is 1.
        FGW trade-off parameter in :math:`]0, 1]` between structure and features.
        If `alpha = 1` features are ignored hence computing qGW. Values outside
        :math:`]0, 1]` are rejected by the partitioned solver.
    part_method : str, optional. Default is 'fluid'.
        Partitioning algorithm to use among {'random', 'louvain', 'fluid',
        'spectral', 'louvain_fused', 'fluid_fused', 'spectral_fused', 'GW', 'FGW'}.
        If part_method in {'louvain_fused', 'fluid_fused', 'spectral_fused'},
        corresponding graph partitioning algorithm {'louvain', 'fluid', 'spectral'}
        will be used on the modified structure matrix
        :math:`\alpha \mathbf{C} + (1 - \alpha) \mathbf{D}(\mathbf{F})` where
        :math:`\mathbf{D}(\mathbf{F})` is the pairwise euclidean matrix between features.
        {'GW', 'FGW'} are forwarded to `get_graph_partition`, which currently
        raises a ValueError for them (not implemented yet).
        If the louvain algorithm is used, the requested number of partitions is
        ignored.
    rep_method : str, optional. Default is 'random'.
        Selection method for node representant in each partition.
        Can be either 'random' i.e random sampling within each partition,
        {'pagerank', 'pagerank_fused'} to select a node with maximal pagerank w.r.t
        :math:`\mathbf{C}` or :math:`\alpha \mathbf{C} + (1 - \alpha) \mathbf{D}(\mathbf{F})`.
    log : bool, optional
        record log if True
    armijo : bool, optional
        If True the step of the line-search is found via an armijo research. Else closed form is used.
        If there are convergence issues use False.
    max_iter : int, optional
        Max number of iterations
    tol_rel : float, optional
        Stop threshold on relative error (>0)
    tol_abs : float, optional
        Stop threshold on absolute error (>0)
    random_state: int, optional
        Random seed for the partitioning algorithm
    **kwargs : dict
        parameters can be directly passed to the ot.optim.cg solver

    Returns
    -------
    T_global: array-like, shape (`npart1`, `npart2`)
        Fused Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants.
    Ts_local: dict of local OT matrices.
        Dictionary with keys :math:`(i, j)` corresponding to 1D OT between
        :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`.
    T: array-like, shape `(ns, nt)`
        Coupling between the two spaces.
    log : dict, if `log=True`.
        Convergence information for inner problems and the qFGW loss stored
        under the key 'qFGW_dist'.

    Raises
    ------
    ValueError
        If a 'fused' method (or 'FGW') is requested without both feature
        matrices, or if only one of `F1` / `F2` is provided.

    References
    ----------
    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.

    """
    # fall back to dependency-free methods when optional packages are missing
    if (part_method in ['fluid', 'louvain', 'fluid_fused', 'louvain_fused'] or (rep_method in ['pagerank', 'pagerank_fused'])):
        if not networkx_import:
            warnings.warn(
                f"""
                Networkx is not installed, so part_method={part_method} and/or
                rep_method={rep_method} cannot be used and are set to `random`
                default methods. Consider installing Networkx to fix this.
                """,
                stacklevel=2
            )
            part_method = 'random'
            rep_method = 'random'

    if (part_method in ['spectral', 'spectral_fused']) and (not sklearn_import):
        warnings.warn(
            f"""
            Scikit-learn is not installed, so part_method={part_method} and/or
            rep_method={rep_method} cannot be used and are set to `random`
            default methods. Consider installing Scikit-learn to fix this.
            """,
            stacklevel=2
        )
        part_method = 'random'
        rep_method = 'random'

    if (('fused' in part_method) or ('fused' in rep_method) or (part_method == 'FGW')):
        if (F1 is None) or (F2 is None):
            raise ValueError(
                f"""
                `part_method='{part_method}'` and/or `rep_method='{rep_method}'`
                require feature matrices which are not provided as inputs.
                """)

    # a single feature matrix cannot be used: every later step needs both
    if (F1 is None) != (F2 is None):
        raise ValueError(
            "Feature matrices `F1` and `F2` must be both provided or both None.")

    # collect every provided array to infer a common backend
    arr = [C1, C2]
    if C1_aux is not None:
        arr.append(C1_aux)
    else:
        C1_aux = C1
    if C2_aux is not None:
        arr.append(C2_aux)
    else:
        C2_aux = C2
    if p is not None:
        arr.append(list_to_array(p))
    else:
        p = unif(C1.shape[0], type_as=C1)
    if q is not None:
        arr.append(list_to_array(q))
    else:
        q = unif(C2.shape[0], type_as=C2)
    if F1 is not None:
        arr.append(F1)
    if F2 is not None:
        arr.append(F2)

    nx = get_backend(*arr)

    DF1 = None
    DF2 = None
    # compute attributed graph partitions potentially using the auxiliary structure
    if 'fused' in part_method:

        DF1 = dist(F1, F1)
        DF2 = dist(F2, F2)
        C1_new = alpha * C1_aux + (1 - alpha) * DF1
        C2_new = alpha * C2_aux + (1 - alpha) * DF2

        # strip the '_fused' suffix to recover the base algorithm name
        part_method_ = part_method[:-6]
        part1 = get_graph_partition(C1_new, npart1, part_method_, random_state=random_state, nx=nx)
        part2 = get_graph_partition(C2_new, npart2, part_method_, random_state=random_state, nx=nx)

    else:
        part1 = get_graph_partition(C1_aux, npart1, part_method, F1, alpha, random_state, nx)
        part2 = get_graph_partition(C2_aux, npart2, part_method, F2, alpha, random_state, nx)

    # select one representant per partition
    if 'fused' in rep_method:
        if DF1 is None:
            DF1 = dist(F1, F1)
            DF2 = dist(F2, F2)
            C1_new = alpha * C1_aux + (1 - alpha) * DF1
            C2_new = alpha * C2_aux + (1 - alpha) * DF2

        rep_method_ = rep_method[:-6]

        rep_indices1 = get_graph_representants(C1_new, part1, rep_method_, random_state, nx)
        rep_indices2 = get_graph_representants(C2_new, part2, rep_method_, random_state, nx)

    else:
        rep_indices1 = get_graph_representants(C1_aux, part1, rep_method, random_state, nx)
        rep_indices2 = get_graph_representants(C2_aux, part2, rep_method, random_state, nx)

    # format partitions over (C1, F1) and (C2, F2)
    if F1 is None:
        CR1, list_R1, list_p1 = format_partitioned_graph(C1, p, part1, rep_indices1, nx=nx)
        CR2, list_R2, list_p2 = format_partitioned_graph(C2, q, part2, rep_indices2, nx=nx)

        MR = None
    else:
        if DF1 is None:
            DF1 = dist(F1, F1)
            DF2 = dist(F2, F2)

        CR1, list_R1, list_p1, FR1 = format_partitioned_graph(C1, p, part1, rep_indices1, F1, DF1, alpha, nx)
        CR2, list_R2, list_p2, FR2 = format_partitioned_graph(C2, q, part2, rep_indices2, F2, DF2, alpha, nx)

        MR = dist(FR1, FR2)

    # call to partitioned quantized fused gromov-wasserstein solver
    res = quantized_fused_gromov_wasserstein_partitioned(
        CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR, alpha, build_OT=True,
        log=log, armijo=armijo, max_iter=max_iter, tol_rel=tol_rel,
        tol_abs=tol_abs, nx=nx, **kwargs)

    if not log:
        T_global, Ts_local, T = res
        return T_global, Ts_local, T

    T_global, Ts_local, T, log_ = res

    # compute the transport cost on structures
    constC, hC1, hC2 = init_matrix(C1, C2, p, q, 'square_loss', nx)
    structure_cost = gwloss(constC, hC1, hC2, T, nx)

    if alpha != 1.:
        M = dist(F1, F2)
        feature_cost = nx.sum(M * T)
    else:
        feature_cost = 0.

    log_['qFGW_dist'] = alpha * structure_cost + (1 - alpha) * feature_cost
    return T_global, Ts_local, T, log_
+ Partitioning and representant selection algorithms to use among + {'random', 'kmeans'}. 'random' for random sampling of points; 'kmeans' + for k-means clustering using scikit-learn implementation where closest + points to centroids are considered as representants. + random_state: int, optional + Random seed for the partitioning algorithm. + nx : backend, optional + POT backend. + + Returns + ------- + part : array-like, shape (npart,) + Array of partition assignment for each node. + + rep_indices : list, shape (npart,) + indices for representative node of each partition sorted + according to partition identifiers. + + References + ---------- + .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021). + Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing. + + """ + if nx is None: + nx = get_backend(X) + + n = X.shape[0] + X0 = X + + if npart >= n: + warnings.warn( + "Requested number of partitions higher than the number of nodes" + "hence we enforce each node to be a partition.", + stacklevel=2 + ) + + part = nx.arange(n) + rep_indices = nx.arange(n) + + elif npart == 1: + random.seed(random_state) + part = nx.zeros(n) + rep_indices = [random.choice(nx.arange(n))] + + elif method == 'random': + # randomly partition the space + random.seed(random_state) + part = list_to_array(random.choices(np.arange(npart), k=X.shape[0])) + part = nx.from_numpy(part, type_as=X0) + + # randomly select representant in each partition + rep_indices = [] + part_ids = nx.unique(part) + for id_, part_id in enumerate(part_ids): + indices = nx.where(part == part_id)[0] + rep_indices.append(random.choice(indices)) + + elif method == 'kmeans': + X = nx.to_numpy(X0) + km = KMeans(n_clusters=npart, random_state=random_state).fit(X) + part = nx.from_numpy(km.labels_, type_as=X0) + + rep_indices = [] + for part_id in range(npart): + indices = nx.where(part == part_id)[0] + dists = dist(X[indices], km.cluster_centers_[part_id][None, :]) + best_idx = 
indices[dists.argmin()] + rep_indices.append(best_idx) + + else: + raise ValueError( + f""" + Unknown `method='{method}'`. Use one of: {'random', 'kmeans'} + """) + + return part, rep_indices + + +def format_partitioned_samples( + X, p, part, rep_indices, F=None, alpha=1., nx=None): + """ + Format an attributed graph :math:`(\mathbf{D}(\mathbf{X}), \mathbf{F}, \mathbf{p})` + with euclidean structure matrix :math:`(\mathbf{D}(\mathbf{X}) \in R^{n \times n}`, + feature matrix :math:`(\mathbf{F} \in R^{n \times d}` and node relative importance + :math:`(\mathbf{p} \in \Sigma_n`, into a partitioned attributed graph + taking into account partitions and representants :math:`\mathcal{P} = \left{(\mathbf{P_{i}}, \mathbf{r_{i}})\right}_i`. + + Parameters + ---------- + X : array-like, shape (n, d) + Structure matrix. + p : array-like, shape (n,), + Node distribution. + part : array-like, shape (n,) + Array of partition assignment for each node. + rep_indices : list of array-like of ints, shape (npart,) + indices for representative node of each partition sorted according to + partition identifiers. + F : array-like, shape (n, p), optional. (Default is None) + Optional feature matrix aligned with the samples. + alpha: float, optional. Default is 1. + Trade-off parameter in :math:`]0, 1]` between structure and features. + If `alpha = 1` features are ignored. This trade-off is taken into account + into the outputted relations between nodes and representants. + nx : backend, optional + POT backend + + Returns + ------- + CR : array-like, shape (npart, npart) + Structure matrix between partition representants. + list_R : list of npart arrays, + List of relations between a representant and nodes in its partition, + for each partition. + list_p : list of npart arrays, + List of node distributions within each partition. + FR : array-like, shape (npart, d), if `F != None`. + Feature matrix of representants. + + References + ---------- + .. 
def quantized_fused_gromov_wasserstein_samples(
        X1, X2, npart1, npart2, p=None, q=None, F1=None, F2=None, alpha=1.,
        method='kmeans', log=False, armijo=False, max_iter=1e4,
        tol_rel=1e-9, tol_abs=1e-9, random_state=0, **kwargs):
    r"""
    Returns the quantized Fused Gromov-Wasserstein transport between samples
    endowed with their respective euclidean geometry :math:`(\mathbf{D}(\mathbf{X_1}), \mathbf{F_1}, \mathbf{p})`
    and :math:`(\mathbf{D}(\mathbf{X_2}), \mathbf{F_2}, \mathbf{q})`, whose samples are assigned to partitions and
    representants :math:`\mathcal{P_1} = \{(\mathbf{P_{1, i}}, \mathbf{r_{1, i}})\}_{i \leq npart1}`
    and :math:`\mathcal{P_2} = \{(\mathbf{P_{2, j}}, \mathbf{r_{2, j}})\}_{j \leq npart2}`.

    The function estimates the following optimization problem:

    .. math::
        \mathbf{T}^* \in \mathop{\arg \min}_\mathbf{T} \quad \alpha \sum_{i,j,k,l}
        L(\mathbf{D}(\mathbf{X_1})_{i,k}, \mathbf{D}(\mathbf{X_2})_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
        + (1-\alpha) \langle \mathbf{T}, \mathbf{D}(\mathbf{F_1}, \mathbf{F}_2) \rangle_F

        s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}

             \mathbf{T}^T \mathbf{1} &= \mathbf{q}

             \mathbf{T} &\geq 0

             \mathbf{T}_{|\mathbf{P_{1, i}}, \mathbf{P_{2, j}}} &= T^{g}_{ij} \mathbf{T}^{(i,j)}

    using a two-step strategy computing: i) a global alignment :math:`\mathbf{T}^{g}`
    between representants across joint structure and feature spaces;
    ii) local alignments :math:`\mathbf{T}^{(i, j)}` between partitions
    :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` seen as 1D measures.

    Where :

    - :math:`\mathbf{X_1}`: Samples in the source space
    - :math:`\mathbf{X_2}`: Samples in the target space
    - :math:`\mathbf{F_1}`: Feature matrix in the source space
    - :math:`\mathbf{F_2}`: Feature matrix in the target space
    - :math:`\mathbf{D}(\mathbf{F_1}, \mathbf{F_2})`: Pairwise euclidean distance matrix between features
    - :math:`\mathbf{p}`: distribution in the source space
    - :math:`\mathbf{q}`: distribution in the target space
    - :math:`L`: quadratic loss function to account for the misfit between the similarity matrices

    .. note:: This function is backend-compatible and will work on arrays
        from all compatible backends. But the algorithm uses the C++ CPU backend
        which can lead to copy overhead on GPU arrays.
    .. note:: All computations in the conjugate gradient solver are done with
        numpy to limit memory overhead.

    Parameters
    ----------
    X1 : array-like, shape (ns, ds)
        Samples in the source space.
    X2 : array-like, shape (nt, dt)
        Samples in the target space.
    npart1 : int,
        number of partition in the source space.
    npart2 : int,
        number of partition in the target space.
    p : array-like, shape (ns,), optional
        Distribution in the source space.
        If let to its default value None, uniform distribution is taken.
    q : array-like, shape (nt,), optional
        Distribution in the target space.
        If let to its default value None, uniform distribution is taken.
    F1 : array-like, shape (ns, d), optional. Default is None.
        Feature matrix in the source space.
    F2 : array-like, shape (nt, d), optional. Default is None.
        Feature matrix in the target space. `F1` and `F2` must be either both
        provided or both omitted.
    alpha: float, optional. Default is 1.
        FGW trade-off parameter in :math:`]0, 1]` between structure and features.
        If `alpha = 1` features are ignored hence computing qGW. Values outside
        :math:`]0, 1]` are rejected by the partitioned solver.
    method : str, optional. Default is 'kmeans'.
        Partitioning and representant selection algorithms to use among
        {'random', 'kmeans', 'kmeans_fused'}.
        If `method == 'kmeans_fused'`, kmeans is performed on augmented
        samples :math:`[\alpha \mathbf{X}; (1 - \alpha) \mathbf{F}]`; when
        `alpha = 1` this reduces to plain 'kmeans' on :math:`\mathbf{X}`.
    log : bool, optional
        record log if True
    armijo : bool, optional
        If True the step of the line-search is found via an armijo research. Else closed form is used.
        If there are convergence issues use False.
    max_iter : int, optional
        Max number of iterations
    tol_rel : float, optional
        Stop threshold on relative error (>0)
    tol_abs : float, optional
        Stop threshold on absolute error (>0)
    random_state: int, optional
        Random seed for the partitioning algorithm
    **kwargs : dict
        parameters can be directly passed to the ot.optim.cg solver

    Returns
    -------
    T_global: array-like, shape (`npart1`, `npart2`)
        Fused Gromov-Wasserstein alignment :math:`\mathbf{T}^{g}` between representants.
    Ts_local: dict of local OT matrices.
        Dictionary with keys :math:`(i, j)` corresponding to 1D OT between
        :math:`\mathbf{P_{1, i}}` and :math:`\mathbf{P_{2, j}}` if :math:`T^{g}_{ij} \neq 0`.
    T: array-like, shape `(ns, nt)`
        Coupling between the two spaces.
    log : dict, if `log=True`.
        Convergence information for inner problems and the qFGW loss stored
        under the key 'qFGW_dist'.

    Raises
    ------
    ValueError
        If a 'fused' method is requested without both feature matrices, or
        if only one of `F1` / `F2` is provided.

    References
    ----------
    .. [68] Chowdhury, S., Miller, D., & Needham, T. (2021).
        Quantized gromov-wasserstein. ECML PKDD 2021. Springer International Publishing.

    """

    # fall back to the dependency-free method when scikit-learn is missing
    if (method in ['kmeans', 'kmeans_fused']) and (not sklearn_import):
        warnings.warn(
            f"""
            Scikit-learn is not installed, so method={method} cannot be used
            and is set to `random` default methods. Consider installing
            Scikit-learn to fix this.
            """,
            stacklevel=2
        )
        method = 'random'

    if ('fused' in method) and ((F1 is None) or (F2 is None)):
        raise ValueError(
            f"""
            `method='{method}'` requires feature matrices which are not provided as inputs.
            """)

    # a single feature matrix cannot be used: every later step needs both
    if (F1 is None) != (F2 is None):
        raise ValueError(
            "Feature matrices `F1` and `F2` must be both provided or both None.")

    # collect every provided array to infer a common backend
    arr = [X1, X2]
    if p is not None:
        arr.append(list_to_array(p))
    else:
        p = unif(X1.shape[0], type_as=X1)
    if q is not None:
        arr.append(list_to_array(q))
    else:
        q = unif(X2.shape[0], type_as=X2)
    if F1 is not None:
        arr.append(F1)
    if F2 is not None:
        arr.append(F2)

    nx = get_backend(*arr)

    # compute attributed partitions and representants
    fused = 'fused' in method
    # always strip the '_fused' suffix: the partitioning helper only knows
    # the base method names, including when alpha == 1
    method_ = method[:-6] if fused else method
    if fused and (alpha != 1.):
        X1_new = nx.concatenate([alpha * X1, (1 - alpha) * F1], axis=1)
        X2_new = nx.concatenate([alpha * X2, (1 - alpha) * F2], axis=1)
    else:
        X1_new, X2_new = X1, X2

    part1, rep_indices1 = get_partition_and_representants_samples(
        X1_new, npart1, method_, random_state, nx)
    part2, rep_indices2 = get_partition_and_representants_samples(
        X2_new, npart2, method_, random_state, nx)

    # format partitions over (X1, F1) and (X2, F2)
    if F1 is None:
        CR1, list_R1, list_p1 = format_partitioned_samples(
            X1, p, part1, rep_indices1, nx=nx)
        CR2, list_R2, list_p2 = format_partitioned_samples(
            X2, q, part2, rep_indices2, nx=nx)

        MR = None
    else:
        CR1, list_R1, list_p1, FR1 = format_partitioned_samples(
            X1, p, part1, rep_indices1, F1, alpha, nx)
        CR2, list_R2, list_p2, FR2 = format_partitioned_samples(
            X2, q, part2, rep_indices2, F2, alpha, nx)

        MR = dist(FR1, FR2)

    # call to partitioned quantized fused gromov-wasserstein solver
    res = quantized_fused_gromov_wasserstein_partitioned(
        CR1, CR2, list_R1, list_R2, list_p1, list_p2, MR, alpha, build_OT=True,
        log=log, armijo=armijo, max_iter=max_iter, tol_rel=tol_rel,
        tol_abs=tol_abs, nx=nx, **kwargs)

    if not log:
        T_global, Ts_local, T = res
        return T_global, Ts_local, T

    T_global, Ts_local, T, log_ = res

    # full pairwise structure matrices are only needed to evaluate the loss
    C1 = dist(X1, X1)
    C2 = dist(X2, X2)

    # compute the transport cost on structures
    constC, hC1, hC2 = init_matrix(C1, C2, p, q, 'square_loss', nx)
    structure_cost = gwloss(constC, hC1, hC2, T, nx)

    if alpha != 1.:
        M = dist(F1, F2)
        feature_cost = nx.sum(M * T)
    else:
        feature_cost = 0.

    log_['qFGW_dist'] = alpha * structure_cost + (1 - alpha) * feature_cost
    return T_global, Ts_local, T, log_
+ + p = ot.unif(n_samples) + q = ot.unif(n_samples) + + npart2 = 3 + + C1b, C2b, pb, qb = nx.from_numpy(C1, C2, p, q) + + for npart1 in [1, n_samples + 1, 2]: + log_tests = [True, False, False, True, True, False] + + pairs_part_rep = [('random', 'random')] + if networkx_import: + pairs_part_rep += [('louvain', 'random'), ('fluid', 'pagerank')] + if sklearn_import: + pairs_part_rep += [('spectral', 'random')] + + count_mode = 0 + + for part_method, rep_method in pairs_part_rep: + log_ = log_tests[count_mode] + count_mode += 1 + + res = ot.gromov.quantized_fused_gromov_wasserstein( + C1, C2, npart1, npart2, p, None, C1, None, part_method=part_method, + rep_method=rep_method, log=log_) + + resb = ot.gromov.quantized_fused_gromov_wasserstein( + C1b, C2b, npart1, npart2, None, qb, None, C2b, part_method=part_method, + rep_method=rep_method, log=log_) + + if log_: + T_global, Ts_local, T, log = res + T_globalb, Ts_localb, Tb, logb = resb + else: + T_global, Ts_local, T = res + T_globalb, Ts_localb, Tb = resb + + Tb = nx.to_numpy(Tb) + # check constraints + np.testing.assert_allclose(T, Tb, atol=1e-06) + np.testing.assert_allclose( + p, Tb.sum(1), atol=1e-06) # cf convergence gromov + np.testing.assert_allclose( + q, Tb.sum(0), atol=1e-06) # cf convergence gromov + + if log_: + for key in log.keys(): + # The inner test T_global[i, j] != 0. can lead to different + # computation of 1D OT computations between partition depending + # on the different float errors across backend + if key in logb.keys(): + np.testing.assert_allclose(log[key], logb[key], atol=1e-06) + + +def test_quantized_fgw(nx): + n_samples = 30 # nb samples + + rng = np.random.RandomState(0) + C1 = rng.uniform(low=0., high=10, size=(n_samples, n_samples)) + C1 = (C1 + C1.T) / 2. + + F1 = rng.uniform(low=0., high=10, size=(n_samples, 1)) + + C2 = rng.uniform(low=10., high=20., size=(n_samples, n_samples)) + C2 = (C2 + C2.T) / 2. 
+ + F2 = rng.uniform(low=0., high=10, size=(n_samples, 1)) + + p = ot.unif(n_samples) + q = ot.unif(n_samples) + + npart1 = 2 + npart2 = 3 + + C1b, C2b, F1b, F2b, pb, qb = nx.from_numpy(C1, C2, F1, F2, p, q) + + log_tests = [True, False, False, True, True, False] + + pairs_part_rep = [] + if networkx_import: + pairs_part_rep += [('louvain_fused', 'pagerank'), + ('louvain', 'pagerank_fused'), + ('fluid_fused', 'pagerank_fused')] + if sklearn_import: + pairs_part_rep += [('spectral_fused', 'random')] + + pairs_part_rep += [('random', 'random')] + count_mode = 0 + + alpha = 0.5 + + for part_method, rep_method in pairs_part_rep: + log_ = log_tests[count_mode] + count_mode += 1 + + res = ot.gromov.quantized_fused_gromov_wasserstein( + C1, C2, npart1, npart2, p, None, C1, None, F1, F2, alpha, + part_method, rep_method, log_) + + resb = ot.gromov.quantized_fused_gromov_wasserstein( + C1b, C2b, npart1, npart2, None, qb, None, C2b, F1b, F2b, alpha, + part_method, rep_method, log_) + + if log_: + T_global, Ts_local, T, log = res + T_globalb, Ts_localb, Tb, logb = resb + else: + T_global, Ts_local, T = res + T_globalb, Ts_localb, Tb = resb + + Tb = nx.to_numpy(Tb) + # check constraints + np.testing.assert_allclose(T, Tb, atol=1e-06) + np.testing.assert_allclose( + p, Tb.sum(1), atol=1e-06) # cf convergence gromov + np.testing.assert_allclose( + q, Tb.sum(0), atol=1e-06) # cf convergence gromov + + if log_: + for key in log.keys(): + # The inner test T_global[i, j] != 0. 
can lead to different + # computation of 1D OT computations between partition depending + # on the different float errors across backend + if key in logb.keys(): + np.testing.assert_allclose(log[key], logb[key], atol=1e-06) + + # complementary tests for utils functions + DF1b = ot.dist(F1b, F1b) + DF2b = ot.dist(F2b, F2b) + C1b_new = alpha * C1b + (1 - alpha) * DF1b + C2b_new = alpha * C2b + (1 - alpha) * DF2b + + part1b = ot.gromov.get_graph_partition( + C1b_new, npart1, part_method=pairs_part_rep[-1][0], random_state=0) + part2b = ot.gromov._quantized.get_graph_partition( + C2b_new, npart2, part_method=pairs_part_rep[-1][0], random_state=0) + + rep_indices1b = ot.gromov.get_graph_representants( + C1b, part1b, rep_method=pairs_part_rep[-1][1], random_state=0) + rep_indices2b = ot.gromov.get_graph_representants( + C2b, part2b, rep_method=pairs_part_rep[-1][1], random_state=0) + + CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_graph( + C1b, pb, part1b, rep_indices1b, F1b, DF1b, alpha) + CR2b, list_R2b, list_p2b, FR2b = ot.gromov.format_partitioned_graph( + C2b, qb, part2b, rep_indices2b, F2b, DF2b, alpha) + + MRb = ot.dist(FR1b, FR2b) + + T_globalb, Ts_localb, _ = ot.gromov.quantized_fused_gromov_wasserstein_partitioned( + CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, alpha, build_OT=False) + + T_globalb = nx.to_numpy(T_globalb) + np.testing.assert_allclose(T_global, T_globalb, atol=1e-06) + + for key in Ts_localb.keys(): + T_localb = nx.to_numpy(Ts_localb[key]) + np.testing.assert_allclose(Ts_local[key], T_localb, atol=1e-06) + + # tests for edge cases of the graph partitioning + for method in ['unknown_method', 'GW', 'FGW']: + with pytest.raises(ValueError): + ot.gromov.get_graph_partition( + C1b, npart1, part_method=method, random_state=0) + + with pytest.raises(ValueError): + ot.gromov.get_graph_partition( + C1b, npart1, part_method=method, alpha=0.5, F=None, random_state=0) + + # tests for edge cases of the representant selection + 
with pytest.raises(ValueError): + ot.gromov.get_graph_representants( + C1b, part1b, rep_method='unknown_method', random_state=0) + + # tests for edge cases of the format_partitioned_graph function + with pytest.raises(ValueError): + CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_graph( + C1b, pb, part1b, rep_indices1b, F1b, None, alpha) + + # Tests in qFGW solvers + # for non admissible values of alpha + with pytest.raises(ValueError): + ot.gromov.quantized_fused_gromov_wasserstein_partitioned( + CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, 0, build_OT=False) + + # for non-consistent feature information provided + with pytest.raises(ValueError): + ot.gromov.quantized_fused_gromov_wasserstein( + C1, C2, npart1, npart2, p, q, None, None, F1, None, 0.5, + 'spectral_fused', 'random', log_) + + +@pytest.skip_backend("jax", reason="test very slow with jax backend") +def test_quantized_gw_samples(nx): + n_samples_1 = 15 # nb samples + n_samples_2 = 20 # nb samples + + rng = np.random.RandomState(0) + X1 = rng.uniform(low=0., high=10, size=(n_samples_1, 2)) + X2 = rng.uniform(low=0., high=10, size=(n_samples_2, 4)) + + p = ot.unif(n_samples_1) + q = ot.unif(n_samples_2) + + npart1 = 2 + npart2 = 3 + + X1b, X2b, pb, qb = nx.from_numpy(X1, X2, p, q) + + log_tests = [True, False, True] + methods = ['random'] + if sklearn_import: + methods += ['kmeans'] + + count_mode = 0 + alpha = 1. 
+ + for method in methods: + log_ = log_tests[count_mode] + count_mode += 1 + + res = ot.gromov.quantized_fused_gromov_wasserstein_samples( + X1, X2, npart1, npart2, p, None, None, None, alpha, method, log_) + + resb = ot.gromov.quantized_fused_gromov_wasserstein_samples( + X1b, X2b, npart1, npart2, None, qb, None, None, alpha, method, log_) + + if log_: + T_global, Ts_local, T, log = res + T_globalb, Ts_localb, Tb, logb = resb + else: + T_global, Ts_local, T = res + T_globalb, Ts_localb, Tb = resb + + Tb = nx.to_numpy(Tb) + # check constraints + np.testing.assert_allclose(T, Tb, atol=1e-06) + np.testing.assert_allclose( + p, Tb.sum(1), atol=1e-06) # cf convergence gromov + np.testing.assert_allclose( + q, Tb.sum(0), atol=1e-06) # cf convergence gromov + + if log_: + for key in log.keys(): + # The inner test T_global[i, j] != 0. can lead to different + # computation of 1D OT computations between partition depending + # on the different float errors across backend + if key in logb.keys(): + np.testing.assert_allclose(log[key], logb[key], atol=1e-06) + + # tests for edge cases of the representant selection + with pytest.raises(ValueError): + ot.gromov.get_partition_and_representants_samples( + X1, npart1, method='unknown_method', random_state=0) + + +@pytest.skip_backend("jax", reason="test very slow with jax backend") +def test_quantized_fgw_samples(nx): + n_samples_1 = 20 # nb samples + n_samples_2 = 30 # nb samples + + rng = np.random.RandomState(0) + X1 = rng.uniform(low=0., high=10, size=(n_samples_1, 2)) + X2 = rng.uniform(low=0., high=10, size=(n_samples_2, 4)) + + F1 = rng.uniform(low=0., high=10, size=(n_samples_1, 3)) + F2 = rng.uniform(low=0., high=10, size=(n_samples_2, 3)) + + p = ot.unif(n_samples_1) + q = ot.unif(n_samples_2) + + npart1 = 2 + npart2 = 3 + + X1b, X2b, F1b, F2b, pb, qb = nx.from_numpy(X1, X2, F1, F2, p, q) + + methods = [] + if sklearn_import: + methods += ['kmeans', 'kmeans_fused'] + methods += ['random'] + + alpha = 0.5 + + for npart1 
in [1, n_samples_1 + 1, 2]: + log_tests = [True, False, True] + count_mode = 0 + + for method in methods: + log_ = log_tests[count_mode] + count_mode += 1 + + res = ot.gromov.quantized_fused_gromov_wasserstein_samples( + X1, X2, npart1, npart2, p, None, F1, F2, alpha, method, log_) + + resb = ot.gromov.quantized_fused_gromov_wasserstein_samples( + X1b, X2b, npart1, npart2, None, qb, F1b, F2b, alpha, method, log_) + + if log_: + T_global, Ts_local, T, log = res + T_globalb, Ts_localb, Tb, logb = resb + else: + T_global, Ts_local, T = res + T_globalb, Ts_localb, Tb = resb + + Tb = nx.to_numpy(Tb) + # check constraints + np.testing.assert_allclose(T, Tb, atol=1e-06) + np.testing.assert_allclose( + p, Tb.sum(1), atol=1e-06) # cf convergence gromov + np.testing.assert_allclose( + q, Tb.sum(0), atol=1e-06) # cf convergence gromov + + if log_: + for key in log.keys(): + # The inner test T_global[i, j] != 0. can lead to different + # computation of 1D OT computations between partition depending + # on the different float errors across backend + if key in logb.keys(): + np.testing.assert_allclose(log[key], logb[key], atol=1e-06) + + # complementary tests for utils functions + part1b, rep_indices1 = ot.gromov.get_partition_and_representants_samples( + X1b, npart1, method=method, random_state=0) + part2b, rep_indices2 = ot.gromov.get_partition_and_representants_samples( + X2b, npart2, method=method, random_state=0) + + CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_samples( + X1b, pb, part1b, rep_indices1, F1b, alpha) + CR2b, list_R2b, list_p2b, FR2b = ot.gromov.format_partitioned_samples( + X2b, qb, part2b, rep_indices2, F2b, alpha) + + MRb = ot.dist(FR1b, FR2b) + + T_globalb, Ts_localb, _ = ot.gromov.quantized_fused_gromov_wasserstein_partitioned( + CR1b, CR2b, list_R1b, list_R2b, list_p1b, list_p2b, MRb, alpha, build_OT=False) + + T_globalb = nx.to_numpy(T_globalb) + np.testing.assert_allclose(T_global, T_globalb, atol=1e-06) + + for key in Ts_localb.keys(): 
+ T_localb = nx.to_numpy(Ts_localb[key]) + np.testing.assert_allclose(Ts_local[key], T_localb, atol=1e-06) + + # tests for edge cases of the format_partitioned_samples function + with pytest.raises(ValueError): + CR1b, list_R1b, list_p1b, FR1b = ot.gromov.format_partitioned_samples( + X1b, pb, part1b, rep_indices1, None, alpha) + + # for non-consistent feature information provided + with pytest.raises(ValueError): + ot.gromov.quantized_fused_gromov_wasserstein_samples( + X1, X2, npart1, npart2, p, None, None, F2, alpha, 'fused_spectral', log_)